1 /* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <deathsimple@vodafone.de> 29 */ 30 31 #include <linux/firmware.h> 32 #include <linux/module.h> 33 34 #include <drm/drm.h> 35 36 #include "radeon.h" 37 #include "radeon_ucode.h" 38 #include "r600d.h" 39 40 /* 1 second timeout */ 41 #define UVD_IDLE_TIMEOUT_MS 1000 42 43 /* Firmware Names */ 44 #define FIRMWARE_R600 "radeon/R600_uvd.bin" 45 #define FIRMWARE_RS780 "radeon/RS780_uvd.bin" 46 #define FIRMWARE_RV770 "radeon/RV770_uvd.bin" 47 #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" 48 #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" 49 #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" 50 #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" 51 #define FIRMWARE_BONAIRE_LEGACY "radeon/BONAIRE_uvd.bin" 52 #define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" 53 54 MODULE_FIRMWARE(FIRMWARE_R600); 55 MODULE_FIRMWARE(FIRMWARE_RS780); 56 MODULE_FIRMWARE(FIRMWARE_RV770); 57 MODULE_FIRMWARE(FIRMWARE_RV710); 58 MODULE_FIRMWARE(FIRMWARE_CYPRESS); 59 MODULE_FIRMWARE(FIRMWARE_SUMO); 60 MODULE_FIRMWARE(FIRMWARE_TAHITI); 61 MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY); 62 MODULE_FIRMWARE(FIRMWARE_BONAIRE); 63 64 static void radeon_uvd_idle_work_handler(struct work_struct *work); 65 66 int radeon_uvd_init(struct radeon_device *rdev) 67 { 68 unsigned long bo_size; 69 const char *fw_name = NULL, *legacy_fw_name = NULL; 70 int i, r; 71 72 INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); 73 74 switch (rdev->family) { 75 case CHIP_RV610: 76 case CHIP_RV630: 77 case CHIP_RV670: 78 case CHIP_RV620: 79 case CHIP_RV635: 80 legacy_fw_name = FIRMWARE_R600; 81 break; 82 83 case CHIP_RS780: 84 case CHIP_RS880: 85 legacy_fw_name = FIRMWARE_RS780; 86 break; 87 88 case CHIP_RV770: 89 legacy_fw_name = FIRMWARE_RV770; 90 break; 91 92 case CHIP_RV710: 93 case CHIP_RV730: 94 case CHIP_RV740: 95 legacy_fw_name = FIRMWARE_RV710; 96 break; 97 98 case CHIP_CYPRESS: 99 case CHIP_HEMLOCK: 100 case CHIP_JUNIPER: 101 case CHIP_REDWOOD: 102 case CHIP_CEDAR: 103 legacy_fw_name = FIRMWARE_CYPRESS; 104 break; 105 106 case CHIP_SUMO: 107 case CHIP_SUMO2: 108 case CHIP_PALM: 109 case CHIP_CAYMAN: 110 case CHIP_BARTS: 111 case CHIP_TURKS: 112 case CHIP_CAICOS: 113 legacy_fw_name = FIRMWARE_SUMO; 114 break; 115 116 case CHIP_TAHITI: 117 case CHIP_VERDE: 118 case CHIP_PITCAIRN: 119 case CHIP_ARUBA: 120 case CHIP_OLAND: 121 legacy_fw_name = FIRMWARE_TAHITI; 122 break; 123 124 case CHIP_BONAIRE: 125 case CHIP_KABINI: 126 case CHIP_KAVERI: 127 case CHIP_HAWAII: 128 case CHIP_MULLINS: 129 legacy_fw_name = FIRMWARE_BONAIRE_LEGACY; 130 fw_name = FIRMWARE_BONAIRE; 131 break; 132 133 default: 134 return -EINVAL; 135 } 136 137 rdev->uvd.fw_header_present = false; 138 rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES; 139 if (fw_name) { 140 /* Let's try to load the newer firmware first */ 141 r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); 142 if (r) { 143 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 144 fw_name); 145 } else { 146 struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data; 147 unsigned version_major, version_minor, family_id; 148 149 r = radeon_ucode_validate(rdev->uvd_fw); 150 if (r) 151 return r; 152 153 rdev->uvd.fw_header_present = true; 154 155 family_id = (__force u32)(hdr->ucode_version) & 0xff; 156 version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version)) 157 >> 24) & 0xff; 158 version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version)) 159 >> 8) & 0xff; 160 DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n", 161 version_major, version_minor, family_id); 162 163 /* 164 * Limit the number of UVD handles depending on 165 * microcode major and minor versions. 166 */ 167 if ((version_major >= 0x01) && (version_minor >= 0x37)) 168 rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES; 169 } 170 } 171 172 /* 173 * In case there is only legacy firmware, or we encounter an error 174 * while loading the new firmware, we fall back to loading the legacy 175 * firmware now. 176 */ 177 if (!fw_name || r) { 178 r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev); 179 if (r) { 180 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 181 legacy_fw_name); 182 return r; 183 } 184 } 185 186 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + 187 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + 188 RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles; 189 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, 190 RADEON_GEM_DOMAIN_VRAM, 0, NULL, 191 NULL, &rdev->uvd.vcpu_bo); 192 if (r) { 193 dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); 194 return r; 195 } 196 197 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 198 if (r) { 199 radeon_bo_unref(&rdev->uvd.vcpu_bo); 200 dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); 201 return r; 202 } 203 204 r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 205 &rdev->uvd.gpu_addr); 206 if (r) { 207 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 208 radeon_bo_unref(&rdev->uvd.vcpu_bo); 209 dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); 210 return r; 211 } 212 213 r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); 214 if (r) { 215 dev_err(rdev->dev, "(%d) UVD map failed\n", r); 216 return r; 217 } 218 219 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 220 221 for (i = 0; i < rdev->uvd.max_handles; ++i) { 222 atomic_set(&rdev->uvd.handles[i], 0); 223 rdev->uvd.filp[i] = NULL; 224 rdev->uvd.img_size[i] = 0; 225 } 226 227 return 0; 228 } 229 230 void radeon_uvd_fini(struct radeon_device *rdev) 231 { 232 int r; 233 234 if (rdev->uvd.vcpu_bo == NULL) 235 return; 236 237 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 238 if (!r) { 239 radeon_bo_kunmap(rdev->uvd.vcpu_bo); 240 radeon_bo_unpin(rdev->uvd.vcpu_bo); 241 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 242 } 243 244 radeon_bo_unref(&rdev->uvd.vcpu_bo); 245 246 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]); 247 248 release_firmware(rdev->uvd_fw); 249 } 250 251 int radeon_uvd_suspend(struct radeon_device *rdev) 252 { 253 int i, r; 254 255 if (rdev->uvd.vcpu_bo == NULL) 256 return 0; 257 258 for (i = 0; i < rdev->uvd.max_handles; ++i) { 259 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 260 if (handle != 0) { 261 struct radeon_fence *fence; 262 263 radeon_uvd_note_usage(rdev); 264 265 r = radeon_uvd_get_destroy_msg(rdev, 266 R600_RING_TYPE_UVD_INDEX, handle, &fence); 267 if (r) { 268 DRM_ERROR("Error destroying UVD (%d)!\n", r); 269 continue; 270 } 271 272 radeon_fence_wait(fence, false); 273 radeon_fence_unref(&fence); 274 275 rdev->uvd.filp[i] = NULL; 276 atomic_set(&rdev->uvd.handles[i], 0); 277 } 278 } 279 280 return 0; 281 } 282 283 int radeon_uvd_resume(struct radeon_device *rdev) 284 { 285 unsigned size; 286 void *ptr; 287 288 if (rdev->uvd.vcpu_bo == NULL) 289 return -EINVAL; 290 291 memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); 292 293 size = radeon_bo_size(rdev->uvd.vcpu_bo); 294 size -= rdev->uvd_fw->size; 295 296 ptr = rdev->uvd.cpu_addr; 297 ptr += rdev->uvd_fw->size; 298 299 memset_io((void __iomem *)ptr, 0, size); 300 301 return 0; 302 } 303 304 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, 305 uint32_t allowed_domains) 306 { 307 int i; 308 309 for (i = 0; i < rbo->placement.num_placement; ++i) { 310 rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; 311 rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 312 } 313 314 /* If it must be in VRAM it must be in the first segment as well */ 315 if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) 316 return; 317 318 /* abort if we already have more than one placement */ 319 if (rbo->placement.num_placement > 1) 320 return; 321 322 /* add another 256MB segment */ 323 rbo->placements[1] = rbo->placements[0]; 324 rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 325 rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 326 rbo->placement.num_placement++; 327 rbo->placement.num_busy_placement++; 328 } 329 330 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) 331 { 332 int i, r; 333 for (i = 0; i < rdev->uvd.max_handles; ++i) { 334 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 335 if (handle != 0 && rdev->uvd.filp[i] == filp) { 336 struct radeon_fence *fence; 337 338 radeon_uvd_note_usage(rdev); 339 340 r = radeon_uvd_get_destroy_msg(rdev, 341 R600_RING_TYPE_UVD_INDEX, handle, &fence); 342 if (r) { 343 DRM_ERROR("Error destroying UVD (%d)!\n", r); 344 continue; 345 } 346 347 radeon_fence_wait(fence, false); 348 radeon_fence_unref(&fence); 349 350 rdev->uvd.filp[i] = NULL; 351 atomic_set(&rdev->uvd.handles[i], 0); 352 } 353 } 354 } 355 356 static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) 357 { 358 unsigned stream_type = msg[4]; 359 unsigned width = msg[6]; 360 unsigned height = msg[7]; 361 unsigned dpb_size = msg[9]; 362 unsigned pitch = msg[28]; 363 364 unsigned width_in_mb = width / 16; 365 unsigned height_in_mb = ALIGN(height / 16, 2); 366 367 unsigned image_size, tmp, min_dpb_size; 368 369 image_size = width * height; 370 image_size += image_size / 2; 371 image_size = ALIGN(image_size, 1024); 372 373 switch (stream_type) { 374 case 0: /* H264 */ 375 376 /* reference picture buffer */ 377 min_dpb_size = image_size * 17; 378 379 /* macroblock context buffer */ 380 min_dpb_size += width_in_mb * height_in_mb * 17 * 192; 381 382 /* IT surface buffer */ 383 min_dpb_size += width_in_mb * height_in_mb * 32; 384 break; 385 386 case 1: /* VC1 */ 387 388 /* reference picture buffer */ 389 min_dpb_size = image_size * 3; 390 391 /* CONTEXT_BUFFER */ 392 min_dpb_size += width_in_mb * height_in_mb * 128; 393 394 /* IT surface buffer */ 395 min_dpb_size += width_in_mb * 64; 396 397 /* DB surface buffer */ 398 min_dpb_size += width_in_mb * 128; 399 400 /* BP */ 401 tmp = max(width_in_mb, height_in_mb); 402 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 403 break; 404 405 case 3: /* MPEG2 */ 406 407 /* reference picture buffer */ 408 min_dpb_size = image_size * 3; 409 break; 410 411 case 4: /* MPEG4 */ 412 413 /* reference picture buffer */ 414 min_dpb_size = image_size * 3; 415 416 /* CM */ 417 min_dpb_size += width_in_mb * height_in_mb * 64; 418 419 /* IT surface buffer */ 420 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 421 break; 422 423 default: 424 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 425 return -EINVAL; 426 } 427 428 if (width > pitch) { 429 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 430 return -EINVAL; 431 } 432 433 if (dpb_size < min_dpb_size) { 434 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 435 dpb_size, min_dpb_size); 436 return -EINVAL; 437 } 438 439 buf_sizes[0x1] = dpb_size; 440 buf_sizes[0x2] = image_size; 441 return 0; 442 } 443 444 static int radeon_uvd_validate_codec(struct radeon_cs_parser *p, 445 unsigned stream_type) 446 { 447 switch (stream_type) { 448 case 0: /* H264 */ 449 case 1: /* VC1 */ 450 /* always supported */ 451 return 0; 452 453 case 3: /* MPEG2 */ 454 case 4: /* MPEG4 */ 455 /* only since UVD 3 */ 456 if (p->rdev->family >= CHIP_PALM) 457 return 0; 458 459 fallthrough; 460 default: 461 DRM_ERROR("UVD codec not supported by hardware %d!\n", 462 stream_type); 463 return -EINVAL; 464 } 465 } 466 467 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, 468 unsigned offset, unsigned buf_sizes[]) 469 { 470 int32_t *msg, msg_type, handle; 471 unsigned img_size = 0; 472 void *ptr; 473 long r; 474 int i; 475 476 if (offset & 0x3F) { 477 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 478 return -EINVAL; 479 } 480 481 r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, 482 MAX_SCHEDULE_TIMEOUT); 483 if (r <= 0) { 484 DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r); 485 return r ? r : -ETIME; 486 } 487 488 r = radeon_bo_kmap(bo, &ptr); 489 if (r) { 490 DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); 491 return r; 492 } 493 494 msg = ptr + offset; 495 496 msg_type = msg[1]; 497 handle = msg[2]; 498 499 if (handle == 0) { 500 DRM_ERROR("Invalid UVD handle!\n"); 501 return -EINVAL; 502 } 503 504 switch (msg_type) { 505 case 0: 506 /* it's a create msg, calc image size (width * height) */ 507 img_size = msg[7] * msg[8]; 508 509 r = radeon_uvd_validate_codec(p, msg[4]); 510 radeon_bo_kunmap(bo); 511 if (r) 512 return r; 513 514 /* try to alloc a new handle */ 515 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 516 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 517 DRM_ERROR("Handle 0x%x already in use!\n", handle); 518 return -EINVAL; 519 } 520 521 if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { 522 p->rdev->uvd.filp[i] = p->filp; 523 p->rdev->uvd.img_size[i] = img_size; 524 return 0; 525 } 526 } 527 528 DRM_ERROR("No more free UVD handles!\n"); 529 return -EINVAL; 530 531 case 1: 532 /* it's a decode msg, validate codec and calc buffer sizes */ 533 r = radeon_uvd_validate_codec(p, msg[4]); 534 if (!r) 535 r = radeon_uvd_cs_msg_decode(msg, buf_sizes); 536 radeon_bo_kunmap(bo); 537 if (r) 538 return r; 539 540 /* validate the handle */ 541 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 542 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 543 if (p->rdev->uvd.filp[i] != p->filp) { 544 DRM_ERROR("UVD handle collision detected!\n"); 545 return -EINVAL; 546 } 547 return 0; 548 } 549 } 550 551 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); 552 return -ENOENT; 553 554 case 2: 555 /* it's a destroy msg, free the handle */ 556 for (i = 0; i < p->rdev->uvd.max_handles; ++i) 557 atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); 558 radeon_bo_kunmap(bo); 559 return 0; 560 561 default: 562 563 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 564 return -EINVAL; 565 } 566 567 BUG(); 568 return -EINVAL; 569 } 570 571 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, 572 int data0, int data1, 573 unsigned buf_sizes[], bool *has_msg_cmd) 574 { 575 struct radeon_cs_chunk *relocs_chunk; 576 struct radeon_bo_list *reloc; 577 unsigned idx, cmd, offset; 578 uint64_t start, end; 579 int r; 580 581 relocs_chunk = p->chunk_relocs; 582 offset = radeon_get_ib_value(p, data0); 583 idx = radeon_get_ib_value(p, data1); 584 if (idx >= relocs_chunk->length_dw) { 585 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 586 idx, relocs_chunk->length_dw); 587 return -EINVAL; 588 } 589 590 reloc = &p->relocs[(idx / 4)]; 591 start = reloc->gpu_offset; 592 end = start + radeon_bo_size(reloc->robj); 593 start += offset; 594 595 p->ib.ptr[data0] = start & 0xFFFFFFFF; 596 p->ib.ptr[data1] = start >> 32; 597 598 cmd = radeon_get_ib_value(p, p->idx) >> 1; 599 600 if (cmd < 0x4) { 601 if (end <= start) { 602 DRM_ERROR("invalid reloc offset %X!\n", offset); 603 return -EINVAL; 604 } 605 if ((end - start) < buf_sizes[cmd]) { 606 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 607 (unsigned)(end - start), buf_sizes[cmd]); 608 return -EINVAL; 609 } 610 611 } else if (cmd != 0x100) { 612 DRM_ERROR("invalid UVD command %X!\n", cmd); 613 return -EINVAL; 614 } 615 616 if ((start >> 28) != ((end - 1) >> 28)) { 617 DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", 618 start, end); 619 return -EINVAL; 620 } 621 622 /* TODO: is this still necessary on NI+ ? */ 623 if ((cmd == 0 || cmd == 0x3) && 624 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 625 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 626 start, end); 627 return -EINVAL; 628 } 629 630 if (cmd == 0) { 631 if (*has_msg_cmd) { 632 DRM_ERROR("More than one message in a UVD-IB!\n"); 633 return -EINVAL; 634 } 635 *has_msg_cmd = true; 636 r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes); 637 if (r) 638 return r; 639 } else if (!*has_msg_cmd) { 640 DRM_ERROR("Message needed before other commands are send!\n"); 641 return -EINVAL; 642 } 643 644 return 0; 645 } 646 647 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p, 648 struct radeon_cs_packet *pkt, 649 int *data0, int *data1, 650 unsigned buf_sizes[], 651 bool *has_msg_cmd) 652 { 653 int i, r; 654 655 p->idx++; 656 for (i = 0; i <= pkt->count; ++i) { 657 switch (pkt->reg + i*4) { 658 case UVD_GPCOM_VCPU_DATA0: 659 *data0 = p->idx; 660 break; 661 case UVD_GPCOM_VCPU_DATA1: 662 *data1 = p->idx; 663 break; 664 case UVD_GPCOM_VCPU_CMD: 665 r = radeon_uvd_cs_reloc(p, *data0, *data1, 666 buf_sizes, has_msg_cmd); 667 if (r) 668 return r; 669 break; 670 case UVD_ENGINE_CNTL: 671 case UVD_NO_OP: 672 break; 673 default: 674 DRM_ERROR("Invalid reg 0x%X!\n", 675 pkt->reg + i*4); 676 return -EINVAL; 677 } 678 p->idx++; 679 } 680 return 0; 681 } 682 683 int radeon_uvd_cs_parse(struct radeon_cs_parser *p) 684 { 685 struct radeon_cs_packet pkt; 686 int r, data0 = 0, data1 = 0; 687 688 /* does the IB has a msg command */ 689 bool has_msg_cmd = false; 690 691 /* minimum buffer sizes */ 692 unsigned buf_sizes[] = { 693 [0x00000000] = 2048, 694 [0x00000001] = 32 * 1024 * 1024, 695 [0x00000002] = 2048 * 1152 * 3, 696 [0x00000003] = 2048, 697 }; 698 699 if (p->chunk_ib->length_dw % 16) { 700 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 701 p->chunk_ib->length_dw); 702 return -EINVAL; 703 } 704 705 if (p->chunk_relocs == NULL) { 706 DRM_ERROR("No relocation chunk !\n"); 707 return -EINVAL; 708 } 709 710 711 do { 712 r = radeon_cs_packet_parse(p, &pkt, p->idx); 713 if (r) 714 return r; 715 switch (pkt.type) { 716 case RADEON_PACKET_TYPE0: 717 r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1, 718 buf_sizes, &has_msg_cmd); 719 if (r) 720 return r; 721 break; 722 case RADEON_PACKET_TYPE2: 723 p->idx += pkt.count + 2; 724 break; 725 default: 726 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 727 return -EINVAL; 728 } 729 } while (p->idx < p->chunk_ib->length_dw); 730 731 if (!has_msg_cmd) { 732 DRM_ERROR("UVD-IBs need a msg command!\n"); 733 return -EINVAL; 734 } 735 736 return 0; 737 } 738 739 static int radeon_uvd_send_msg(struct radeon_device *rdev, 740 int ring, uint64_t addr, 741 struct radeon_fence **fence) 742 { 743 struct radeon_ib ib; 744 int i, r; 745 746 r = radeon_ib_get(rdev, ring, &ib, NULL, 64); 747 if (r) 748 return r; 749 750 ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); 751 ib.ptr[1] = addr; 752 ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); 753 ib.ptr[3] = addr >> 32; 754 ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); 755 ib.ptr[5] = 0; 756 for (i = 6; i < 16; i += 2) { 757 ib.ptr[i] = PACKET0(UVD_NO_OP, 0); 758 ib.ptr[i+1] = 0; 759 } 760 ib.length_dw = 16; 761 762 r = radeon_ib_schedule(rdev, &ib, NULL, false); 763 764 if (fence) 765 *fence = radeon_fence_ref(ib.fence); 766 767 radeon_ib_free(rdev, &ib); 768 return r; 769 } 770 771 /* 772 * multiple fence commands without any stream commands in between can 773 * crash the vcpu so just try to emmit a dummy create/destroy msg to 774 * avoid this 775 */ 776 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, 777 uint32_t handle, struct radeon_fence **fence) 778 { 779 /* we use the last page of the vcpu bo for the UVD message */ 780 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 781 RADEON_GPU_PAGE_SIZE; 782 783 uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); 784 uint64_t addr = rdev->uvd.gpu_addr + offs; 785 786 int r, i; 787 788 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 789 if (r) 790 return r; 791 792 /* stitch together an UVD create msg */ 793 writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]); 794 writel(0x0, (void __iomem *)&msg[1]); 795 writel((__force u32)cpu_to_le32(handle), &msg[2]); 796 writel(0x0, &msg[3]); 797 writel(0x0, &msg[4]); 798 writel(0x0, &msg[5]); 799 writel(0x0, &msg[6]); 800 writel((__force u32)cpu_to_le32(0x00000780), &msg[7]); 801 writel((__force u32)cpu_to_le32(0x00000440), &msg[8]); 802 writel(0x0, &msg[9]); 803 writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]); 804 for (i = 11; i < 1024; ++i) 805 writel(0x0, &msg[i]); 806 807 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 808 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 809 return r; 810 } 811 812 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, 813 uint32_t handle, struct radeon_fence **fence) 814 { 815 /* we use the last page of the vcpu bo for the UVD message */ 816 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 817 RADEON_GPU_PAGE_SIZE; 818 819 uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); 820 uint64_t addr = rdev->uvd.gpu_addr + offs; 821 822 int r, i; 823 824 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 825 if (r) 826 return r; 827 828 /* stitch together an UVD destroy msg */ 829 writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]); 830 writel((__force u32)cpu_to_le32(0x00000002), &msg[1]); 831 writel((__force u32)cpu_to_le32(handle), &msg[2]); 832 writel(0x0, &msg[3]); 833 for (i = 4; i < 1024; ++i) 834 writel(0x0, &msg[i]); 835 836 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 837 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 838 return r; 839 } 840 841 /** 842 * radeon_uvd_count_handles - count number of open streams 843 * 844 * @rdev: radeon_device pointer 845 * @sd: number of SD streams 846 * @hd: number of HD streams 847 * 848 * Count the number of open SD/HD streams as a hint for power mangement 849 */ 850 static void radeon_uvd_count_handles(struct radeon_device *rdev, 851 unsigned *sd, unsigned *hd) 852 { 853 unsigned i; 854 855 *sd = 0; 856 *hd = 0; 857 858 for (i = 0; i < rdev->uvd.max_handles; ++i) { 859 if (!atomic_read(&rdev->uvd.handles[i])) 860 continue; 861 862 if (rdev->uvd.img_size[i] >= 720*576) 863 ++(*hd); 864 else 865 ++(*sd); 866 } 867 } 868 869 static void radeon_uvd_idle_work_handler(struct work_struct *work) 870 { 871 struct radeon_device *rdev = 872 container_of(work, struct radeon_device, uvd.idle_work.work); 873 874 if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { 875 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 876 radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, 877 &rdev->pm.dpm.hd); 878 radeon_dpm_enable_uvd(rdev, false); 879 } else { 880 radeon_set_uvd_clocks(rdev, 0, 0); 881 } 882 } else { 883 schedule_delayed_work(&rdev->uvd.idle_work, 884 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 885 } 886 } 887 888 void radeon_uvd_note_usage(struct radeon_device *rdev) 889 { 890 bool streams_changed = false; 891 bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); 892 set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, 893 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 894 895 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 896 unsigned hd = 0, sd = 0; 897 radeon_uvd_count_handles(rdev, &sd, &hd); 898 if ((rdev->pm.dpm.sd != sd) || 899 (rdev->pm.dpm.hd != hd)) { 900 rdev->pm.dpm.sd = sd; 901 rdev->pm.dpm.hd = hd; 902 /* disable this for now */ 903 /*streams_changed = true;*/ 904 } 905 } 906 907 if (set_clocks || streams_changed) { 908 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 909 radeon_dpm_enable_uvd(rdev, true); 910 } else { 911 radeon_set_uvd_clocks(rdev, 53300, 40000); 912 } 913 } 914 } 915 916 static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq, 917 unsigned target_freq, 918 unsigned pd_min, 919 unsigned pd_even) 920 { 921 unsigned post_div = vco_freq / target_freq; 922 923 /* adjust to post divider minimum value */ 924 if (post_div < pd_min) 925 post_div = pd_min; 926 927 /* we alway need a frequency less than or equal the target */ 928 if ((vco_freq / post_div) > target_freq) 929 post_div += 1; 930 931 /* post dividers above a certain value must be even */ 932 if (post_div > pd_even && post_div % 2) 933 post_div += 1; 934 935 return post_div; 936 } 937 938 /** 939 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers 940 * 941 * @rdev: radeon_device pointer 942 * @vclk: wanted VCLK 943 * @dclk: wanted DCLK 944 * @vco_min: minimum VCO frequency 945 * @vco_max: maximum VCO frequency 946 * @fb_factor: factor to multiply vco freq with 947 * @fb_mask: limit and bitmask for feedback divider 948 * @pd_min: post divider minimum 949 * @pd_max: post divider maximum 950 * @pd_even: post divider must be even above this value 951 * @optimal_fb_div: resulting feedback divider 952 * @optimal_vclk_div: resulting vclk post divider 953 * @optimal_dclk_div: resulting dclk post divider 954 * 955 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). 956 * Returns zero on success -EINVAL on error. 957 */ 958 int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, 959 unsigned vclk, unsigned dclk, 960 unsigned vco_min, unsigned vco_max, 961 unsigned fb_factor, unsigned fb_mask, 962 unsigned pd_min, unsigned pd_max, 963 unsigned pd_even, 964 unsigned *optimal_fb_div, 965 unsigned *optimal_vclk_div, 966 unsigned *optimal_dclk_div) 967 { 968 unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq; 969 970 /* start off with something large */ 971 unsigned optimal_score = ~0; 972 973 /* loop through vco from low to high */ 974 vco_min = max(max(vco_min, vclk), dclk); 975 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 976 977 uint64_t fb_div = (uint64_t)vco_freq * fb_factor; 978 unsigned vclk_div, dclk_div, score; 979 980 do_div(fb_div, ref_freq); 981 982 /* fb div out of range ? */ 983 if (fb_div > fb_mask) 984 break; /* it can oly get worse */ 985 986 fb_div &= fb_mask; 987 988 /* calc vclk divider with current vco freq */ 989 vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk, 990 pd_min, pd_even); 991 if (vclk_div > pd_max) 992 break; /* vco is too big, it has to stop */ 993 994 /* calc dclk divider with current vco freq */ 995 dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, 996 pd_min, pd_even); 997 if (dclk_div > pd_max) 998 break; /* vco is too big, it has to stop */ 999 1000 /* calc score with current vco freq */ 1001 score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); 1002 1003 /* determine if this vco setting is better than current optimal settings */ 1004 if (score < optimal_score) { 1005 *optimal_fb_div = fb_div; 1006 *optimal_vclk_div = vclk_div; 1007 *optimal_dclk_div = dclk_div; 1008 optimal_score = score; 1009 if (optimal_score == 0) 1010 break; /* it can't get better than this */ 1011 } 1012 } 1013 1014 /* did we found a valid setup ? */ 1015 if (optimal_score == ~0) 1016 return -EINVAL; 1017 1018 return 0; 1019 } 1020 1021 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 1022 unsigned cg_upll_func_cntl) 1023 { 1024 unsigned i; 1025 1026 /* make sure UPLL_CTLREQ is deasserted */ 1027 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1028 1029 mdelay(10); 1030 1031 /* assert UPLL_CTLREQ */ 1032 WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); 1033 1034 /* wait for CTLACK and CTLACK2 to get asserted */ 1035 for (i = 0; i < 100; ++i) { 1036 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; 1037 if ((RREG32(cg_upll_func_cntl) & mask) == mask) 1038 break; 1039 mdelay(10); 1040 } 1041 1042 /* deassert UPLL_CTLREQ */ 1043 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1044 1045 if (i == 100) { 1046 DRM_ERROR("Timeout setting UVD clocks!\n"); 1047 return -ETIMEDOUT; 1048 } 1049 1050 return 0; 1051 } 1052