/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "r600d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#define FIRMWARE_R600		"radeon/R600_uvd.bin"
#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE_LEGACY	"radeon/BONAIRE_uvd.bin"
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"

MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);

static void radeon_uvd_idle_work_handler(struct work_struct *work);

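/**
 * radeon_uvd_init - load firmware and allocate the UVD VCPU buffer object
 *
 * @rdev: radeon_device pointer
 *
 * Select the firmware image for the detected ASIC family, trying the new
 * firmware layout first where one exists and falling back to the legacy
 * image, then allocate, pin and map the VCPU buffer object in VRAM and
 * clear the session handle bookkeeping.
 */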
int radeon_uvd_init(struct radeon_device *rdev)
{
	unsigned long bo_size;
	const char *fw_name = NULL, *legacy_fw_name = NULL;
	int i, r;

	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

	switch (rdev->family) {
	case CHIP_RV610:
	case CHIP_RV630:
	case CHIP_RV670:
	case CHIP_RV620:
	case CHIP_RV635:
		legacy_fw_name = FIRMWARE_R600;
		break;

	case CHIP_RS780:
	case CHIP_RS880:
		legacy_fw_name = FIRMWARE_RS780;
		break;

	case CHIP_RV770:
		legacy_fw_name = FIRMWARE_RV770;
		break;

	case CHIP_RV710:
	case CHIP_RV730:
	case CHIP_RV740:
		legacy_fw_name = FIRMWARE_RV710;
		break;

	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_JUNIPER:
	case CHIP_REDWOOD:
	case CHIP_CEDAR:
		legacy_fw_name = FIRMWARE_CYPRESS;
		break;

	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_PALM:
	case CHIP_CAYMAN:
	case CHIP_BARTS:
	case CHIP_TURKS:
	case CHIP_CAICOS:
		legacy_fw_name = FIRMWARE_SUMO;
		break;

	case CHIP_TAHITI:
	case CHIP_VERDE:
	case CHIP_PITCAIRN:
	case CHIP_ARUBA:
	case CHIP_OLAND:
		legacy_fw_name = FIRMWARE_TAHITI;
		break;

	case CHIP_BONAIRE:
	case CHIP_KABINI:
	case CHIP_KAVERI:
	case CHIP_HAWAII:
	case CHIP_MULLINS:
		legacy_fw_name = FIRMWARE_BONAIRE_LEGACY;
		fw_name = FIRMWARE_BONAIRE;
		break;

	default:
		return -EINVAL;
	}

	rdev->uvd.fw_header_present = false;
	rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES;
	if (fw_name) {
		/* Let's try to load the newer firmware first */
		r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				fw_name);
		} else {
			struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data;
			unsigned version_major, version_minor, family_id;

			r = radeon_ucode_validate(rdev->uvd_fw);
			if (r)
				return r;

			rdev->uvd.fw_header_present = true;

			family_id = (__force u32)(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version))
					 >> 24) & 0xff;
			version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version))
					 >> 8) & 0xff;
			DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
				 version_major, version_minor, family_id);

			/*
			 * Limit the number of UVD handles depending on
			 * microcode major and minor versions.
			 */
			if ((version_major >= 0x01) && (version_minor >= 0x37))
				rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES;
		}
	}

	/*
	 * In case there is only legacy firmware, or we encounter an error
	 * while loading the new firmware, we fall back to loading the legacy
	 * firmware now.
	 */
	if (!fw_name || r) {
		r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev);
		if (r) {
			dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
				legacy_fw_name);
			return r;
		}
	}

	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
		  RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles;
	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &rdev->uvd.vcpu_bo);
	if (r) {
		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (r) {
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->uvd.gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
		radeon_bo_unref(&rdev->uvd.vcpu_bo);
		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
	if (r) {
		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		atomic_set(&rdev->uvd.handles[i], 0);
		rdev->uvd.filp[i] = NULL;
		rdev->uvd.img_size[i] = 0;
	}

	return 0;
}

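/**
 * radeon_uvd_fini - free the UVD VCPU buffer object and firmware
 *
 * @rdev: radeon_device pointer
 *
 * Unmap, unpin and free the VCPU buffer object, tear down the UVD ring
 * and release the firmware image. Counterpart of radeon_uvd_init().
 */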
void radeon_uvd_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->uvd.vcpu_bo == NULL)
		return;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
	if (!r) {
		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
		radeon_bo_unpin(rdev->uvd.vcpu_bo);
		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	}

	radeon_bo_unref(&rdev->uvd.vcpu_bo);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]);

	release_firmware(rdev->uvd_fw);
}

int radeon_uvd_suspend(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}

	return 0;
}

int radeon_uvd_resume(struct radeon_device *rdev)
{
	unsigned size;
	void *ptr;

	if (rdev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

	size = radeon_bo_size(rdev->uvd.vcpu_bo);
	size -= rdev->uvd_fw->size;

	ptr = rdev->uvd.cpu_addr;
	ptr += rdev->uvd_fw->size;

	memset_io((void __iomem *)ptr, 0, size);

	return 0;
}

void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
				       uint32_t allowed_domains)
{
	int i;

	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}

	/* If it must be in VRAM it must be in the first segment as well */
	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
		return;

	/* abort if we already have more than one placement */
	if (rbo->placement.num_placement > 1)
		return;

	/* add another 256MB segment */
	rbo->placements[1] = rbo->placements[0];
	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
	rbo->placement.num_placement++;
	rbo->placement.num_busy_placement++;
}

void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
{
	int i, r;
	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
		if (handle != 0 && rdev->uvd.filp[i] == filp) {
			struct radeon_fence *fence;

			radeon_uvd_note_usage(rdev);

			r = radeon_uvd_get_destroy_msg(rdev,
				R600_RING_TYPE_UVD_INDEX, handle, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			radeon_fence_wait(fence, false);
			radeon_fence_unref(&fence);

			rdev->uvd.filp[i] = NULL;
			atomic_set(&rdev->uvd.handles[i], 0);
		}
	}
}

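/**
 * radeon_uvd_cs_msg_decode - sanity check a UVD decode message
 *
 * @msg: CPU mapping of the UVD message
 * @buf_sizes: minimum buffer sizes, indexed by command number
 *
 * Check the decode parameters (codec, resolution, pitch) and calculate the
 * minimum decoded picture buffer size the codec needs, then store the
 * required DPB and image sizes in @buf_sizes for the relocation checks.
 */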
static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);

	unsigned image_size, tmp, min_dpb_size;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */

		/* reference picture buffer */
		min_dpb_size = image_size * 17;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	return 0;
}

static int radeon_uvd_validate_codec(struct radeon_cs_parser *p,
				     unsigned stream_type)
{
	switch (stream_type) {
	case 0: /* H264 */
	case 1: /* VC1 */
		/* always supported */
		return 0;

	case 3: /* MPEG2 */
	case 4: /* MPEG4 */
		/* only since UVD 3 */
		if (p->rdev->family >= CHIP_PALM)
			return 0;

		fallthrough;
	default:
		DRM_ERROR("UVD codec not supported by hardware %d!\n",
			  stream_type);
		return -EINVAL;
	}
}

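/**
 * radeon_uvd_cs_msg - handle a UVD message from userspace
 *
 * @p: parser context
 * @bo: buffer object containing the message
 * @offset: offset of the message inside the buffer object
 * @buf_sizes: minimum buffer sizes, filled in for decode messages
 *
 * Peek into the UVD message and, depending on its type, allocate a new
 * session handle (create), validate the codec and buffer sizes (decode)
 * or free the handle again (destroy).
 */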
static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
			     unsigned offset, unsigned buf_sizes[])
{
	int32_t *msg, msg_type, handle;
	unsigned img_size = 0;
	struct dma_fence *f;
	void *ptr;

	int i, r;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	f = dma_resv_excl_fence(bo->tbo.base.resv);
	if (f) {
		r = radeon_fence_wait((struct radeon_fence *)f, false);
		if (r) {
			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
			return r;
		}
	}

	r = radeon_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		img_size = msg[7] * msg[8];

		r = radeon_uvd_validate_codec(p, msg[4]);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* try to alloc a new handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
				p->rdev->uvd.filp[i] = p->filp;
				p->rdev->uvd.img_size[i] = img_size;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, validate codec and calc buffer sizes */
		r = radeon_uvd_validate_codec(p, msg[4]);
		if (!r)
			r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
		radeon_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i) {
			if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
				if (p->rdev->uvd.filp[i] != p->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < p->rdev->uvd.max_handles; ++i)
			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
		radeon_bo_kunmap(bo);
		return 0;

	default:

		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
		return -EINVAL;
	}

	BUG();
	return -EINVAL;
}

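/**
 * radeon_uvd_cs_reloc - patch and check a UVD relocation
 *
 * @p: parser context
 * @data0: index of the IB dword holding the buffer offset
 * @data1: index of the IB dword holding the relocation index
 * @buf_sizes: minimum buffer sizes, indexed by command number
 * @has_msg_cmd: set once a message command has been seen in the IB
 *
 * Patch the GPU address into the IB and check that the referenced buffer
 * is large enough, does not cross a 256MB boundary and, for message and
 * feedback buffers, lives in the same segment as the VCPU buffer object.
 */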
static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
			       int data0, int data1,
			       unsigned buf_sizes[], bool *has_msg_cmd)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_bo_list *reloc;
	unsigned idx, cmd, offset;
	uint64_t start, end;
	int r;

	relocs_chunk = p->chunk_relocs;
	offset = radeon_get_ib_value(p, data0);
	idx = radeon_get_ib_value(p, data1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}

	reloc = &p->relocs[(idx / 4)];
	start = reloc->gpu_offset;
	end = start + radeon_bo_size(reloc->robj);
	start += offset;

	p->ib.ptr[data0] = start & 0xFFFFFFFF;
	p->ib.ptr[data1] = start >> 32;

	cmd = radeon_get_ib_value(p, p->idx) >> 1;

	if (cmd < 0x4) {
		if (end <= start) {
			DRM_ERROR("invalid reloc offset %X!\n", offset);
			return -EINVAL;
		}
		if ((end - start) < buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start), buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd != 0x100) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if ((start >> 28) != ((end - 1) >> 28)) {
		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
			  start, end);
		return -EINVAL;
	}

	/* TODO: is this still necessary on NI+ ? */
	if ((cmd == 0 || cmd == 0x3) &&
	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
		DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
			  start, end);
		return -EINVAL;
	}

	if (cmd == 0) {
		if (*has_msg_cmd) {
			DRM_ERROR("More than one message in a UVD-IB!\n");
			return -EINVAL;
		}
		*has_msg_cmd = true;
		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
		if (r)
			return r;
	} else if (!*has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int *data0, int *data1,
			     unsigned buf_sizes[],
			     bool *has_msg_cmd)
{
	int i, r;

	p->idx++;
	for (i = 0; i <= pkt->count; ++i) {
		switch (pkt->reg + i*4) {
		case UVD_GPCOM_VCPU_DATA0:
			*data0 = p->idx;
			break;
		case UVD_GPCOM_VCPU_DATA1:
			*data1 = p->idx;
			break;
		case UVD_GPCOM_VCPU_CMD:
			r = radeon_uvd_cs_reloc(p, *data0, *data1,
						buf_sizes, has_msg_cmd);
			if (r)
				return r;
			break;
		case UVD_ENGINE_CNTL:
		case UVD_NO_OP:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n",
				  pkt->reg + i*4);
			return -EINVAL;
		}
		p->idx++;
	}
	return 0;
}

int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r, data0 = 0, data1 = 0;

	/* does the IB have a msg command */
	bool has_msg_cmd = false;

	/* minimum buffer sizes */
	unsigned buf_sizes[] = {
		[0x00000000] = 2048,
		[0x00000001] = 32 * 1024 * 1024,
		[0x00000002] = 2048 * 1152 * 3,
		[0x00000003] = 2048,
	};

	if (p->chunk_ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  p->chunk_ib->length_dw);
		return -EINVAL;
	}

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}

	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r)
			return r;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
					      buf_sizes, &has_msg_cmd);
			if (r)
				return r;
			break;
		case RADEON_PACKET_TYPE2:
			p->idx += pkt.count + 2;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
			return -EINVAL;
		}
	} while (p->idx < p->chunk_ib->length_dw);

	if (!has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	return 0;
}

static int radeon_uvd_send_msg(struct radeon_device *rdev,
			       int ring, uint64_t addr,
			       struct radeon_fence **fence)
{
	struct radeon_ib ib;
	int i, r;

	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
	if (r)
		return r;

	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
	ib.ptr[1] = addr;
	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
	ib.ptr[3] = addr >> 32;
	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
	ib.ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib.ptr[i] = PACKET0(UVD_NO_OP, 0);
		ib.ptr[i+1] = 0;
	}
	ib.length_dw = 16;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);

	if (fence)
		*fence = radeon_fence_ref(ib.fence);

	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * multiple fence commands without any stream commands in between can
 * crash the vcpu so just try to emit a dummy create/destroy msg to
 * avoid this
 */
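/**
 * radeon_uvd_get_create_msg - send a dummy create message
 *
 * @rdev: radeon_device pointer
 * @ring: ring to submit the message to
 * @handle: session handle to create
 * @fence: fence for the submitted message (optional)
 *
 * Write a create message for @handle into the last page of the VCPU
 * buffer object and submit it to the UVD ring.
 */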
int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
			      uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD create msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel(0x0, (void __iomem *)&msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	writel(0x0, &msg[4]);
	writel(0x0, &msg[5]);
	writel(0x0, &msg[6]);
	writel((__force u32)cpu_to_le32(0x00000780), &msg[7]);
	writel((__force u32)cpu_to_le32(0x00000440), &msg[8]);
	writel(0x0, &msg[9]);
	writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]);
	for (i = 11; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
			       uint32_t handle, struct radeon_fence **fence)
{
	/* we use the last page of the vcpu bo for the UVD message */
	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
		RADEON_GPU_PAGE_SIZE;

	uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs);
	uint64_t addr = rdev->uvd.gpu_addr + offs;

	int r, i;

	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
	if (r)
		return r;

	/* stitch together a UVD destroy msg */
	writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]);
	writel((__force u32)cpu_to_le32(0x00000002), &msg[1]);
	writel((__force u32)cpu_to_le32(handle), &msg[2]);
	writel(0x0, &msg[3]);
	for (i = 4; i < 1024; ++i)
		writel(0x0, &msg[i]);

	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
	return r;
}

/**
 * radeon_uvd_count_handles - count number of open streams
 *
 * @rdev: radeon_device pointer
 * @sd: number of SD streams
 * @hd: number of HD streams
 *
 * Count the number of open SD/HD streams as a hint for power management
 */
static void radeon_uvd_count_handles(struct radeon_device *rdev,
				     unsigned *sd, unsigned *hd)
{
	unsigned i;

	*sd = 0;
	*hd = 0;

	for (i = 0; i < rdev->uvd.max_handles; ++i) {
		if (!atomic_read(&rdev->uvd.handles[i]))
			continue;

		if (rdev->uvd.img_size[i] >= 720*576)
			++(*hd);
		else
			++(*sd);
	}
}

static void radeon_uvd_idle_work_handler(struct work_struct *work)
{
	struct radeon_device *rdev =
		container_of(work, struct radeon_device, uvd.idle_work.work);

	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd,
						 &rdev->pm.dpm.hd);
			radeon_dpm_enable_uvd(rdev, false);
		} else {
			radeon_set_uvd_clocks(rdev, 0, 0);
		}
	} else {
		schedule_delayed_work(&rdev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

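/**
 * radeon_uvd_note_usage - bump UVD clocks while the block is in use
 *
 * @rdev: radeon_device pointer
 *
 * (Re)schedule the idle work and, if the block was idle, raise the UVD
 * clocks, either through DPM or by programming the UVD clocks directly.
 */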
void radeon_uvd_note_usage(struct radeon_device *rdev)
{
	bool streams_changed = false;
	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
		unsigned hd = 0, sd = 0;
		radeon_uvd_count_handles(rdev, &sd, &hd);
		if ((rdev->pm.dpm.sd != sd) ||
		    (rdev->pm.dpm.hd != hd)) {
			rdev->pm.dpm.sd = sd;
			rdev->pm.dpm.hd = hd;
			/* disable this for now */
			/*streams_changed = true;*/
		}
	}

	if (set_clocks || streams_changed) {
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			radeon_dpm_enable_uvd(rdev, true);
		} else {
			radeon_set_uvd_clocks(rdev, 53300, 40000);
		}
	}
}

static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq,
					      unsigned target_freq,
					      unsigned pd_min,
					      unsigned pd_even)
{
	unsigned post_div = vco_freq / target_freq;

	/* adjust to post divider minimum value */
	if (post_div < pd_min)
		post_div = pd_min;

	/* we always need a frequency less than or equal to the target */
	if ((vco_freq / post_div) > target_freq)
		post_div += 1;

	/* post dividers above a certain value must be even */
	if (post_div > pd_even && post_div % 2)
		post_div += 1;

	return post_div;
}

/**
 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers
 *
 * @rdev: radeon_device pointer
 * @vclk: wanted VCLK
 * @dclk: wanted DCLK
 * @vco_min: minimum VCO frequency
 * @vco_max: maximum VCO frequency
 * @fb_factor: factor to multiply vco freq with
 * @fb_mask: limit and bitmask for feedback divider
 * @pd_min: post divider minimum
 * @pd_max: post divider maximum
 * @pd_even: post divider must be even above this value
 * @optimal_fb_div: resulting feedback divider
 * @optimal_vclk_div: resulting vclk post divider
 * @optimal_dclk_div: resulting dclk post divider
 *
 * Calculate dividers for UVD's UPLL (R6xx-SI, except APUs).
 * Returns zero on success, -EINVAL on error.
 */
int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
				  unsigned vclk, unsigned dclk,
				  unsigned vco_min, unsigned vco_max,
				  unsigned fb_factor, unsigned fb_mask,
				  unsigned pd_min, unsigned pd_max,
				  unsigned pd_even,
				  unsigned *optimal_fb_div,
				  unsigned *optimal_vclk_div,
				  unsigned *optimal_dclk_div)
{
	unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq;

	/* start off with something large */
	unsigned optimal_score = ~0;

	/* loop through vco from low to high */
	vco_min = max(max(vco_min, vclk), dclk);
	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {

		uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
		unsigned vclk_div, dclk_div, score;

		do_div(fb_div, ref_freq);

		/* fb div out of range? */
		if (fb_div > fb_mask)
			break; /* it can only get worse */

		fb_div &= fb_mask;

		/* calc vclk divider with current vco freq */
		vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk,
							 pd_min, pd_even);
		if (vclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc dclk divider with current vco freq */
		dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk,
							 pd_min, pd_even);
		if (dclk_div > pd_max)
			break; /* vco is too big, it has to stop */

		/* calc score with current vco freq */
		score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);

		/* determine if this vco setting is better than current optimal settings */
		if (score < optimal_score) {
			*optimal_fb_div = fb_div;
			*optimal_vclk_div = vclk_div;
			*optimal_dclk_div = dclk_div;
			optimal_score = score;
			if (optimal_score == 0)
				break; /* it can't get better than this */
		}
	}

	/* did we find a valid setup? */
	if (optimal_score == ~0)
		return -EINVAL;

	return 0;
}

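/**
 * radeon_uvd_send_upll_ctlreq - ask the UPLL to apply new settings
 *
 * @rdev: radeon_device pointer
 * @cg_upll_func_cntl: register offset of the CG_UPLL_FUNC_CNTL register
 *
 * Toggle UPLL_CTLREQ and wait for the PLL to acknowledge the request by
 * asserting CTLACK and CTLACK2. Returns 0 on success, -ETIMEDOUT if the
 * acknowledge never arrives.
 */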
int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
				unsigned cg_upll_func_cntl)
{
	unsigned i;

	/* make sure UPLL_CTLREQ is deasserted */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32(cg_upll_func_cntl) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert UPLL_CTLREQ */
	WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting UVD clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}