1 /* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <deathsimple@vodafone.de> 29 */ 30 31 #include <linux/firmware.h> 32 #include <linux/module.h> 33 #include <drm/drmP.h> 34 #include <drm/drm.h> 35 36 #include "radeon.h" 37 #include "radeon_ucode.h" 38 #include "r600d.h" 39 40 /* 1 second timeout */ 41 #define UVD_IDLE_TIMEOUT_MS 1000 42 43 /* Firmware Names */ 44 #define FIRMWARE_R600 "radeon/R600_uvd.bin" 45 #define FIRMWARE_RS780 "radeon/RS780_uvd.bin" 46 #define FIRMWARE_RV770 "radeon/RV770_uvd.bin" 47 #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" 48 #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" 49 #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" 50 #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" 51 #define FIRMWARE_BONAIRE_LEGACY "radeon/BONAIRE_uvd.bin" 52 #define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" 53 54 MODULE_FIRMWARE(FIRMWARE_R600); 55 MODULE_FIRMWARE(FIRMWARE_RS780); 56 MODULE_FIRMWARE(FIRMWARE_RV770); 57 MODULE_FIRMWARE(FIRMWARE_RV710); 58 MODULE_FIRMWARE(FIRMWARE_CYPRESS); 59 MODULE_FIRMWARE(FIRMWARE_SUMO); 60 MODULE_FIRMWARE(FIRMWARE_TAHITI); 61 MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY); 62 MODULE_FIRMWARE(FIRMWARE_BONAIRE); 63 64 static void radeon_uvd_idle_work_handler(struct work_struct *work); 65 66 int radeon_uvd_init(struct radeon_device *rdev) 67 { 68 unsigned long bo_size; 69 const char *fw_name = NULL, *legacy_fw_name = NULL; 70 int i, r; 71 72 INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); 73 74 switch (rdev->family) { 75 case CHIP_RV610: 76 case CHIP_RV630: 77 case CHIP_RV670: 78 case CHIP_RV620: 79 case CHIP_RV635: 80 legacy_fw_name = FIRMWARE_R600; 81 break; 82 83 case CHIP_RS780: 84 case CHIP_RS880: 85 legacy_fw_name = FIRMWARE_RS780; 86 break; 87 88 case CHIP_RV770: 89 legacy_fw_name = FIRMWARE_RV770; 90 break; 91 92 case CHIP_RV710: 93 case CHIP_RV730: 94 case CHIP_RV740: 95 legacy_fw_name = FIRMWARE_RV710; 96 break; 97 98 case CHIP_CYPRESS: 99 case CHIP_HEMLOCK: 100 case CHIP_JUNIPER: 101 case CHIP_REDWOOD: 102 case CHIP_CEDAR: 103 legacy_fw_name = FIRMWARE_CYPRESS; 104 break; 105 106 case CHIP_SUMO: 107 case CHIP_SUMO2: 108 case CHIP_PALM: 109 case CHIP_CAYMAN: 110 case CHIP_BARTS: 111 case CHIP_TURKS: 112 case CHIP_CAICOS: 113 legacy_fw_name = FIRMWARE_SUMO; 114 break; 115 116 case CHIP_TAHITI: 117 case CHIP_VERDE: 118 case CHIP_PITCAIRN: 119 case CHIP_ARUBA: 120 case CHIP_OLAND: 121 legacy_fw_name = FIRMWARE_TAHITI; 122 break; 123 124 case CHIP_BONAIRE: 125 case CHIP_KABINI: 126 case CHIP_KAVERI: 127 case CHIP_HAWAII: 128 case CHIP_MULLINS: 129 legacy_fw_name = FIRMWARE_BONAIRE_LEGACY; 130 fw_name = FIRMWARE_BONAIRE; 131 break; 132 133 default: 134 return -EINVAL; 135 } 136 137 rdev->uvd.fw_header_present = false; 138 rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES; 139 if (fw_name) { 140 /* Let's try to load the newer firmware first */ 141 r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); 142 if (r) { 143 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 144 fw_name); 145 } else { 146 struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data; 147 unsigned version_major, version_minor, family_id; 148 149 r = radeon_ucode_validate(rdev->uvd_fw); 150 if (r) 151 return r; 152 153 rdev->uvd.fw_header_present = true; 154 155 family_id = le32_to_cpu(hdr->ucode_version) & 0xff; 156 version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; 157 version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; 158 DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", 159 version_major, version_minor, family_id); 160 161 /* 162 * Limit the number of UVD handles depending on 163 * microcode major and minor versions. 164 */ 165 if ((version_major >= 0x01) && (version_minor >= 0x37)) 166 rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES; 167 } 168 } 169 170 /* 171 * In case there is only legacy firmware, or we encounter an error 172 * while loading the new firmware, we fall back to loading the legacy 173 * firmware now. 174 */ 175 if (!fw_name || r) { 176 r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev); 177 if (r) { 178 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 179 legacy_fw_name); 180 return r; 181 } 182 } 183 184 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + 185 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + 186 RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles; 187 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, 188 RADEON_GEM_DOMAIN_VRAM, 0, NULL, 189 NULL, &rdev->uvd.vcpu_bo); 190 if (r) { 191 dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); 192 return r; 193 } 194 195 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 196 if (r) { 197 radeon_bo_unref(&rdev->uvd.vcpu_bo); 198 dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); 199 return r; 200 } 201 202 r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 203 &rdev->uvd.gpu_addr); 204 if (r) { 205 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 206 radeon_bo_unref(&rdev->uvd.vcpu_bo); 207 dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); 208 return r; 209 } 210 211 r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); 212 if (r) { 213 dev_err(rdev->dev, "(%d) UVD map failed\n", r); 214 return r; 215 } 216 217 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 218 219 for (i = 0; i < rdev->uvd.max_handles; ++i) { 220 atomic_set(&rdev->uvd.handles[i], 0); 221 rdev->uvd.filp[i] = NULL; 222 rdev->uvd.img_size[i] = 0; 223 } 224 225 return 0; 226 } 227 228 void radeon_uvd_fini(struct radeon_device *rdev) 229 { 230 int r; 231 232 if (rdev->uvd.vcpu_bo == NULL) 233 return; 234 235 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 236 if (!r) { 237 radeon_bo_kunmap(rdev->uvd.vcpu_bo); 238 radeon_bo_unpin(rdev->uvd.vcpu_bo); 239 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 240 } 241 242 radeon_bo_unref(&rdev->uvd.vcpu_bo); 243 244 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]); 245 246 release_firmware(rdev->uvd_fw); 247 } 248 249 int radeon_uvd_suspend(struct radeon_device *rdev) 250 { 251 int i, r; 252 253 if (rdev->uvd.vcpu_bo == NULL) 254 return 0; 255 256 for (i = 0; i < rdev->uvd.max_handles; ++i) { 257 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 258 if (handle != 0) { 259 struct radeon_fence *fence; 260 261 radeon_uvd_note_usage(rdev); 262 263 r = radeon_uvd_get_destroy_msg(rdev, 264 R600_RING_TYPE_UVD_INDEX, handle, &fence); 265 if (r) { 266 DRM_ERROR("Error destroying UVD (%d)!\n", r); 267 continue; 268 } 269 270 radeon_fence_wait(fence, false); 271 radeon_fence_unref(&fence); 272 273 rdev->uvd.filp[i] = NULL; 274 atomic_set(&rdev->uvd.handles[i], 0); 275 } 276 } 277 278 return 0; 279 } 280 281 int radeon_uvd_resume(struct radeon_device *rdev) 282 { 283 unsigned size; 284 void *ptr; 285 286 if (rdev->uvd.vcpu_bo == NULL) 287 return -EINVAL; 288 289 memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); 290 291 size = radeon_bo_size(rdev->uvd.vcpu_bo); 292 size -= rdev->uvd_fw->size; 293 294 ptr = rdev->uvd.cpu_addr; 295 ptr += rdev->uvd_fw->size; 296 297 memset(ptr, 0, size); 298 299 return 0; 300 } 301 302 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, 303 uint32_t allowed_domains) 304 { 305 int i; 306 307 for (i = 0; i < rbo->placement.num_placement; ++i) { 308 rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; 309 rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 310 } 311 312 /* If it must be in VRAM it must be in the first segment as well */ 313 if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) 314 return; 315 316 /* abort if we already have more than one placement */ 317 if (rbo->placement.num_placement > 1) 318 return; 319 320 /* add another 256MB segment */ 321 rbo->placements[1] = rbo->placements[0]; 322 rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 323 rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 324 rbo->placement.num_placement++; 325 rbo->placement.num_busy_placement++; 326 } 327 328 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) 329 { 330 int i, r; 331 for (i = 0; i < rdev->uvd.max_handles; ++i) { 332 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 333 if (handle != 0 && rdev->uvd.filp[i] == filp) { 334 struct radeon_fence *fence; 335 336 radeon_uvd_note_usage(rdev); 337 338 r = radeon_uvd_get_destroy_msg(rdev, 339 R600_RING_TYPE_UVD_INDEX, handle, &fence); 340 if (r) { 341 DRM_ERROR("Error destroying UVD (%d)!\n", r); 342 continue; 343 } 344 345 radeon_fence_wait(fence, false); 346 radeon_fence_unref(&fence); 347 348 rdev->uvd.filp[i] = NULL; 349 atomic_set(&rdev->uvd.handles[i], 0); 350 } 351 } 352 } 353 354 static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) 355 { 356 unsigned stream_type = msg[4]; 357 unsigned width = msg[6]; 358 unsigned height = msg[7]; 359 unsigned dpb_size = msg[9]; 360 unsigned pitch = msg[28]; 361 362 unsigned width_in_mb = width / 16; 363 unsigned height_in_mb = ALIGN(height / 16, 2); 364 365 unsigned image_size, tmp, min_dpb_size; 366 367 image_size = width * height; 368 image_size += image_size / 2; 369 image_size = ALIGN(image_size, 1024); 370 371 switch (stream_type) { 372 case 0: /* H264 */ 373 374 /* reference picture buffer */ 375 min_dpb_size = image_size * 17; 376 377 /* macroblock context buffer */ 378 min_dpb_size += width_in_mb * height_in_mb * 17 * 192; 379 380 /* IT surface buffer */ 381 min_dpb_size += width_in_mb * height_in_mb * 32; 382 break; 383 384 case 1: /* VC1 */ 385 386 /* reference picture buffer */ 387 min_dpb_size = image_size * 3; 388 389 /* CONTEXT_BUFFER */ 390 min_dpb_size += width_in_mb * height_in_mb * 128; 391 392 /* IT surface buffer */ 393 min_dpb_size += width_in_mb * 64; 394 395 /* DB surface buffer */ 396 min_dpb_size += width_in_mb * 128; 397 398 /* BP */ 399 tmp = max(width_in_mb, height_in_mb); 400 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 401 break; 402 403 case 3: /* MPEG2 */ 404 405 /* reference picture buffer */ 406 min_dpb_size = image_size * 3; 407 break; 408 409 case 4: /* MPEG4 */ 410 411 /* reference picture buffer */ 412 min_dpb_size = image_size * 3; 413 414 /* CM */ 415 min_dpb_size += width_in_mb * height_in_mb * 64; 416 417 /* IT surface buffer */ 418 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 419 break; 420 421 default: 422 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 423 return -EINVAL; 424 } 425 426 if (width > pitch) { 427 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 428 return -EINVAL; 429 } 430 431 if (dpb_size < min_dpb_size) { 432 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 433 dpb_size, min_dpb_size); 434 return -EINVAL; 435 } 436 437 buf_sizes[0x1] = dpb_size; 438 buf_sizes[0x2] = image_size; 439 return 0; 440 } 441 442 static int radeon_uvd_validate_codec(struct radeon_cs_parser *p, 443 unsigned stream_type) 444 { 445 switch (stream_type) { 446 case 0: /* H264 */ 447 case 1: /* VC1 */ 448 /* always supported */ 449 return 0; 450 451 case 3: /* MPEG2 */ 452 case 4: /* MPEG4 */ 453 /* only since UVD 3 */ 454 if (p->rdev->family >= CHIP_PALM) 455 return 0; 456 457 /* fall through */ 458 default: 459 DRM_ERROR("UVD codec not supported by hardware %d!\n", 460 stream_type); 461 return -EINVAL; 462 } 463 } 464 465 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, 466 unsigned offset, unsigned buf_sizes[]) 467 { 468 int32_t *msg, msg_type, handle; 469 unsigned img_size = 0; 470 struct dma_fence *f; 471 void *ptr; 472 473 int i, r; 474 475 if (offset & 0x3F) { 476 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 477 return -EINVAL; 478 } 479 480 f = reservation_object_get_excl(bo->tbo.resv); 481 if (f) { 482 r = radeon_fence_wait((struct radeon_fence *)f, false); 483 if (r) { 484 DRM_ERROR("Failed waiting for UVD message (%d)!\n", r); 485 return r; 486 } 487 } 488 489 r = radeon_bo_kmap(bo, &ptr); 490 if (r) { 491 DRM_ERROR("Failed mapping the UVD message (%d)!\n", r); 492 return r; 493 } 494 495 msg = ptr + offset; 496 497 msg_type = msg[1]; 498 handle = msg[2]; 499 500 if (handle == 0) { 501 DRM_ERROR("Invalid UVD handle!\n"); 502 return -EINVAL; 503 } 504 505 switch (msg_type) { 506 case 0: 507 /* it's a create msg, calc image size (width * height) */ 508 img_size = msg[7] * msg[8]; 509 510 r = radeon_uvd_validate_codec(p, msg[4]); 511 radeon_bo_kunmap(bo); 512 if (r) 513 return r; 514 515 /* try to alloc a new handle */ 516 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 517 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 518 DRM_ERROR("Handle 0x%x already in use!\n", handle); 519 return -EINVAL; 520 } 521 522 if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { 523 p->rdev->uvd.filp[i] = p->filp; 524 p->rdev->uvd.img_size[i] = img_size; 525 return 0; 526 } 527 } 528 529 DRM_ERROR("No more free UVD handles!\n"); 530 return -EINVAL; 531 532 case 1: 533 /* it's a decode msg, validate codec and calc buffer sizes */ 534 r = radeon_uvd_validate_codec(p, msg[4]); 535 if (!r) 536 r = radeon_uvd_cs_msg_decode(msg, buf_sizes); 537 radeon_bo_kunmap(bo); 538 if (r) 539 return r; 540 541 /* validate the handle */ 542 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 543 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 544 if (p->rdev->uvd.filp[i] != p->filp) { 545 DRM_ERROR("UVD handle collision detected!\n"); 546 return -EINVAL; 547 } 548 return 0; 549 } 550 } 551 552 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); 553 return -ENOENT; 554 555 case 2: 556 /* it's a destroy msg, free the handle */ 557 for (i = 0; i < p->rdev->uvd.max_handles; ++i) 558 atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); 559 radeon_bo_kunmap(bo); 560 return 0; 561 562 default: 563 564 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 565 return -EINVAL; 566 } 567 568 BUG(); 569 return -EINVAL; 570 } 571 572 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, 573 int data0, int data1, 574 unsigned buf_sizes[], bool *has_msg_cmd) 575 { 576 struct radeon_cs_chunk *relocs_chunk; 577 struct radeon_bo_list *reloc; 578 unsigned idx, cmd, offset; 579 uint64_t start, end; 580 int r; 581 582 relocs_chunk = p->chunk_relocs; 583 offset = radeon_get_ib_value(p, data0); 584 idx = radeon_get_ib_value(p, data1); 585 if (idx >= relocs_chunk->length_dw) { 586 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 587 idx, relocs_chunk->length_dw); 588 return -EINVAL; 589 } 590 591 reloc = &p->relocs[(idx / 4)]; 592 start = reloc->gpu_offset; 593 end = start + radeon_bo_size(reloc->robj); 594 start += offset; 595 596 p->ib.ptr[data0] = start & 0xFFFFFFFF; 597 p->ib.ptr[data1] = start >> 32; 598 599 cmd = radeon_get_ib_value(p, p->idx) >> 1; 600 601 if (cmd < 0x4) { 602 if (end <= start) { 603 DRM_ERROR("invalid reloc offset %X!\n", offset); 604 return -EINVAL; 605 } 606 if ((end - start) < buf_sizes[cmd]) { 607 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 608 (unsigned)(end - start), buf_sizes[cmd]); 609 return -EINVAL; 610 } 611 612 } else if (cmd != 0x100) { 613 DRM_ERROR("invalid UVD command %X!\n", cmd); 614 return -EINVAL; 615 } 616 617 if ((start >> 28) != ((end - 1) >> 28)) { 618 DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", 619 start, end); 620 return -EINVAL; 621 } 622 623 /* TODO: is this still necessary on NI+ ? */ 624 if ((cmd == 0 || cmd == 0x3) && 625 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 626 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 627 start, end); 628 return -EINVAL; 629 } 630 631 if (cmd == 0) { 632 if (*has_msg_cmd) { 633 DRM_ERROR("More than one message in a UVD-IB!\n"); 634 return -EINVAL; 635 } 636 *has_msg_cmd = true; 637 r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes); 638 if (r) 639 return r; 640 } else if (!*has_msg_cmd) { 641 DRM_ERROR("Message needed before other commands are send!\n"); 642 return -EINVAL; 643 } 644 645 return 0; 646 } 647 648 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p, 649 struct radeon_cs_packet *pkt, 650 int *data0, int *data1, 651 unsigned buf_sizes[], 652 bool *has_msg_cmd) 653 { 654 int i, r; 655 656 p->idx++; 657 for (i = 0; i <= pkt->count; ++i) { 658 switch (pkt->reg + i*4) { 659 case UVD_GPCOM_VCPU_DATA0: 660 *data0 = p->idx; 661 break; 662 case UVD_GPCOM_VCPU_DATA1: 663 *data1 = p->idx; 664 break; 665 case UVD_GPCOM_VCPU_CMD: 666 r = radeon_uvd_cs_reloc(p, *data0, *data1, 667 buf_sizes, has_msg_cmd); 668 if (r) 669 return r; 670 break; 671 case UVD_ENGINE_CNTL: 672 case UVD_NO_OP: 673 break; 674 default: 675 DRM_ERROR("Invalid reg 0x%X!\n", 676 pkt->reg + i*4); 677 return -EINVAL; 678 } 679 p->idx++; 680 } 681 return 0; 682 } 683 684 int radeon_uvd_cs_parse(struct radeon_cs_parser *p) 685 { 686 struct radeon_cs_packet pkt; 687 int r, data0 = 0, data1 = 0; 688 689 /* does the IB has a msg command */ 690 bool has_msg_cmd = false; 691 692 /* minimum buffer sizes */ 693 unsigned buf_sizes[] = { 694 [0x00000000] = 2048, 695 [0x00000001] = 32 * 1024 * 1024, 696 [0x00000002] = 2048 * 1152 * 3, 697 [0x00000003] = 2048, 698 }; 699 700 if (p->chunk_ib->length_dw % 16) { 701 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 702 p->chunk_ib->length_dw); 703 return -EINVAL; 704 } 705 706 if (p->chunk_relocs == NULL) { 707 DRM_ERROR("No relocation chunk !\n"); 708 return -EINVAL; 709 } 710 711 712 do { 713 r = radeon_cs_packet_parse(p, &pkt, p->idx); 714 if (r) 715 return r; 716 switch (pkt.type) { 717 case RADEON_PACKET_TYPE0: 718 r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1, 719 buf_sizes, &has_msg_cmd); 720 if (r) 721 return r; 722 break; 723 case RADEON_PACKET_TYPE2: 724 p->idx += pkt.count + 2; 725 break; 726 default: 727 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 728 return -EINVAL; 729 } 730 } while (p->idx < p->chunk_ib->length_dw); 731 732 if (!has_msg_cmd) { 733 DRM_ERROR("UVD-IBs need a msg command!\n"); 734 return -EINVAL; 735 } 736 737 return 0; 738 } 739 740 static int radeon_uvd_send_msg(struct radeon_device *rdev, 741 int ring, uint64_t addr, 742 struct radeon_fence **fence) 743 { 744 struct radeon_ib ib; 745 int i, r; 746 747 r = radeon_ib_get(rdev, ring, &ib, NULL, 64); 748 if (r) 749 return r; 750 751 ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); 752 ib.ptr[1] = addr; 753 ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); 754 ib.ptr[3] = addr >> 32; 755 ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); 756 ib.ptr[5] = 0; 757 for (i = 6; i < 16; i += 2) { 758 ib.ptr[i] = PACKET0(UVD_NO_OP, 0); 759 ib.ptr[i+1] = 0; 760 } 761 ib.length_dw = 16; 762 763 r = radeon_ib_schedule(rdev, &ib, NULL, false); 764 765 if (fence) 766 *fence = radeon_fence_ref(ib.fence); 767 768 radeon_ib_free(rdev, &ib); 769 return r; 770 } 771 772 /* 773 * multiple fence commands without any stream commands in between can 774 * crash the vcpu so just try to emmit a dummy create/destroy msg to 775 * avoid this 776 */ 777 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, 778 uint32_t handle, struct radeon_fence **fence) 779 { 780 /* we use the last page of the vcpu bo for the UVD message */ 781 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 782 RADEON_GPU_PAGE_SIZE; 783 784 uint32_t *msg = rdev->uvd.cpu_addr + offs; 785 uint64_t addr = rdev->uvd.gpu_addr + offs; 786 787 int r, i; 788 789 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 790 if (r) 791 return r; 792 793 /* stitch together an UVD create msg */ 794 msg[0] = cpu_to_le32(0x00000de4); 795 msg[1] = cpu_to_le32(0x00000000); 796 msg[2] = cpu_to_le32(handle); 797 msg[3] = cpu_to_le32(0x00000000); 798 msg[4] = cpu_to_le32(0x00000000); 799 msg[5] = cpu_to_le32(0x00000000); 800 msg[6] = cpu_to_le32(0x00000000); 801 msg[7] = cpu_to_le32(0x00000780); 802 msg[8] = cpu_to_le32(0x00000440); 803 msg[9] = cpu_to_le32(0x00000000); 804 msg[10] = cpu_to_le32(0x01b37000); 805 for (i = 11; i < 1024; ++i) 806 msg[i] = cpu_to_le32(0x0); 807 808 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 809 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 810 return r; 811 } 812 813 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, 814 uint32_t handle, struct radeon_fence **fence) 815 { 816 /* we use the last page of the vcpu bo for the UVD message */ 817 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 818 RADEON_GPU_PAGE_SIZE; 819 820 uint32_t *msg = rdev->uvd.cpu_addr + offs; 821 uint64_t addr = rdev->uvd.gpu_addr + offs; 822 823 int r, i; 824 825 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 826 if (r) 827 return r; 828 829 /* stitch together an UVD destroy msg */ 830 msg[0] = cpu_to_le32(0x00000de4); 831 msg[1] = cpu_to_le32(0x00000002); 832 msg[2] = cpu_to_le32(handle); 833 msg[3] = cpu_to_le32(0x00000000); 834 for (i = 4; i < 1024; ++i) 835 msg[i] = cpu_to_le32(0x0); 836 837 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 838 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 839 return r; 840 } 841 842 /** 843 * radeon_uvd_count_handles - count number of open streams 844 * 845 * @rdev: radeon_device pointer 846 * @sd: number of SD streams 847 * @hd: number of HD streams 848 * 849 * Count the number of open SD/HD streams as a hint for power mangement 850 */ 851 static void radeon_uvd_count_handles(struct radeon_device *rdev, 852 unsigned *sd, unsigned *hd) 853 { 854 unsigned i; 855 856 *sd = 0; 857 *hd = 0; 858 859 for (i = 0; i < rdev->uvd.max_handles; ++i) { 860 if (!atomic_read(&rdev->uvd.handles[i])) 861 continue; 862 863 if (rdev->uvd.img_size[i] >= 720*576) 864 ++(*hd); 865 else 866 ++(*sd); 867 } 868 } 869 870 static void radeon_uvd_idle_work_handler(struct work_struct *work) 871 { 872 struct radeon_device *rdev = 873 container_of(work, struct radeon_device, uvd.idle_work.work); 874 875 if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { 876 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 877 radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, 878 &rdev->pm.dpm.hd); 879 radeon_dpm_enable_uvd(rdev, false); 880 } else { 881 radeon_set_uvd_clocks(rdev, 0, 0); 882 } 883 } else { 884 schedule_delayed_work(&rdev->uvd.idle_work, 885 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 886 } 887 } 888 889 void radeon_uvd_note_usage(struct radeon_device *rdev) 890 { 891 bool streams_changed = false; 892 bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); 893 set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, 894 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 895 896 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 897 unsigned hd = 0, sd = 0; 898 radeon_uvd_count_handles(rdev, &sd, &hd); 899 if ((rdev->pm.dpm.sd != sd) || 900 (rdev->pm.dpm.hd != hd)) { 901 rdev->pm.dpm.sd = sd; 902 rdev->pm.dpm.hd = hd; 903 /* disable this for now */ 904 /*streams_changed = true;*/ 905 } 906 } 907 908 if (set_clocks || streams_changed) { 909 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 910 radeon_dpm_enable_uvd(rdev, true); 911 } else { 912 radeon_set_uvd_clocks(rdev, 53300, 40000); 913 } 914 } 915 } 916 917 static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq, 918 unsigned target_freq, 919 unsigned pd_min, 920 unsigned pd_even) 921 { 922 unsigned post_div = vco_freq / target_freq; 923 924 /* adjust to post divider minimum value */ 925 if (post_div < pd_min) 926 post_div = pd_min; 927 928 /* we alway need a frequency less than or equal the target */ 929 if ((vco_freq / post_div) > target_freq) 930 post_div += 1; 931 932 /* post dividers above a certain value must be even */ 933 if (post_div > pd_even && post_div % 2) 934 post_div += 1; 935 936 return post_div; 937 } 938 939 /** 940 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers 941 * 942 * @rdev: radeon_device pointer 943 * @vclk: wanted VCLK 944 * @dclk: wanted DCLK 945 * @vco_min: minimum VCO frequency 946 * @vco_max: maximum VCO frequency 947 * @fb_factor: factor to multiply vco freq with 948 * @fb_mask: limit and bitmask for feedback divider 949 * @pd_min: post divider minimum 950 * @pd_max: post divider maximum 951 * @pd_even: post divider must be even above this value 952 * @optimal_fb_div: resulting feedback divider 953 * @optimal_vclk_div: resulting vclk post divider 954 * @optimal_dclk_div: resulting dclk post divider 955 * 956 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). 957 * Returns zero on success -EINVAL on error. 958 */ 959 int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, 960 unsigned vclk, unsigned dclk, 961 unsigned vco_min, unsigned vco_max, 962 unsigned fb_factor, unsigned fb_mask, 963 unsigned pd_min, unsigned pd_max, 964 unsigned pd_even, 965 unsigned *optimal_fb_div, 966 unsigned *optimal_vclk_div, 967 unsigned *optimal_dclk_div) 968 { 969 unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq; 970 971 /* start off with something large */ 972 unsigned optimal_score = ~0; 973 974 /* loop through vco from low to high */ 975 vco_min = max(max(vco_min, vclk), dclk); 976 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 977 978 uint64_t fb_div = (uint64_t)vco_freq * fb_factor; 979 unsigned vclk_div, dclk_div, score; 980 981 do_div(fb_div, ref_freq); 982 983 /* fb div out of range ? */ 984 if (fb_div > fb_mask) 985 break; /* it can oly get worse */ 986 987 fb_div &= fb_mask; 988 989 /* calc vclk divider with current vco freq */ 990 vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk, 991 pd_min, pd_even); 992 if (vclk_div > pd_max) 993 break; /* vco is too big, it has to stop */ 994 995 /* calc dclk divider with current vco freq */ 996 dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, 997 pd_min, pd_even); 998 if (vclk_div > pd_max) 999 break; /* vco is too big, it has to stop */ 1000 1001 /* calc score with current vco freq */ 1002 score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); 1003 1004 /* determine if this vco setting is better than current optimal settings */ 1005 if (score < optimal_score) { 1006 *optimal_fb_div = fb_div; 1007 *optimal_vclk_div = vclk_div; 1008 *optimal_dclk_div = dclk_div; 1009 optimal_score = score; 1010 if (optimal_score == 0) 1011 break; /* it can't get better than this */ 1012 } 1013 } 1014 1015 /* did we found a valid setup ? */ 1016 if (optimal_score == ~0) 1017 return -EINVAL; 1018 1019 return 0; 1020 } 1021 1022 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 1023 unsigned cg_upll_func_cntl) 1024 { 1025 unsigned i; 1026 1027 /* make sure UPLL_CTLREQ is deasserted */ 1028 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1029 1030 mdelay(10); 1031 1032 /* assert UPLL_CTLREQ */ 1033 WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); 1034 1035 /* wait for CTLACK and CTLACK2 to get asserted */ 1036 for (i = 0; i < 100; ++i) { 1037 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; 1038 if ((RREG32(cg_upll_func_cntl) & mask) == mask) 1039 break; 1040 mdelay(10); 1041 } 1042 1043 /* deassert UPLL_CTLREQ */ 1044 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1045 1046 if (i == 100) { 1047 DRM_ERROR("Timeout setting UVD clocks!\n"); 1048 return -ETIMEDOUT; 1049 } 1050 1051 return 0; 1052 } 1053