1 /* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <deathsimple@vodafone.de> 29 */ 30 31 #include <linux/firmware.h> 32 #include <linux/module.h> 33 34 #include <drm/drm.h> 35 36 #include "radeon.h" 37 #include "radeon_ucode.h" 38 #include "r600d.h" 39 40 /* 1 second timeout */ 41 #define UVD_IDLE_TIMEOUT_MS 1000 42 43 /* Firmware Names */ 44 #define FIRMWARE_R600 "radeon/R600_uvd.bin" 45 #define FIRMWARE_RS780 "radeon/RS780_uvd.bin" 46 #define FIRMWARE_RV770 "radeon/RV770_uvd.bin" 47 #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" 48 #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" 49 #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" 50 #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" 51 #define FIRMWARE_BONAIRE_LEGACY "radeon/BONAIRE_uvd.bin" 52 #define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin" 53 54 MODULE_FIRMWARE(FIRMWARE_R600); 55 MODULE_FIRMWARE(FIRMWARE_RS780); 56 MODULE_FIRMWARE(FIRMWARE_RV770); 57 MODULE_FIRMWARE(FIRMWARE_RV710); 58 MODULE_FIRMWARE(FIRMWARE_CYPRESS); 59 MODULE_FIRMWARE(FIRMWARE_SUMO); 60 MODULE_FIRMWARE(FIRMWARE_TAHITI); 61 MODULE_FIRMWARE(FIRMWARE_BONAIRE_LEGACY); 62 MODULE_FIRMWARE(FIRMWARE_BONAIRE); 63 64 static void radeon_uvd_idle_work_handler(struct work_struct *work); 65 66 int radeon_uvd_init(struct radeon_device *rdev) 67 { 68 unsigned long bo_size; 69 const char *fw_name = NULL, *legacy_fw_name = NULL; 70 int i, r; 71 72 INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); 73 74 switch (rdev->family) { 75 case CHIP_RV610: 76 case CHIP_RV630: 77 case CHIP_RV670: 78 case CHIP_RV620: 79 case CHIP_RV635: 80 legacy_fw_name = FIRMWARE_R600; 81 break; 82 83 case CHIP_RS780: 84 case CHIP_RS880: 85 legacy_fw_name = FIRMWARE_RS780; 86 break; 87 88 case CHIP_RV770: 89 legacy_fw_name = FIRMWARE_RV770; 90 break; 91 92 case CHIP_RV710: 93 case CHIP_RV730: 94 case CHIP_RV740: 95 legacy_fw_name = FIRMWARE_RV710; 96 break; 97 98 case CHIP_CYPRESS: 99 case CHIP_HEMLOCK: 100 case CHIP_JUNIPER: 101 case CHIP_REDWOOD: 102 case CHIP_CEDAR: 103 legacy_fw_name = FIRMWARE_CYPRESS; 104 break; 105 106 case CHIP_SUMO: 107 case CHIP_SUMO2: 108 case CHIP_PALM: 109 case CHIP_CAYMAN: 110 case CHIP_BARTS: 111 case CHIP_TURKS: 112 case CHIP_CAICOS: 113 legacy_fw_name = FIRMWARE_SUMO; 114 break; 115 116 case CHIP_TAHITI: 117 case CHIP_VERDE: 118 case CHIP_PITCAIRN: 119 case CHIP_ARUBA: 120 case CHIP_OLAND: 121 legacy_fw_name = FIRMWARE_TAHITI; 122 break; 123 124 case CHIP_BONAIRE: 125 case CHIP_KABINI: 126 case CHIP_KAVERI: 127 case CHIP_HAWAII: 128 case CHIP_MULLINS: 129 legacy_fw_name = FIRMWARE_BONAIRE_LEGACY; 130 fw_name = FIRMWARE_BONAIRE; 131 break; 132 133 default: 134 return -EINVAL; 135 } 136 137 rdev->uvd.fw_header_present = false; 138 rdev->uvd.max_handles = RADEON_DEFAULT_UVD_HANDLES; 139 if (fw_name) { 140 /* Let's try to load the newer firmware first */ 141 r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); 142 if (r) { 143 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 144 fw_name); 145 } else { 146 struct common_firmware_header *hdr = (void *)rdev->uvd_fw->data; 147 unsigned version_major, version_minor, family_id; 148 149 r = radeon_ucode_validate(rdev->uvd_fw); 150 if (r) 151 return r; 152 153 rdev->uvd.fw_header_present = true; 154 155 family_id = (__force u32)(hdr->ucode_version) & 0xff; 156 version_major = (le32_to_cpu((__force __le32)(hdr->ucode_version)) 157 >> 24) & 0xff; 158 version_minor = (le32_to_cpu((__force __le32)(hdr->ucode_version)) 159 >> 8) & 0xff; 160 DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n", 161 version_major, version_minor, family_id); 162 163 /* 164 * Limit the number of UVD handles depending on 165 * microcode major and minor versions. 166 */ 167 if ((version_major >= 0x01) && (version_minor >= 0x37)) 168 rdev->uvd.max_handles = RADEON_MAX_UVD_HANDLES; 169 } 170 } 171 172 /* 173 * In case there is only legacy firmware, or we encounter an error 174 * while loading the new firmware, we fall back to loading the legacy 175 * firmware now. 176 */ 177 if (!fw_name || r) { 178 r = request_firmware(&rdev->uvd_fw, legacy_fw_name, rdev->dev); 179 if (r) { 180 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 181 legacy_fw_name); 182 return r; 183 } 184 } 185 186 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + 187 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + 188 RADEON_UVD_SESSION_SIZE * rdev->uvd.max_handles; 189 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, 190 RADEON_GEM_DOMAIN_VRAM, 0, NULL, 191 NULL, &rdev->uvd.vcpu_bo); 192 if (r) { 193 dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); 194 return r; 195 } 196 197 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 198 if (r) { 199 radeon_bo_unref(&rdev->uvd.vcpu_bo); 200 dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); 201 return r; 202 } 203 204 r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 205 &rdev->uvd.gpu_addr); 206 if (r) { 207 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 208 radeon_bo_unref(&rdev->uvd.vcpu_bo); 209 dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); 210 return r; 211 } 212 213 r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); 214 if (r) { 215 dev_err(rdev->dev, "(%d) UVD map failed\n", r); 216 return r; 217 } 218 219 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 220 221 for (i = 0; i < rdev->uvd.max_handles; ++i) { 222 atomic_set(&rdev->uvd.handles[i], 0); 223 rdev->uvd.filp[i] = NULL; 224 rdev->uvd.img_size[i] = 0; 225 } 226 227 return 0; 228 } 229 230 void radeon_uvd_fini(struct radeon_device *rdev) 231 { 232 int r; 233 234 if (rdev->uvd.vcpu_bo == NULL) 235 return; 236 237 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 238 if (!r) { 239 radeon_bo_kunmap(rdev->uvd.vcpu_bo); 240 radeon_bo_unpin(rdev->uvd.vcpu_bo); 241 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 242 } 243 244 radeon_bo_unref(&rdev->uvd.vcpu_bo); 245 246 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]); 247 248 release_firmware(rdev->uvd_fw); 249 } 250 251 int radeon_uvd_suspend(struct radeon_device *rdev) 252 { 253 int i, r; 254 255 if (rdev->uvd.vcpu_bo == NULL) 256 return 0; 257 258 for (i = 0; i < rdev->uvd.max_handles; ++i) { 259 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 260 if (handle != 0) { 261 struct radeon_fence *fence; 262 263 radeon_uvd_note_usage(rdev); 264 265 r = radeon_uvd_get_destroy_msg(rdev, 266 R600_RING_TYPE_UVD_INDEX, handle, &fence); 267 if (r) { 268 DRM_ERROR("Error destroying UVD (%d)!\n", r); 269 continue; 270 } 271 272 radeon_fence_wait(fence, false); 273 radeon_fence_unref(&fence); 274 275 rdev->uvd.filp[i] = NULL; 276 atomic_set(&rdev->uvd.handles[i], 0); 277 } 278 } 279 280 return 0; 281 } 282 283 int radeon_uvd_resume(struct radeon_device *rdev) 284 { 285 unsigned size; 286 void *ptr; 287 288 if (rdev->uvd.vcpu_bo == NULL) 289 return -EINVAL; 290 291 memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); 292 293 size = radeon_bo_size(rdev->uvd.vcpu_bo); 294 size -= rdev->uvd_fw->size; 295 296 ptr = rdev->uvd.cpu_addr; 297 ptr += rdev->uvd_fw->size; 298 299 memset_io((void __iomem *)ptr, 0, size); 300 301 return 0; 302 } 303 304 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, 305 uint32_t allowed_domains) 306 { 307 int i; 308 309 for (i = 0; i < rbo->placement.num_placement; ++i) { 310 rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; 311 rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 312 } 313 314 /* If it must be in VRAM it must be in the first segment as well */ 315 if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) 316 return; 317 318 /* abort if we already have more than one placement */ 319 if (rbo->placement.num_placement > 1) 320 return; 321 322 /* add another 256MB segment */ 323 rbo->placements[1] = rbo->placements[0]; 324 rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 325 rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; 326 rbo->placement.num_placement++; 327 rbo->placement.num_busy_placement++; 328 } 329 330 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) 331 { 332 int i, r; 333 for (i = 0; i < rdev->uvd.max_handles; ++i) { 334 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 335 if (handle != 0 && rdev->uvd.filp[i] == filp) { 336 struct radeon_fence *fence; 337 338 radeon_uvd_note_usage(rdev); 339 340 r = radeon_uvd_get_destroy_msg(rdev, 341 R600_RING_TYPE_UVD_INDEX, handle, &fence); 342 if (r) { 343 DRM_ERROR("Error destroying UVD (%d)!\n", r); 344 continue; 345 } 346 347 radeon_fence_wait(fence, false); 348 radeon_fence_unref(&fence); 349 350 rdev->uvd.filp[i] = NULL; 351 atomic_set(&rdev->uvd.handles[i], 0); 352 } 353 } 354 } 355 356 static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) 357 { 358 unsigned stream_type = msg[4]; 359 unsigned width = msg[6]; 360 unsigned height = msg[7]; 361 unsigned dpb_size = msg[9]; 362 unsigned pitch = msg[28]; 363 364 unsigned width_in_mb = width / 16; 365 unsigned height_in_mb = ALIGN(height / 16, 2); 366 367 unsigned image_size, tmp, min_dpb_size; 368 369 image_size = width * height; 370 image_size += image_size / 2; 371 image_size = ALIGN(image_size, 1024); 372 373 switch (stream_type) { 374 case 0: /* H264 */ 375 376 /* reference picture buffer */ 377 min_dpb_size = image_size * 17; 378 379 /* macroblock context buffer */ 380 min_dpb_size += width_in_mb * height_in_mb * 17 * 192; 381 382 /* IT surface buffer */ 383 min_dpb_size += width_in_mb * height_in_mb * 32; 384 break; 385 386 case 1: /* VC1 */ 387 388 /* reference picture buffer */ 389 min_dpb_size = image_size * 3; 390 391 /* CONTEXT_BUFFER */ 392 min_dpb_size += width_in_mb * height_in_mb * 128; 393 394 /* IT surface buffer */ 395 min_dpb_size += width_in_mb * 64; 396 397 /* DB surface buffer */ 398 min_dpb_size += width_in_mb * 128; 399 400 /* BP */ 401 tmp = max(width_in_mb, height_in_mb); 402 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 403 break; 404 405 case 3: /* MPEG2 */ 406 407 /* reference picture buffer */ 408 min_dpb_size = image_size * 3; 409 break; 410 411 case 4: /* MPEG4 */ 412 413 /* reference picture buffer */ 414 min_dpb_size = image_size * 3; 415 416 /* CM */ 417 min_dpb_size += width_in_mb * height_in_mb * 64; 418 419 /* IT surface buffer */ 420 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 421 break; 422 423 default: 424 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 425 return -EINVAL; 426 } 427 428 if (width > pitch) { 429 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 430 return -EINVAL; 431 } 432 433 if (dpb_size < min_dpb_size) { 434 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 435 dpb_size, min_dpb_size); 436 return -EINVAL; 437 } 438 439 buf_sizes[0x1] = dpb_size; 440 buf_sizes[0x2] = image_size; 441 return 0; 442 } 443 444 static int radeon_uvd_validate_codec(struct radeon_cs_parser *p, 445 unsigned stream_type) 446 { 447 switch (stream_type) { 448 case 0: /* H264 */ 449 case 1: /* VC1 */ 450 /* always supported */ 451 return 0; 452 453 case 3: /* MPEG2 */ 454 case 4: /* MPEG4 */ 455 /* only since UVD 3 */ 456 if (p->rdev->family >= CHIP_PALM) 457 return 0; 458 459 fallthrough; 460 default: 461 DRM_ERROR("UVD codec not supported by hardware %d!\n", 462 stream_type); 463 return -EINVAL; 464 } 465 } 466 467 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, 468 unsigned offset, unsigned buf_sizes[]) 469 { 470 int32_t *msg, msg_type, handle; 471 unsigned img_size = 0; 472 void *ptr; 473 int i, r; 474 475 if (offset & 0x3F) { 476 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 477 return -EINVAL; 478 } 479 480 r = radeon_bo_kmap(bo, &ptr); 481 if (r) { 482 DRM_ERROR("Failed mapping the UVD message (%d)!\n", r); 483 return r; 484 } 485 486 msg = ptr + offset; 487 488 msg_type = msg[1]; 489 handle = msg[2]; 490 491 if (handle == 0) { 492 radeon_bo_kunmap(bo); 493 DRM_ERROR("Invalid UVD handle!\n"); 494 return -EINVAL; 495 } 496 497 switch (msg_type) { 498 case 0: 499 /* it's a create msg, calc image size (width * height) */ 500 img_size = msg[7] * msg[8]; 501 502 r = radeon_uvd_validate_codec(p, msg[4]); 503 radeon_bo_kunmap(bo); 504 if (r) 505 return r; 506 507 /* try to alloc a new handle */ 508 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 509 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 510 DRM_ERROR("Handle 0x%x already in use!\n", handle); 511 return -EINVAL; 512 } 513 514 if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { 515 p->rdev->uvd.filp[i] = p->filp; 516 p->rdev->uvd.img_size[i] = img_size; 517 return 0; 518 } 519 } 520 521 DRM_ERROR("No more free UVD handles!\n"); 522 return -EINVAL; 523 524 case 1: 525 /* it's a decode msg, validate codec and calc buffer sizes */ 526 r = radeon_uvd_validate_codec(p, msg[4]); 527 if (!r) 528 r = radeon_uvd_cs_msg_decode(msg, buf_sizes); 529 radeon_bo_kunmap(bo); 530 if (r) 531 return r; 532 533 /* validate the handle */ 534 for (i = 0; i < p->rdev->uvd.max_handles; ++i) { 535 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) { 536 if (p->rdev->uvd.filp[i] != p->filp) { 537 DRM_ERROR("UVD handle collision detected!\n"); 538 return -EINVAL; 539 } 540 return 0; 541 } 542 } 543 544 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); 545 return -ENOENT; 546 547 case 2: 548 /* it's a destroy msg, free the handle */ 549 for (i = 0; i < p->rdev->uvd.max_handles; ++i) 550 atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); 551 radeon_bo_kunmap(bo); 552 return 0; 553 554 default: 555 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 556 } 557 558 radeon_bo_kunmap(bo); 559 return -EINVAL; 560 } 561 562 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, 563 int data0, int data1, 564 unsigned buf_sizes[], bool *has_msg_cmd) 565 { 566 struct radeon_cs_chunk *relocs_chunk; 567 struct radeon_bo_list *reloc; 568 unsigned idx, cmd, offset; 569 uint64_t start, end; 570 int r; 571 572 relocs_chunk = p->chunk_relocs; 573 offset = radeon_get_ib_value(p, data0); 574 idx = radeon_get_ib_value(p, data1); 575 if (idx >= relocs_chunk->length_dw) { 576 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 577 idx, relocs_chunk->length_dw); 578 return -EINVAL; 579 } 580 581 reloc = &p->relocs[(idx / 4)]; 582 start = reloc->gpu_offset; 583 end = start + radeon_bo_size(reloc->robj); 584 start += offset; 585 586 p->ib.ptr[data0] = start & 0xFFFFFFFF; 587 p->ib.ptr[data1] = start >> 32; 588 589 cmd = radeon_get_ib_value(p, p->idx) >> 1; 590 591 if (cmd < 0x4) { 592 if (end <= start) { 593 DRM_ERROR("invalid reloc offset %X!\n", offset); 594 return -EINVAL; 595 } 596 if ((end - start) < buf_sizes[cmd]) { 597 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 598 (unsigned)(end - start), buf_sizes[cmd]); 599 return -EINVAL; 600 } 601 602 } else if (cmd != 0x100) { 603 DRM_ERROR("invalid UVD command %X!\n", cmd); 604 return -EINVAL; 605 } 606 607 if ((start >> 28) != ((end - 1) >> 28)) { 608 DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", 609 start, end); 610 return -EINVAL; 611 } 612 613 /* TODO: is this still necessary on NI+ ? */ 614 if ((cmd == 0 || cmd == 0x3) && 615 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 616 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 617 start, end); 618 return -EINVAL; 619 } 620 621 if (cmd == 0) { 622 if (*has_msg_cmd) { 623 DRM_ERROR("More than one message in a UVD-IB!\n"); 624 return -EINVAL; 625 } 626 *has_msg_cmd = true; 627 r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes); 628 if (r) 629 return r; 630 } else if (!*has_msg_cmd) { 631 DRM_ERROR("Message needed before other commands are send!\n"); 632 return -EINVAL; 633 } 634 635 return 0; 636 } 637 638 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p, 639 struct radeon_cs_packet *pkt, 640 int *data0, int *data1, 641 unsigned buf_sizes[], 642 bool *has_msg_cmd) 643 { 644 int i, r; 645 646 p->idx++; 647 for (i = 0; i <= pkt->count; ++i) { 648 switch (pkt->reg + i*4) { 649 case UVD_GPCOM_VCPU_DATA0: 650 *data0 = p->idx; 651 break; 652 case UVD_GPCOM_VCPU_DATA1: 653 *data1 = p->idx; 654 break; 655 case UVD_GPCOM_VCPU_CMD: 656 r = radeon_uvd_cs_reloc(p, *data0, *data1, 657 buf_sizes, has_msg_cmd); 658 if (r) 659 return r; 660 break; 661 case UVD_ENGINE_CNTL: 662 case UVD_NO_OP: 663 break; 664 default: 665 DRM_ERROR("Invalid reg 0x%X!\n", 666 pkt->reg + i*4); 667 return -EINVAL; 668 } 669 p->idx++; 670 } 671 return 0; 672 } 673 674 int radeon_uvd_cs_parse(struct radeon_cs_parser *p) 675 { 676 struct radeon_cs_packet pkt; 677 int r, data0 = 0, data1 = 0; 678 679 /* does the IB has a msg command */ 680 bool has_msg_cmd = false; 681 682 /* minimum buffer sizes */ 683 unsigned buf_sizes[] = { 684 [0x00000000] = 2048, 685 [0x00000001] = 32 * 1024 * 1024, 686 [0x00000002] = 2048 * 1152 * 3, 687 [0x00000003] = 2048, 688 }; 689 690 if (p->chunk_ib->length_dw % 16) { 691 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 692 p->chunk_ib->length_dw); 693 return -EINVAL; 694 } 695 696 if (p->chunk_relocs == NULL) { 697 DRM_ERROR("No relocation chunk !\n"); 698 return -EINVAL; 699 } 700 701 702 do { 703 r = radeon_cs_packet_parse(p, &pkt, p->idx); 704 if (r) 705 return r; 706 switch (pkt.type) { 707 case RADEON_PACKET_TYPE0: 708 r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1, 709 buf_sizes, &has_msg_cmd); 710 if (r) 711 return r; 712 break; 713 case RADEON_PACKET_TYPE2: 714 p->idx += pkt.count + 2; 715 break; 716 default: 717 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 718 return -EINVAL; 719 } 720 } while (p->idx < p->chunk_ib->length_dw); 721 722 if (!has_msg_cmd) { 723 DRM_ERROR("UVD-IBs need a msg command!\n"); 724 return -EINVAL; 725 } 726 727 return 0; 728 } 729 730 static int radeon_uvd_send_msg(struct radeon_device *rdev, 731 int ring, uint64_t addr, 732 struct radeon_fence **fence) 733 { 734 struct radeon_ib ib; 735 int i, r; 736 737 r = radeon_ib_get(rdev, ring, &ib, NULL, 64); 738 if (r) 739 return r; 740 741 ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); 742 ib.ptr[1] = addr; 743 ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); 744 ib.ptr[3] = addr >> 32; 745 ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); 746 ib.ptr[5] = 0; 747 for (i = 6; i < 16; i += 2) { 748 ib.ptr[i] = PACKET0(UVD_NO_OP, 0); 749 ib.ptr[i+1] = 0; 750 } 751 ib.length_dw = 16; 752 753 r = radeon_ib_schedule(rdev, &ib, NULL, false); 754 755 if (fence) 756 *fence = radeon_fence_ref(ib.fence); 757 758 radeon_ib_free(rdev, &ib); 759 return r; 760 } 761 762 /* 763 * multiple fence commands without any stream commands in between can 764 * crash the vcpu so just try to emmit a dummy create/destroy msg to 765 * avoid this 766 */ 767 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, 768 uint32_t handle, struct radeon_fence **fence) 769 { 770 /* we use the last page of the vcpu bo for the UVD message */ 771 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 772 RADEON_GPU_PAGE_SIZE; 773 774 uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); 775 uint64_t addr = rdev->uvd.gpu_addr + offs; 776 777 int r, i; 778 779 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 780 if (r) 781 return r; 782 783 /* stitch together an UVD create msg */ 784 writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]); 785 writel(0x0, (void __iomem *)&msg[1]); 786 writel((__force u32)cpu_to_le32(handle), &msg[2]); 787 writel(0x0, &msg[3]); 788 writel(0x0, &msg[4]); 789 writel(0x0, &msg[5]); 790 writel(0x0, &msg[6]); 791 writel((__force u32)cpu_to_le32(0x00000780), &msg[7]); 792 writel((__force u32)cpu_to_le32(0x00000440), &msg[8]); 793 writel(0x0, &msg[9]); 794 writel((__force u32)cpu_to_le32(0x01b37000), &msg[10]); 795 for (i = 11; i < 1024; ++i) 796 writel(0x0, &msg[i]); 797 798 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 799 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 800 return r; 801 } 802 803 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, 804 uint32_t handle, struct radeon_fence **fence) 805 { 806 /* we use the last page of the vcpu bo for the UVD message */ 807 uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - 808 RADEON_GPU_PAGE_SIZE; 809 810 uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); 811 uint64_t addr = rdev->uvd.gpu_addr + offs; 812 813 int r, i; 814 815 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); 816 if (r) 817 return r; 818 819 /* stitch together an UVD destroy msg */ 820 writel((__force u32)cpu_to_le32(0x00000de4), &msg[0]); 821 writel((__force u32)cpu_to_le32(0x00000002), &msg[1]); 822 writel((__force u32)cpu_to_le32(handle), &msg[2]); 823 writel(0x0, &msg[3]); 824 for (i = 4; i < 1024; ++i) 825 writel(0x0, &msg[i]); 826 827 r = radeon_uvd_send_msg(rdev, ring, addr, fence); 828 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 829 return r; 830 } 831 832 /** 833 * radeon_uvd_count_handles - count number of open streams 834 * 835 * @rdev: radeon_device pointer 836 * @sd: number of SD streams 837 * @hd: number of HD streams 838 * 839 * Count the number of open SD/HD streams as a hint for power mangement 840 */ 841 static void radeon_uvd_count_handles(struct radeon_device *rdev, 842 unsigned *sd, unsigned *hd) 843 { 844 unsigned i; 845 846 *sd = 0; 847 *hd = 0; 848 849 for (i = 0; i < rdev->uvd.max_handles; ++i) { 850 if (!atomic_read(&rdev->uvd.handles[i])) 851 continue; 852 853 if (rdev->uvd.img_size[i] >= 720*576) 854 ++(*hd); 855 else 856 ++(*sd); 857 } 858 } 859 860 static void radeon_uvd_idle_work_handler(struct work_struct *work) 861 { 862 struct radeon_device *rdev = 863 container_of(work, struct radeon_device, uvd.idle_work.work); 864 865 if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { 866 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 867 radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, 868 &rdev->pm.dpm.hd); 869 radeon_dpm_enable_uvd(rdev, false); 870 } else { 871 radeon_set_uvd_clocks(rdev, 0, 0); 872 } 873 } else { 874 schedule_delayed_work(&rdev->uvd.idle_work, 875 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 876 } 877 } 878 879 void radeon_uvd_note_usage(struct radeon_device *rdev) 880 { 881 bool streams_changed = false; 882 bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); 883 set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, 884 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 885 886 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 887 unsigned hd = 0, sd = 0; 888 radeon_uvd_count_handles(rdev, &sd, &hd); 889 if ((rdev->pm.dpm.sd != sd) || 890 (rdev->pm.dpm.hd != hd)) { 891 rdev->pm.dpm.sd = sd; 892 rdev->pm.dpm.hd = hd; 893 /* disable this for now */ 894 /*streams_changed = true;*/ 895 } 896 } 897 898 if (set_clocks || streams_changed) { 899 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 900 radeon_dpm_enable_uvd(rdev, true); 901 } else { 902 radeon_set_uvd_clocks(rdev, 53300, 40000); 903 } 904 } 905 } 906 907 static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq, 908 unsigned target_freq, 909 unsigned pd_min, 910 unsigned pd_even) 911 { 912 unsigned post_div = vco_freq / target_freq; 913 914 /* adjust to post divider minimum value */ 915 if (post_div < pd_min) 916 post_div = pd_min; 917 918 /* we alway need a frequency less than or equal the target */ 919 if ((vco_freq / post_div) > target_freq) 920 post_div += 1; 921 922 /* post dividers above a certain value must be even */ 923 if (post_div > pd_even && post_div % 2) 924 post_div += 1; 925 926 return post_div; 927 } 928 929 /** 930 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers 931 * 932 * @rdev: radeon_device pointer 933 * @vclk: wanted VCLK 934 * @dclk: wanted DCLK 935 * @vco_min: minimum VCO frequency 936 * @vco_max: maximum VCO frequency 937 * @fb_factor: factor to multiply vco freq with 938 * @fb_mask: limit and bitmask for feedback divider 939 * @pd_min: post divider minimum 940 * @pd_max: post divider maximum 941 * @pd_even: post divider must be even above this value 942 * @optimal_fb_div: resulting feedback divider 943 * @optimal_vclk_div: resulting vclk post divider 944 * @optimal_dclk_div: resulting dclk post divider 945 * 946 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). 947 * Returns zero on success -EINVAL on error. 948 */ 949 int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, 950 unsigned vclk, unsigned dclk, 951 unsigned vco_min, unsigned vco_max, 952 unsigned fb_factor, unsigned fb_mask, 953 unsigned pd_min, unsigned pd_max, 954 unsigned pd_even, 955 unsigned *optimal_fb_div, 956 unsigned *optimal_vclk_div, 957 unsigned *optimal_dclk_div) 958 { 959 unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq; 960 961 /* start off with something large */ 962 unsigned optimal_score = ~0; 963 964 /* loop through vco from low to high */ 965 vco_min = max(max(vco_min, vclk), dclk); 966 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 967 968 uint64_t fb_div = (uint64_t)vco_freq * fb_factor; 969 unsigned vclk_div, dclk_div, score; 970 971 do_div(fb_div, ref_freq); 972 973 /* fb div out of range ? */ 974 if (fb_div > fb_mask) 975 break; /* it can oly get worse */ 976 977 fb_div &= fb_mask; 978 979 /* calc vclk divider with current vco freq */ 980 vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk, 981 pd_min, pd_even); 982 if (vclk_div > pd_max) 983 break; /* vco is too big, it has to stop */ 984 985 /* calc dclk divider with current vco freq */ 986 dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, 987 pd_min, pd_even); 988 if (dclk_div > pd_max) 989 break; /* vco is too big, it has to stop */ 990 991 /* calc score with current vco freq */ 992 score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); 993 994 /* determine if this vco setting is better than current optimal settings */ 995 if (score < optimal_score) { 996 *optimal_fb_div = fb_div; 997 *optimal_vclk_div = vclk_div; 998 *optimal_dclk_div = dclk_div; 999 optimal_score = score; 1000 if (optimal_score == 0) 1001 break; /* it can't get better than this */ 1002 } 1003 } 1004 1005 /* did we found a valid setup ? */ 1006 if (optimal_score == ~0) 1007 return -EINVAL; 1008 1009 return 0; 1010 } 1011 1012 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 1013 unsigned cg_upll_func_cntl) 1014 { 1015 unsigned i; 1016 1017 /* make sure UPLL_CTLREQ is deasserted */ 1018 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1019 1020 mdelay(10); 1021 1022 /* assert UPLL_CTLREQ */ 1023 WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); 1024 1025 /* wait for CTLACK and CTLACK2 to get asserted */ 1026 for (i = 0; i < 100; ++i) { 1027 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; 1028 if ((RREG32(cg_upll_func_cntl) & mask) == mask) 1029 break; 1030 mdelay(10); 1031 } 1032 1033 /* deassert UPLL_CTLREQ */ 1034 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 1035 1036 if (i == 100) { 1037 DRM_ERROR("Timeout setting UVD clocks!\n"); 1038 return -ETIMEDOUT; 1039 } 1040 1041 return 0; 1042 } 1043