1 /* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <deathsimple@vodafone.de> 29 */ 30 31 #include <linux/firmware.h> 32 #include <linux/module.h> 33 #include <drm/drmP.h> 34 #include <drm/drm.h> 35 36 #include "radeon.h" 37 #include "r600d.h" 38 39 /* 1 second timeout */ 40 #define UVD_IDLE_TIMEOUT_MS 1000 41 42 /* Firmware Names */ 43 #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" 44 #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" 45 #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" 46 #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" 47 #define FIRMWARE_BONAIRE "radeon/BONAIRE_uvd.bin" 48 49 MODULE_FIRMWARE(FIRMWARE_RV710); 50 MODULE_FIRMWARE(FIRMWARE_CYPRESS); 51 MODULE_FIRMWARE(FIRMWARE_SUMO); 52 MODULE_FIRMWARE(FIRMWARE_TAHITI); 53 MODULE_FIRMWARE(FIRMWARE_BONAIRE); 54 55 static void radeon_uvd_idle_work_handler(struct work_struct *work); 56 57 int radeon_uvd_init(struct radeon_device *rdev) 58 { 59 unsigned long bo_size; 60 const char *fw_name; 61 int i, r; 62 63 INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); 64 65 switch (rdev->family) { 66 case CHIP_RV710: 67 case CHIP_RV730: 68 case CHIP_RV740: 69 fw_name = FIRMWARE_RV710; 70 break; 71 72 case CHIP_CYPRESS: 73 case CHIP_HEMLOCK: 74 case CHIP_JUNIPER: 75 case CHIP_REDWOOD: 76 case CHIP_CEDAR: 77 fw_name = FIRMWARE_CYPRESS; 78 break; 79 80 case CHIP_SUMO: 81 case CHIP_SUMO2: 82 case CHIP_PALM: 83 case CHIP_CAYMAN: 84 case CHIP_BARTS: 85 case CHIP_TURKS: 86 case CHIP_CAICOS: 87 fw_name = FIRMWARE_SUMO; 88 break; 89 90 case CHIP_TAHITI: 91 case CHIP_VERDE: 92 case CHIP_PITCAIRN: 93 case CHIP_ARUBA: 94 case CHIP_OLAND: 95 fw_name = FIRMWARE_TAHITI; 96 break; 97 98 case CHIP_BONAIRE: 99 case CHIP_KABINI: 100 case CHIP_KAVERI: 101 case CHIP_HAWAII: 102 fw_name = FIRMWARE_BONAIRE; 103 break; 104 105 default: 106 return -EINVAL; 107 } 108 109 r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); 110 if (r) { 111 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 112 fw_name); 113 return r; 114 } 115 116 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + 117 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; 118 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, 119 RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); 120 if (r) { 121 dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); 122 return r; 123 } 124 125 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 126 if (r) { 127 radeon_bo_unref(&rdev->uvd.vcpu_bo); 128 dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); 129 return r; 130 } 131 132 r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 133 &rdev->uvd.gpu_addr); 134 if (r) { 135 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 136 radeon_bo_unref(&rdev->uvd.vcpu_bo); 137 dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); 138 return r; 139 } 140 141 r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); 142 if (r) { 143 dev_err(rdev->dev, "(%d) UVD map failed\n", r); 144 return r; 145 } 146 147 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 148 149 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 150 atomic_set(&rdev->uvd.handles[i], 0); 151 rdev->uvd.filp[i] = NULL; 152 rdev->uvd.img_size[i] = 0; 153 } 154 155 return 0; 156 } 157 158 void radeon_uvd_fini(struct radeon_device *rdev) 159 { 160 int r; 161 162 if (rdev->uvd.vcpu_bo == NULL) 163 return; 164 165 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 166 if (!r) { 167 radeon_bo_kunmap(rdev->uvd.vcpu_bo); 168 radeon_bo_unpin(rdev->uvd.vcpu_bo); 169 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 170 } 171 172 radeon_bo_unref(&rdev->uvd.vcpu_bo); 173 174 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]); 175 176 release_firmware(rdev->uvd_fw); 177 } 178 179 int radeon_uvd_suspend(struct radeon_device *rdev) 180 { 181 unsigned size; 182 void *ptr; 183 int i; 184 185 if (rdev->uvd.vcpu_bo == NULL) 186 return 0; 187 188 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) 189 if (atomic_read(&rdev->uvd.handles[i])) 190 break; 191 192 if (i == RADEON_MAX_UVD_HANDLES) 193 return 0; 194 195 size = radeon_bo_size(rdev->uvd.vcpu_bo); 196 size -= rdev->uvd_fw->size; 197 198 ptr = rdev->uvd.cpu_addr; 199 ptr += rdev->uvd_fw->size; 200 201 rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); 202 memcpy(rdev->uvd.saved_bo, ptr, size); 203 204 return 0; 205 } 206 207 int radeon_uvd_resume(struct radeon_device *rdev) 208 { 209 unsigned size; 210 void *ptr; 211 212 if (rdev->uvd.vcpu_bo == NULL) 213 return -EINVAL; 214 215 memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); 216 217 size = radeon_bo_size(rdev->uvd.vcpu_bo); 218 size -= rdev->uvd_fw->size; 219 220 ptr = rdev->uvd.cpu_addr; 221 ptr += rdev->uvd_fw->size; 222 223 if (rdev->uvd.saved_bo != NULL) { 224 memcpy(ptr, rdev->uvd.saved_bo, size); 225 kfree(rdev->uvd.saved_bo); 226 rdev->uvd.saved_bo = NULL; 227 } else 228 memset(ptr, 0, size); 229 230 return 0; 231 } 232 233 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) 234 { 235 rbo->placement.fpfn = 0 >> PAGE_SHIFT; 236 rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 237 } 238 239 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) 240 { 241 int i, r; 242 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 243 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 244 if (handle != 0 && rdev->uvd.filp[i] == filp) { 245 struct radeon_fence *fence; 246 247 radeon_uvd_note_usage(rdev); 248 249 r = radeon_uvd_get_destroy_msg(rdev, 250 R600_RING_TYPE_UVD_INDEX, handle, &fence); 251 if (r) { 252 DRM_ERROR("Error destroying UVD (%d)!\n", r); 253 continue; 254 } 255 256 radeon_fence_wait(fence, false); 257 radeon_fence_unref(&fence); 258 259 rdev->uvd.filp[i] = NULL; 260 atomic_set(&rdev->uvd.handles[i], 0); 261 } 262 } 263 } 264 265 static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) 266 { 267 unsigned stream_type = msg[4]; 268 unsigned width = msg[6]; 269 unsigned height = msg[7]; 270 unsigned dpb_size = msg[9]; 271 unsigned pitch = msg[28]; 272 273 unsigned width_in_mb = width / 16; 274 unsigned height_in_mb = ALIGN(height / 16, 2); 275 276 unsigned image_size, tmp, min_dpb_size; 277 278 image_size = width * height; 279 image_size += image_size / 2; 280 image_size = ALIGN(image_size, 1024); 281 282 switch (stream_type) { 283 case 0: /* H264 */ 284 285 /* reference picture buffer */ 286 min_dpb_size = image_size * 17; 287 288 /* macroblock context buffer */ 289 min_dpb_size += width_in_mb * height_in_mb * 17 * 192; 290 291 /* IT surface buffer */ 292 min_dpb_size += width_in_mb * height_in_mb * 32; 293 break; 294 295 case 1: /* VC1 */ 296 297 /* reference picture buffer */ 298 min_dpb_size = image_size * 3; 299 300 /* CONTEXT_BUFFER */ 301 min_dpb_size += width_in_mb * height_in_mb * 128; 302 303 /* IT surface buffer */ 304 min_dpb_size += width_in_mb * 64; 305 306 /* DB surface buffer */ 307 min_dpb_size += width_in_mb * 128; 308 309 /* BP */ 310 tmp = max(width_in_mb, height_in_mb); 311 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 312 break; 313 314 case 3: /* MPEG2 */ 315 316 /* reference picture buffer */ 317 min_dpb_size = image_size * 3; 318 break; 319 320 case 4: /* MPEG4 */ 321 322 /* reference picture buffer */ 323 min_dpb_size = image_size * 3; 324 325 /* CM */ 326 min_dpb_size += width_in_mb * height_in_mb * 64; 327 328 /* IT surface buffer */ 329 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 330 break; 331 332 default: 333 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 334 return -EINVAL; 335 } 336 337 if (width > pitch) { 338 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 339 return -EINVAL; 340 } 341 342 if (dpb_size < min_dpb_size) { 343 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 344 dpb_size, min_dpb_size); 345 return -EINVAL; 346 } 347 348 buf_sizes[0x1] = dpb_size; 349 buf_sizes[0x2] = image_size; 350 return 0; 351 } 352 353 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, 354 unsigned offset, unsigned buf_sizes[]) 355 { 356 int32_t *msg, msg_type, handle; 357 unsigned img_size = 0; 358 void *ptr; 359 360 int i, r; 361 362 if (offset & 0x3F) { 363 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 364 return -EINVAL; 365 } 366 367 if (bo->tbo.sync_obj) { 368 r = radeon_fence_wait(bo->tbo.sync_obj, false); 369 if (r) { 370 DRM_ERROR("Failed waiting for UVD message (%d)!\n", r); 371 return r; 372 } 373 } 374 375 r = radeon_bo_kmap(bo, &ptr); 376 if (r) { 377 DRM_ERROR("Failed mapping the UVD message (%d)!\n", r); 378 return r; 379 } 380 381 msg = ptr + offset; 382 383 msg_type = msg[1]; 384 handle = msg[2]; 385 386 if (handle == 0) { 387 DRM_ERROR("Invalid UVD handle!\n"); 388 return -EINVAL; 389 } 390 391 if (msg_type == 1) { 392 /* it's a decode msg, calc buffer sizes */ 393 r = radeon_uvd_cs_msg_decode(msg, buf_sizes); 394 /* calc image size (width * height) */ 395 img_size = msg[6] * msg[7]; 396 radeon_bo_kunmap(bo); 397 if (r) 398 return r; 399 400 } else if (msg_type == 2) { 401 /* it's a destroy msg, free the handle */ 402 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) 403 atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); 404 radeon_bo_kunmap(bo); 405 return 0; 406 } else { 407 /* it's a create msg, calc image size (width * height) */ 408 img_size = msg[7] * msg[8]; 409 radeon_bo_kunmap(bo); 410 411 if (msg_type != 0) { 412 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 413 return -EINVAL; 414 } 415 416 /* it's a create msg, no special handling needed */ 417 } 418 419 /* create or decode, validate the handle */ 420 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 421 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) 422 return 0; 423 } 424 425 /* handle not found try to alloc a new one */ 426 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 427 if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { 428 p->rdev->uvd.filp[i] = p->filp; 429 p->rdev->uvd.img_size[i] = img_size; 430 return 0; 431 } 432 } 433 434 DRM_ERROR("No more free UVD handles!\n"); 435 return -EINVAL; 436 } 437 438 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, 439 int data0, int data1, 440 unsigned buf_sizes[], bool *has_msg_cmd) 441 { 442 struct radeon_cs_chunk *relocs_chunk; 443 struct radeon_cs_reloc *reloc; 444 unsigned idx, cmd, offset; 445 uint64_t start, end; 446 int r; 447 448 relocs_chunk = &p->chunks[p->chunk_relocs_idx]; 449 offset = radeon_get_ib_value(p, data0); 450 idx = radeon_get_ib_value(p, data1); 451 if (idx >= relocs_chunk->length_dw) { 452 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 453 idx, relocs_chunk->length_dw); 454 return -EINVAL; 455 } 456 457 reloc = p->relocs_ptr[(idx / 4)]; 458 start = reloc->gpu_offset; 459 end = start + radeon_bo_size(reloc->robj); 460 start += offset; 461 462 p->ib.ptr[data0] = start & 0xFFFFFFFF; 463 p->ib.ptr[data1] = start >> 32; 464 465 cmd = radeon_get_ib_value(p, p->idx) >> 1; 466 467 if (cmd < 0x4) { 468 if ((end - start) < buf_sizes[cmd]) { 469 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 470 (unsigned)(end - start), buf_sizes[cmd]); 471 return -EINVAL; 472 } 473 474 } else if (cmd != 0x100) { 475 DRM_ERROR("invalid UVD command %X!\n", cmd); 476 return -EINVAL; 477 } 478 479 if ((start >> 28) != ((end - 1) >> 28)) { 480 DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", 481 start, end); 482 return -EINVAL; 483 } 484 485 /* TODO: is this still necessary on NI+ ? */ 486 if ((cmd == 0 || cmd == 0x3) && 487 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 488 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 489 start, end); 490 return -EINVAL; 491 } 492 493 if (cmd == 0) { 494 if (*has_msg_cmd) { 495 DRM_ERROR("More than one message in a UVD-IB!\n"); 496 return -EINVAL; 497 } 498 *has_msg_cmd = true; 499 r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes); 500 if (r) 501 return r; 502 } else if (!*has_msg_cmd) { 503 DRM_ERROR("Message needed before other commands are send!\n"); 504 return -EINVAL; 505 } 506 507 return 0; 508 } 509 510 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p, 511 struct radeon_cs_packet *pkt, 512 int *data0, int *data1, 513 unsigned buf_sizes[], 514 bool *has_msg_cmd) 515 { 516 int i, r; 517 518 p->idx++; 519 for (i = 0; i <= pkt->count; ++i) { 520 switch (pkt->reg + i*4) { 521 case UVD_GPCOM_VCPU_DATA0: 522 *data0 = p->idx; 523 break; 524 case UVD_GPCOM_VCPU_DATA1: 525 *data1 = p->idx; 526 break; 527 case UVD_GPCOM_VCPU_CMD: 528 r = radeon_uvd_cs_reloc(p, *data0, *data1, 529 buf_sizes, has_msg_cmd); 530 if (r) 531 return r; 532 break; 533 case UVD_ENGINE_CNTL: 534 break; 535 default: 536 DRM_ERROR("Invalid reg 0x%X!\n", 537 pkt->reg + i*4); 538 return -EINVAL; 539 } 540 p->idx++; 541 } 542 return 0; 543 } 544 545 int radeon_uvd_cs_parse(struct radeon_cs_parser *p) 546 { 547 struct radeon_cs_packet pkt; 548 int r, data0 = 0, data1 = 0; 549 550 /* does the IB has a msg command */ 551 bool has_msg_cmd = false; 552 553 /* minimum buffer sizes */ 554 unsigned buf_sizes[] = { 555 [0x00000000] = 2048, 556 [0x00000001] = 32 * 1024 * 1024, 557 [0x00000002] = 2048 * 1152 * 3, 558 [0x00000003] = 2048, 559 }; 560 561 if (p->chunks[p->chunk_ib_idx].length_dw % 16) { 562 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 563 p->chunks[p->chunk_ib_idx].length_dw); 564 return -EINVAL; 565 } 566 567 if (p->chunk_relocs_idx == -1) { 568 DRM_ERROR("No relocation chunk !\n"); 569 return -EINVAL; 570 } 571 572 573 do { 574 r = radeon_cs_packet_parse(p, &pkt, p->idx); 575 if (r) 576 return r; 577 switch (pkt.type) { 578 case RADEON_PACKET_TYPE0: 579 r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1, 580 buf_sizes, &has_msg_cmd); 581 if (r) 582 return r; 583 break; 584 case RADEON_PACKET_TYPE2: 585 p->idx += pkt.count + 2; 586 break; 587 default: 588 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 589 return -EINVAL; 590 } 591 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 592 593 if (!has_msg_cmd) { 594 DRM_ERROR("UVD-IBs need a msg command!\n"); 595 return -EINVAL; 596 } 597 598 return 0; 599 } 600 601 static int radeon_uvd_send_msg(struct radeon_device *rdev, 602 int ring, struct radeon_bo *bo, 603 struct radeon_fence **fence) 604 { 605 struct ttm_validate_buffer tv; 606 struct ww_acquire_ctx ticket; 607 struct list_head head; 608 struct radeon_ib ib; 609 uint64_t addr; 610 int i, r; 611 612 memset(&tv, 0, sizeof(tv)); 613 tv.bo = &bo->tbo; 614 615 INIT_LIST_HEAD(&head); 616 list_add(&tv.head, &head); 617 618 r = ttm_eu_reserve_buffers(&ticket, &head); 619 if (r) 620 return r; 621 622 radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); 623 radeon_uvd_force_into_uvd_segment(bo); 624 625 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 626 if (r) 627 goto err; 628 629 r = radeon_ib_get(rdev, ring, &ib, NULL, 64); 630 if (r) 631 goto err; 632 633 addr = radeon_bo_gpu_offset(bo); 634 ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); 635 ib.ptr[1] = addr; 636 ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); 637 ib.ptr[3] = addr >> 32; 638 ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); 639 ib.ptr[5] = 0; 640 for (i = 6; i < 16; ++i) 641 ib.ptr[i] = PACKET2(0); 642 ib.length_dw = 16; 643 644 r = radeon_ib_schedule(rdev, &ib, NULL); 645 if (r) 646 goto err; 647 ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); 648 649 if (fence) 650 *fence = radeon_fence_ref(ib.fence); 651 652 radeon_ib_free(rdev, &ib); 653 radeon_bo_unref(&bo); 654 return 0; 655 656 err: 657 ttm_eu_backoff_reservation(&ticket, &head); 658 return r; 659 } 660 661 /* multiple fence commands without any stream commands in between can 662 crash the vcpu so just try to emmit a dummy create/destroy msg to 663 avoid this */ 664 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, 665 uint32_t handle, struct radeon_fence **fence) 666 { 667 struct radeon_bo *bo; 668 uint32_t *msg; 669 int r, i; 670 671 r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, 672 RADEON_GEM_DOMAIN_VRAM, NULL, &bo); 673 if (r) 674 return r; 675 676 r = radeon_bo_reserve(bo, false); 677 if (r) { 678 radeon_bo_unref(&bo); 679 return r; 680 } 681 682 r = radeon_bo_kmap(bo, (void **)&msg); 683 if (r) { 684 radeon_bo_unreserve(bo); 685 radeon_bo_unref(&bo); 686 return r; 687 } 688 689 /* stitch together an UVD create msg */ 690 msg[0] = cpu_to_le32(0x00000de4); 691 msg[1] = cpu_to_le32(0x00000000); 692 msg[2] = cpu_to_le32(handle); 693 msg[3] = cpu_to_le32(0x00000000); 694 msg[4] = cpu_to_le32(0x00000000); 695 msg[5] = cpu_to_le32(0x00000000); 696 msg[6] = cpu_to_le32(0x00000000); 697 msg[7] = cpu_to_le32(0x00000780); 698 msg[8] = cpu_to_le32(0x00000440); 699 msg[9] = cpu_to_le32(0x00000000); 700 msg[10] = cpu_to_le32(0x01b37000); 701 for (i = 11; i < 1024; ++i) 702 msg[i] = cpu_to_le32(0x0); 703 704 radeon_bo_kunmap(bo); 705 radeon_bo_unreserve(bo); 706 707 return radeon_uvd_send_msg(rdev, ring, bo, fence); 708 } 709 710 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, 711 uint32_t handle, struct radeon_fence **fence) 712 { 713 struct radeon_bo *bo; 714 uint32_t *msg; 715 int r, i; 716 717 r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, 718 RADEON_GEM_DOMAIN_VRAM, NULL, &bo); 719 if (r) 720 return r; 721 722 r = radeon_bo_reserve(bo, false); 723 if (r) { 724 radeon_bo_unref(&bo); 725 return r; 726 } 727 728 r = radeon_bo_kmap(bo, (void **)&msg); 729 if (r) { 730 radeon_bo_unreserve(bo); 731 radeon_bo_unref(&bo); 732 return r; 733 } 734 735 /* stitch together an UVD destroy msg */ 736 msg[0] = cpu_to_le32(0x00000de4); 737 msg[1] = cpu_to_le32(0x00000002); 738 msg[2] = cpu_to_le32(handle); 739 msg[3] = cpu_to_le32(0x00000000); 740 for (i = 4; i < 1024; ++i) 741 msg[i] = cpu_to_le32(0x0); 742 743 radeon_bo_kunmap(bo); 744 radeon_bo_unreserve(bo); 745 746 return radeon_uvd_send_msg(rdev, ring, bo, fence); 747 } 748 749 /** 750 * radeon_uvd_count_handles - count number of open streams 751 * 752 * @rdev: radeon_device pointer 753 * @sd: number of SD streams 754 * @hd: number of HD streams 755 * 756 * Count the number of open SD/HD streams as a hint for power mangement 757 */ 758 static void radeon_uvd_count_handles(struct radeon_device *rdev, 759 unsigned *sd, unsigned *hd) 760 { 761 unsigned i; 762 763 *sd = 0; 764 *hd = 0; 765 766 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 767 if (!atomic_read(&rdev->uvd.handles[i])) 768 continue; 769 770 if (rdev->uvd.img_size[i] >= 720*576) 771 ++(*hd); 772 else 773 ++(*sd); 774 } 775 } 776 777 static void radeon_uvd_idle_work_handler(struct work_struct *work) 778 { 779 struct radeon_device *rdev = 780 container_of(work, struct radeon_device, uvd.idle_work.work); 781 782 if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { 783 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 784 radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, 785 &rdev->pm.dpm.hd); 786 radeon_dpm_enable_uvd(rdev, false); 787 } else { 788 radeon_set_uvd_clocks(rdev, 0, 0); 789 } 790 } else { 791 schedule_delayed_work(&rdev->uvd.idle_work, 792 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 793 } 794 } 795 796 void radeon_uvd_note_usage(struct radeon_device *rdev) 797 { 798 bool streams_changed = false; 799 bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); 800 set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, 801 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 802 803 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 804 unsigned hd = 0, sd = 0; 805 radeon_uvd_count_handles(rdev, &sd, &hd); 806 if ((rdev->pm.dpm.sd != sd) || 807 (rdev->pm.dpm.hd != hd)) { 808 rdev->pm.dpm.sd = sd; 809 rdev->pm.dpm.hd = hd; 810 streams_changed = true; 811 } 812 } 813 814 if (set_clocks || streams_changed) { 815 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 816 radeon_dpm_enable_uvd(rdev, true); 817 } else { 818 radeon_set_uvd_clocks(rdev, 53300, 40000); 819 } 820 } 821 } 822 823 static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq, 824 unsigned target_freq, 825 unsigned pd_min, 826 unsigned pd_even) 827 { 828 unsigned post_div = vco_freq / target_freq; 829 830 /* adjust to post divider minimum value */ 831 if (post_div < pd_min) 832 post_div = pd_min; 833 834 /* we alway need a frequency less than or equal the target */ 835 if ((vco_freq / post_div) > target_freq) 836 post_div += 1; 837 838 /* post dividers above a certain value must be even */ 839 if (post_div > pd_even && post_div % 2) 840 post_div += 1; 841 842 return post_div; 843 } 844 845 /** 846 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers 847 * 848 * @rdev: radeon_device pointer 849 * @vclk: wanted VCLK 850 * @dclk: wanted DCLK 851 * @vco_min: minimum VCO frequency 852 * @vco_max: maximum VCO frequency 853 * @fb_factor: factor to multiply vco freq with 854 * @fb_mask: limit and bitmask for feedback divider 855 * @pd_min: post divider minimum 856 * @pd_max: post divider maximum 857 * @pd_even: post divider must be even above this value 858 * @optimal_fb_div: resulting feedback divider 859 * @optimal_vclk_div: resulting vclk post divider 860 * @optimal_dclk_div: resulting dclk post divider 861 * 862 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). 863 * Returns zero on success -EINVAL on error. 864 */ 865 int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, 866 unsigned vclk, unsigned dclk, 867 unsigned vco_min, unsigned vco_max, 868 unsigned fb_factor, unsigned fb_mask, 869 unsigned pd_min, unsigned pd_max, 870 unsigned pd_even, 871 unsigned *optimal_fb_div, 872 unsigned *optimal_vclk_div, 873 unsigned *optimal_dclk_div) 874 { 875 unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq; 876 877 /* start off with something large */ 878 unsigned optimal_score = ~0; 879 880 /* loop through vco from low to high */ 881 vco_min = max(max(vco_min, vclk), dclk); 882 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 883 884 uint64_t fb_div = (uint64_t)vco_freq * fb_factor; 885 unsigned vclk_div, dclk_div, score; 886 887 do_div(fb_div, ref_freq); 888 889 /* fb div out of range ? */ 890 if (fb_div > fb_mask) 891 break; /* it can oly get worse */ 892 893 fb_div &= fb_mask; 894 895 /* calc vclk divider with current vco freq */ 896 vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk, 897 pd_min, pd_even); 898 if (vclk_div > pd_max) 899 break; /* vco is too big, it has to stop */ 900 901 /* calc dclk divider with current vco freq */ 902 dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, 903 pd_min, pd_even); 904 if (vclk_div > pd_max) 905 break; /* vco is too big, it has to stop */ 906 907 /* calc score with current vco freq */ 908 score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); 909 910 /* determine if this vco setting is better than current optimal settings */ 911 if (score < optimal_score) { 912 *optimal_fb_div = fb_div; 913 *optimal_vclk_div = vclk_div; 914 *optimal_dclk_div = dclk_div; 915 optimal_score = score; 916 if (optimal_score == 0) 917 break; /* it can't get better than this */ 918 } 919 } 920 921 /* did we found a valid setup ? */ 922 if (optimal_score == ~0) 923 return -EINVAL; 924 925 return 0; 926 } 927 928 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 929 unsigned cg_upll_func_cntl) 930 { 931 unsigned i; 932 933 /* make sure UPLL_CTLREQ is deasserted */ 934 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 935 936 mdelay(10); 937 938 /* assert UPLL_CTLREQ */ 939 WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); 940 941 /* wait for CTLACK and CTLACK2 to get asserted */ 942 for (i = 0; i < 100; ++i) { 943 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; 944 if ((RREG32(cg_upll_func_cntl) & mask) == mask) 945 break; 946 mdelay(10); 947 } 948 949 /* deassert UPLL_CTLREQ */ 950 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 951 952 if (i == 100) { 953 DRM_ERROR("Timeout setting UVD clocks!\n"); 954 return -ETIMEDOUT; 955 } 956 957 return 0; 958 } 959