1 /* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <deathsimple@vodafone.de> 29 */ 30 31 #include <linux/firmware.h> 32 #include <linux/module.h> 33 #include <drm/drmP.h> 34 #include <drm/drm.h> 35 36 #include "radeon.h" 37 #include "r600d.h" 38 39 /* 1 second timeout */ 40 #define UVD_IDLE_TIMEOUT_MS 1000 41 42 /* Firmware Names */ 43 #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" 44 #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" 45 #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" 46 #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" 47 #define FIRMWARE_BONAIRE "radeon/BONAIRE_uvd.bin" 48 49 MODULE_FIRMWARE(FIRMWARE_RV710); 50 MODULE_FIRMWARE(FIRMWARE_CYPRESS); 51 MODULE_FIRMWARE(FIRMWARE_SUMO); 52 MODULE_FIRMWARE(FIRMWARE_TAHITI); 53 MODULE_FIRMWARE(FIRMWARE_BONAIRE); 54 55 static void radeon_uvd_idle_work_handler(struct work_struct *work); 56 57 int radeon_uvd_init(struct radeon_device *rdev) 58 { 59 unsigned long bo_size; 60 const char *fw_name; 61 int i, r; 62 63 INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); 64 65 switch (rdev->family) { 66 case CHIP_RV710: 67 case CHIP_RV730: 68 case CHIP_RV740: 69 fw_name = FIRMWARE_RV710; 70 break; 71 72 case CHIP_CYPRESS: 73 case CHIP_HEMLOCK: 74 case CHIP_JUNIPER: 75 case CHIP_REDWOOD: 76 case CHIP_CEDAR: 77 fw_name = FIRMWARE_CYPRESS; 78 break; 79 80 case CHIP_SUMO: 81 case CHIP_SUMO2: 82 case CHIP_PALM: 83 case CHIP_CAYMAN: 84 case CHIP_BARTS: 85 case CHIP_TURKS: 86 case CHIP_CAICOS: 87 fw_name = FIRMWARE_SUMO; 88 break; 89 90 case CHIP_TAHITI: 91 case CHIP_VERDE: 92 case CHIP_PITCAIRN: 93 case CHIP_ARUBA: 94 case CHIP_OLAND: 95 fw_name = FIRMWARE_TAHITI; 96 break; 97 98 case CHIP_BONAIRE: 99 case CHIP_KABINI: 100 case CHIP_KAVERI: 101 case CHIP_HAWAII: 102 fw_name = FIRMWARE_BONAIRE; 103 break; 104 105 default: 106 return -EINVAL; 107 } 108 109 r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev); 110 if (r) { 111 dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", 112 fw_name); 113 return r; 114 } 115 116 bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + 117 RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; 118 r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, 119 RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); 120 if (r) { 121 dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); 122 return r; 123 } 124 125 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 126 if (r) { 127 radeon_bo_unref(&rdev->uvd.vcpu_bo); 128 dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); 129 return r; 130 } 131 132 r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, 133 &rdev->uvd.gpu_addr); 134 if (r) { 135 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 136 radeon_bo_unref(&rdev->uvd.vcpu_bo); 137 dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); 138 return r; 139 } 140 141 r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); 142 if (r) { 143 dev_err(rdev->dev, "(%d) UVD map failed\n", r); 144 return r; 145 } 146 147 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 148 149 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 150 atomic_set(&rdev->uvd.handles[i], 0); 151 rdev->uvd.filp[i] = NULL; 152 rdev->uvd.img_size[i] = 0; 153 } 154 155 return 0; 156 } 157 158 void radeon_uvd_fini(struct radeon_device *rdev) 159 { 160 int r; 161 162 if (rdev->uvd.vcpu_bo == NULL) 163 return; 164 165 r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); 166 if (!r) { 167 radeon_bo_kunmap(rdev->uvd.vcpu_bo); 168 radeon_bo_unpin(rdev->uvd.vcpu_bo); 169 radeon_bo_unreserve(rdev->uvd.vcpu_bo); 170 } 171 172 radeon_bo_unref(&rdev->uvd.vcpu_bo); 173 174 release_firmware(rdev->uvd_fw); 175 } 176 177 int radeon_uvd_suspend(struct radeon_device *rdev) 178 { 179 unsigned size; 180 void *ptr; 181 int i; 182 183 if (rdev->uvd.vcpu_bo == NULL) 184 return 0; 185 186 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) 187 if (atomic_read(&rdev->uvd.handles[i])) 188 break; 189 190 if (i == RADEON_MAX_UVD_HANDLES) 191 return 0; 192 193 size = radeon_bo_size(rdev->uvd.vcpu_bo); 194 size -= rdev->uvd_fw->size; 195 196 ptr = rdev->uvd.cpu_addr; 197 ptr += rdev->uvd_fw->size; 198 199 rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); 200 memcpy(rdev->uvd.saved_bo, ptr, size); 201 202 return 0; 203 } 204 205 int radeon_uvd_resume(struct radeon_device *rdev) 206 { 207 unsigned size; 208 void *ptr; 209 210 if (rdev->uvd.vcpu_bo == NULL) 211 return -EINVAL; 212 213 memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); 214 215 size = radeon_bo_size(rdev->uvd.vcpu_bo); 216 size -= rdev->uvd_fw->size; 217 218 ptr = rdev->uvd.cpu_addr; 219 ptr += rdev->uvd_fw->size; 220 221 if (rdev->uvd.saved_bo != NULL) { 222 memcpy(ptr, rdev->uvd.saved_bo, size); 223 kfree(rdev->uvd.saved_bo); 224 rdev->uvd.saved_bo = NULL; 225 } else 226 memset(ptr, 0, size); 227 228 return 0; 229 } 230 231 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) 232 { 233 rbo->placement.fpfn = 0 >> PAGE_SHIFT; 234 rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 235 } 236 237 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) 238 { 239 int i, r; 240 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 241 uint32_t handle = atomic_read(&rdev->uvd.handles[i]); 242 if (handle != 0 && rdev->uvd.filp[i] == filp) { 243 struct radeon_fence *fence; 244 245 radeon_uvd_note_usage(rdev); 246 247 r = radeon_uvd_get_destroy_msg(rdev, 248 R600_RING_TYPE_UVD_INDEX, handle, &fence); 249 if (r) { 250 DRM_ERROR("Error destroying UVD (%d)!\n", r); 251 continue; 252 } 253 254 radeon_fence_wait(fence, false); 255 radeon_fence_unref(&fence); 256 257 rdev->uvd.filp[i] = NULL; 258 atomic_set(&rdev->uvd.handles[i], 0); 259 } 260 } 261 } 262 263 static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) 264 { 265 unsigned stream_type = msg[4]; 266 unsigned width = msg[6]; 267 unsigned height = msg[7]; 268 unsigned dpb_size = msg[9]; 269 unsigned pitch = msg[28]; 270 271 unsigned width_in_mb = width / 16; 272 unsigned height_in_mb = ALIGN(height / 16, 2); 273 274 unsigned image_size, tmp, min_dpb_size; 275 276 image_size = width * height; 277 image_size += image_size / 2; 278 image_size = ALIGN(image_size, 1024); 279 280 switch (stream_type) { 281 case 0: /* H264 */ 282 283 /* reference picture buffer */ 284 min_dpb_size = image_size * 17; 285 286 /* macroblock context buffer */ 287 min_dpb_size += width_in_mb * height_in_mb * 17 * 192; 288 289 /* IT surface buffer */ 290 min_dpb_size += width_in_mb * height_in_mb * 32; 291 break; 292 293 case 1: /* VC1 */ 294 295 /* reference picture buffer */ 296 min_dpb_size = image_size * 3; 297 298 /* CONTEXT_BUFFER */ 299 min_dpb_size += width_in_mb * height_in_mb * 128; 300 301 /* IT surface buffer */ 302 min_dpb_size += width_in_mb * 64; 303 304 /* DB surface buffer */ 305 min_dpb_size += width_in_mb * 128; 306 307 /* BP */ 308 tmp = max(width_in_mb, height_in_mb); 309 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 310 break; 311 312 case 3: /* MPEG2 */ 313 314 /* reference picture buffer */ 315 min_dpb_size = image_size * 3; 316 break; 317 318 case 4: /* MPEG4 */ 319 320 /* reference picture buffer */ 321 min_dpb_size = image_size * 3; 322 323 /* CM */ 324 min_dpb_size += width_in_mb * height_in_mb * 64; 325 326 /* IT surface buffer */ 327 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 328 break; 329 330 default: 331 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 332 return -EINVAL; 333 } 334 335 if (width > pitch) { 336 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 337 return -EINVAL; 338 } 339 340 if (dpb_size < min_dpb_size) { 341 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 342 dpb_size, min_dpb_size); 343 return -EINVAL; 344 } 345 346 buf_sizes[0x1] = dpb_size; 347 buf_sizes[0x2] = image_size; 348 return 0; 349 } 350 351 static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, 352 unsigned offset, unsigned buf_sizes[]) 353 { 354 int32_t *msg, msg_type, handle; 355 unsigned img_size = 0; 356 void *ptr; 357 358 int i, r; 359 360 if (offset & 0x3F) { 361 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 362 return -EINVAL; 363 } 364 365 if (bo->tbo.sync_obj) { 366 r = radeon_fence_wait(bo->tbo.sync_obj, false); 367 if (r) { 368 DRM_ERROR("Failed waiting for UVD message (%d)!\n", r); 369 return r; 370 } 371 } 372 373 r = radeon_bo_kmap(bo, &ptr); 374 if (r) { 375 DRM_ERROR("Failed mapping the UVD message (%d)!\n", r); 376 return r; 377 } 378 379 msg = ptr + offset; 380 381 msg_type = msg[1]; 382 handle = msg[2]; 383 384 if (handle == 0) { 385 DRM_ERROR("Invalid UVD handle!\n"); 386 return -EINVAL; 387 } 388 389 if (msg_type == 1) { 390 /* it's a decode msg, calc buffer sizes */ 391 r = radeon_uvd_cs_msg_decode(msg, buf_sizes); 392 /* calc image size (width * height) */ 393 img_size = msg[6] * msg[7]; 394 radeon_bo_kunmap(bo); 395 if (r) 396 return r; 397 398 } else if (msg_type == 2) { 399 /* it's a destroy msg, free the handle */ 400 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) 401 atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); 402 radeon_bo_kunmap(bo); 403 return 0; 404 } else { 405 /* it's a create msg, calc image size (width * height) */ 406 img_size = msg[7] * msg[8]; 407 radeon_bo_kunmap(bo); 408 409 if (msg_type != 0) { 410 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 411 return -EINVAL; 412 } 413 414 /* it's a create msg, no special handling needed */ 415 } 416 417 /* create or decode, validate the handle */ 418 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 419 if (atomic_read(&p->rdev->uvd.handles[i]) == handle) 420 return 0; 421 } 422 423 /* handle not found try to alloc a new one */ 424 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 425 if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { 426 p->rdev->uvd.filp[i] = p->filp; 427 p->rdev->uvd.img_size[i] = img_size; 428 return 0; 429 } 430 } 431 432 DRM_ERROR("No more free UVD handles!\n"); 433 return -EINVAL; 434 } 435 436 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, 437 int data0, int data1, 438 unsigned buf_sizes[], bool *has_msg_cmd) 439 { 440 struct radeon_cs_chunk *relocs_chunk; 441 struct radeon_cs_reloc *reloc; 442 unsigned idx, cmd, offset; 443 uint64_t start, end; 444 int r; 445 446 relocs_chunk = &p->chunks[p->chunk_relocs_idx]; 447 offset = radeon_get_ib_value(p, data0); 448 idx = radeon_get_ib_value(p, data1); 449 if (idx >= relocs_chunk->length_dw) { 450 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 451 idx, relocs_chunk->length_dw); 452 return -EINVAL; 453 } 454 455 reloc = p->relocs_ptr[(idx / 4)]; 456 start = reloc->lobj.gpu_offset; 457 end = start + radeon_bo_size(reloc->robj); 458 start += offset; 459 460 p->ib.ptr[data0] = start & 0xFFFFFFFF; 461 p->ib.ptr[data1] = start >> 32; 462 463 cmd = radeon_get_ib_value(p, p->idx) >> 1; 464 465 if (cmd < 0x4) { 466 if ((end - start) < buf_sizes[cmd]) { 467 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 468 (unsigned)(end - start), buf_sizes[cmd]); 469 return -EINVAL; 470 } 471 472 } else if (cmd != 0x100) { 473 DRM_ERROR("invalid UVD command %X!\n", cmd); 474 return -EINVAL; 475 } 476 477 if ((start >> 28) != ((end - 1) >> 28)) { 478 DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", 479 start, end); 480 return -EINVAL; 481 } 482 483 /* TODO: is this still necessary on NI+ ? */ 484 if ((cmd == 0 || cmd == 0x3) && 485 (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) { 486 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 487 start, end); 488 return -EINVAL; 489 } 490 491 if (cmd == 0) { 492 if (*has_msg_cmd) { 493 DRM_ERROR("More than one message in a UVD-IB!\n"); 494 return -EINVAL; 495 } 496 *has_msg_cmd = true; 497 r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes); 498 if (r) 499 return r; 500 } else if (!*has_msg_cmd) { 501 DRM_ERROR("Message needed before other commands are send!\n"); 502 return -EINVAL; 503 } 504 505 return 0; 506 } 507 508 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p, 509 struct radeon_cs_packet *pkt, 510 int *data0, int *data1, 511 unsigned buf_sizes[], 512 bool *has_msg_cmd) 513 { 514 int i, r; 515 516 p->idx++; 517 for (i = 0; i <= pkt->count; ++i) { 518 switch (pkt->reg + i*4) { 519 case UVD_GPCOM_VCPU_DATA0: 520 *data0 = p->idx; 521 break; 522 case UVD_GPCOM_VCPU_DATA1: 523 *data1 = p->idx; 524 break; 525 case UVD_GPCOM_VCPU_CMD: 526 r = radeon_uvd_cs_reloc(p, *data0, *data1, 527 buf_sizes, has_msg_cmd); 528 if (r) 529 return r; 530 break; 531 case UVD_ENGINE_CNTL: 532 break; 533 default: 534 DRM_ERROR("Invalid reg 0x%X!\n", 535 pkt->reg + i*4); 536 return -EINVAL; 537 } 538 p->idx++; 539 } 540 return 0; 541 } 542 543 int radeon_uvd_cs_parse(struct radeon_cs_parser *p) 544 { 545 struct radeon_cs_packet pkt; 546 int r, data0 = 0, data1 = 0; 547 548 /* does the IB has a msg command */ 549 bool has_msg_cmd = false; 550 551 /* minimum buffer sizes */ 552 unsigned buf_sizes[] = { 553 [0x00000000] = 2048, 554 [0x00000001] = 32 * 1024 * 1024, 555 [0x00000002] = 2048 * 1152 * 3, 556 [0x00000003] = 2048, 557 }; 558 559 if (p->chunks[p->chunk_ib_idx].length_dw % 16) { 560 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 561 p->chunks[p->chunk_ib_idx].length_dw); 562 return -EINVAL; 563 } 564 565 if (p->chunk_relocs_idx == -1) { 566 DRM_ERROR("No relocation chunk !\n"); 567 return -EINVAL; 568 } 569 570 571 do { 572 r = radeon_cs_packet_parse(p, &pkt, p->idx); 573 if (r) 574 return r; 575 switch (pkt.type) { 576 case RADEON_PACKET_TYPE0: 577 r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1, 578 buf_sizes, &has_msg_cmd); 579 if (r) 580 return r; 581 break; 582 case RADEON_PACKET_TYPE2: 583 p->idx += pkt.count + 2; 584 break; 585 default: 586 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 587 return -EINVAL; 588 } 589 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 590 591 if (!has_msg_cmd) { 592 DRM_ERROR("UVD-IBs need a msg command!\n"); 593 return -EINVAL; 594 } 595 596 return 0; 597 } 598 599 static int radeon_uvd_send_msg(struct radeon_device *rdev, 600 int ring, struct radeon_bo *bo, 601 struct radeon_fence **fence) 602 { 603 struct ttm_validate_buffer tv; 604 struct ww_acquire_ctx ticket; 605 struct list_head head; 606 struct radeon_ib ib; 607 uint64_t addr; 608 int i, r; 609 610 memset(&tv, 0, sizeof(tv)); 611 tv.bo = &bo->tbo; 612 613 INIT_LIST_HEAD(&head); 614 list_add(&tv.head, &head); 615 616 r = ttm_eu_reserve_buffers(&ticket, &head); 617 if (r) 618 return r; 619 620 radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); 621 radeon_uvd_force_into_uvd_segment(bo); 622 623 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 624 if (r) 625 goto err; 626 627 r = radeon_ib_get(rdev, ring, &ib, NULL, 64); 628 if (r) 629 goto err; 630 631 addr = radeon_bo_gpu_offset(bo); 632 ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); 633 ib.ptr[1] = addr; 634 ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); 635 ib.ptr[3] = addr >> 32; 636 ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); 637 ib.ptr[5] = 0; 638 for (i = 6; i < 16; ++i) 639 ib.ptr[i] = PACKET2(0); 640 ib.length_dw = 16; 641 642 r = radeon_ib_schedule(rdev, &ib, NULL); 643 if (r) 644 goto err; 645 ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); 646 647 if (fence) 648 *fence = radeon_fence_ref(ib.fence); 649 650 radeon_ib_free(rdev, &ib); 651 radeon_bo_unref(&bo); 652 return 0; 653 654 err: 655 ttm_eu_backoff_reservation(&ticket, &head); 656 return r; 657 } 658 659 /* multiple fence commands without any stream commands in between can 660 crash the vcpu so just try to emmit a dummy create/destroy msg to 661 avoid this */ 662 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, 663 uint32_t handle, struct radeon_fence **fence) 664 { 665 struct radeon_bo *bo; 666 uint32_t *msg; 667 int r, i; 668 669 r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, 670 RADEON_GEM_DOMAIN_VRAM, NULL, &bo); 671 if (r) 672 return r; 673 674 r = radeon_bo_reserve(bo, false); 675 if (r) { 676 radeon_bo_unref(&bo); 677 return r; 678 } 679 680 r = radeon_bo_kmap(bo, (void **)&msg); 681 if (r) { 682 radeon_bo_unreserve(bo); 683 radeon_bo_unref(&bo); 684 return r; 685 } 686 687 /* stitch together an UVD create msg */ 688 msg[0] = cpu_to_le32(0x00000de4); 689 msg[1] = cpu_to_le32(0x00000000); 690 msg[2] = cpu_to_le32(handle); 691 msg[3] = cpu_to_le32(0x00000000); 692 msg[4] = cpu_to_le32(0x00000000); 693 msg[5] = cpu_to_le32(0x00000000); 694 msg[6] = cpu_to_le32(0x00000000); 695 msg[7] = cpu_to_le32(0x00000780); 696 msg[8] = cpu_to_le32(0x00000440); 697 msg[9] = cpu_to_le32(0x00000000); 698 msg[10] = cpu_to_le32(0x01b37000); 699 for (i = 11; i < 1024; ++i) 700 msg[i] = cpu_to_le32(0x0); 701 702 radeon_bo_kunmap(bo); 703 radeon_bo_unreserve(bo); 704 705 return radeon_uvd_send_msg(rdev, ring, bo, fence); 706 } 707 708 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, 709 uint32_t handle, struct radeon_fence **fence) 710 { 711 struct radeon_bo *bo; 712 uint32_t *msg; 713 int r, i; 714 715 r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, 716 RADEON_GEM_DOMAIN_VRAM, NULL, &bo); 717 if (r) 718 return r; 719 720 r = radeon_bo_reserve(bo, false); 721 if (r) { 722 radeon_bo_unref(&bo); 723 return r; 724 } 725 726 r = radeon_bo_kmap(bo, (void **)&msg); 727 if (r) { 728 radeon_bo_unreserve(bo); 729 radeon_bo_unref(&bo); 730 return r; 731 } 732 733 /* stitch together an UVD destroy msg */ 734 msg[0] = cpu_to_le32(0x00000de4); 735 msg[1] = cpu_to_le32(0x00000002); 736 msg[2] = cpu_to_le32(handle); 737 msg[3] = cpu_to_le32(0x00000000); 738 for (i = 4; i < 1024; ++i) 739 msg[i] = cpu_to_le32(0x0); 740 741 radeon_bo_kunmap(bo); 742 radeon_bo_unreserve(bo); 743 744 return radeon_uvd_send_msg(rdev, ring, bo, fence); 745 } 746 747 /** 748 * radeon_uvd_count_handles - count number of open streams 749 * 750 * @rdev: radeon_device pointer 751 * @sd: number of SD streams 752 * @hd: number of HD streams 753 * 754 * Count the number of open SD/HD streams as a hint for power mangement 755 */ 756 static void radeon_uvd_count_handles(struct radeon_device *rdev, 757 unsigned *sd, unsigned *hd) 758 { 759 unsigned i; 760 761 *sd = 0; 762 *hd = 0; 763 764 for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { 765 if (!atomic_read(&rdev->uvd.handles[i])) 766 continue; 767 768 if (rdev->uvd.img_size[i] >= 720*576) 769 ++(*hd); 770 else 771 ++(*sd); 772 } 773 } 774 775 static void radeon_uvd_idle_work_handler(struct work_struct *work) 776 { 777 struct radeon_device *rdev = 778 container_of(work, struct radeon_device, uvd.idle_work.work); 779 780 if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) { 781 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 782 radeon_uvd_count_handles(rdev, &rdev->pm.dpm.sd, 783 &rdev->pm.dpm.hd); 784 radeon_dpm_enable_uvd(rdev, false); 785 } else { 786 radeon_set_uvd_clocks(rdev, 0, 0); 787 } 788 } else { 789 schedule_delayed_work(&rdev->uvd.idle_work, 790 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 791 } 792 } 793 794 void radeon_uvd_note_usage(struct radeon_device *rdev) 795 { 796 bool streams_changed = false; 797 bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work); 798 set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work, 799 msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS)); 800 801 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 802 unsigned hd = 0, sd = 0; 803 radeon_uvd_count_handles(rdev, &sd, &hd); 804 if ((rdev->pm.dpm.sd != sd) || 805 (rdev->pm.dpm.hd != hd)) { 806 rdev->pm.dpm.sd = sd; 807 rdev->pm.dpm.hd = hd; 808 /* disable this for now */ 809 /*streams_changed = true;*/ 810 } 811 } 812 813 if (set_clocks || streams_changed) { 814 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { 815 radeon_dpm_enable_uvd(rdev, true); 816 } else { 817 radeon_set_uvd_clocks(rdev, 53300, 40000); 818 } 819 } 820 } 821 822 static unsigned radeon_uvd_calc_upll_post_div(unsigned vco_freq, 823 unsigned target_freq, 824 unsigned pd_min, 825 unsigned pd_even) 826 { 827 unsigned post_div = vco_freq / target_freq; 828 829 /* adjust to post divider minimum value */ 830 if (post_div < pd_min) 831 post_div = pd_min; 832 833 /* we alway need a frequency less than or equal the target */ 834 if ((vco_freq / post_div) > target_freq) 835 post_div += 1; 836 837 /* post dividers above a certain value must be even */ 838 if (post_div > pd_even && post_div % 2) 839 post_div += 1; 840 841 return post_div; 842 } 843 844 /** 845 * radeon_uvd_calc_upll_dividers - calc UPLL clock dividers 846 * 847 * @rdev: radeon_device pointer 848 * @vclk: wanted VCLK 849 * @dclk: wanted DCLK 850 * @vco_min: minimum VCO frequency 851 * @vco_max: maximum VCO frequency 852 * @fb_factor: factor to multiply vco freq with 853 * @fb_mask: limit and bitmask for feedback divider 854 * @pd_min: post divider minimum 855 * @pd_max: post divider maximum 856 * @pd_even: post divider must be even above this value 857 * @optimal_fb_div: resulting feedback divider 858 * @optimal_vclk_div: resulting vclk post divider 859 * @optimal_dclk_div: resulting dclk post divider 860 * 861 * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). 862 * Returns zero on success -EINVAL on error. 863 */ 864 int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, 865 unsigned vclk, unsigned dclk, 866 unsigned vco_min, unsigned vco_max, 867 unsigned fb_factor, unsigned fb_mask, 868 unsigned pd_min, unsigned pd_max, 869 unsigned pd_even, 870 unsigned *optimal_fb_div, 871 unsigned *optimal_vclk_div, 872 unsigned *optimal_dclk_div) 873 { 874 unsigned vco_freq, ref_freq = rdev->clock.spll.reference_freq; 875 876 /* start off with something large */ 877 unsigned optimal_score = ~0; 878 879 /* loop through vco from low to high */ 880 vco_min = max(max(vco_min, vclk), dclk); 881 for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { 882 883 uint64_t fb_div = (uint64_t)vco_freq * fb_factor; 884 unsigned vclk_div, dclk_div, score; 885 886 do_div(fb_div, ref_freq); 887 888 /* fb div out of range ? */ 889 if (fb_div > fb_mask) 890 break; /* it can oly get worse */ 891 892 fb_div &= fb_mask; 893 894 /* calc vclk divider with current vco freq */ 895 vclk_div = radeon_uvd_calc_upll_post_div(vco_freq, vclk, 896 pd_min, pd_even); 897 if (vclk_div > pd_max) 898 break; /* vco is too big, it has to stop */ 899 900 /* calc dclk divider with current vco freq */ 901 dclk_div = radeon_uvd_calc_upll_post_div(vco_freq, dclk, 902 pd_min, pd_even); 903 if (vclk_div > pd_max) 904 break; /* vco is too big, it has to stop */ 905 906 /* calc score with current vco freq */ 907 score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); 908 909 /* determine if this vco setting is better than current optimal settings */ 910 if (score < optimal_score) { 911 *optimal_fb_div = fb_div; 912 *optimal_vclk_div = vclk_div; 913 *optimal_dclk_div = dclk_div; 914 optimal_score = score; 915 if (optimal_score == 0) 916 break; /* it can't get better than this */ 917 } 918 } 919 920 /* did we found a valid setup ? */ 921 if (optimal_score == ~0) 922 return -EINVAL; 923 924 return 0; 925 } 926 927 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, 928 unsigned cg_upll_func_cntl) 929 { 930 unsigned i; 931 932 /* make sure UPLL_CTLREQ is deasserted */ 933 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 934 935 mdelay(10); 936 937 /* assert UPLL_CTLREQ */ 938 WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); 939 940 /* wait for CTLACK and CTLACK2 to get asserted */ 941 for (i = 0; i < 100; ++i) { 942 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; 943 if ((RREG32(cg_upll_func_cntl) & mask) == mask) 944 break; 945 mdelay(10); 946 } 947 948 /* deassert UPLL_CTLREQ */ 949 WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); 950 951 if (i == 100) { 952 DRM_ERROR("Timeout setting UVD clocks!\n"); 953 return -ETIMEDOUT; 954 } 955 956 return 0; 957 } 958