/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"


/*
 * Power Management:
 */

#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
#include <mach/board.h>
static void bs_init(struct msm_gpu *gpu)
{
	if (gpu->bus_scale_table) {
		gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
		DBG("bus scale client: %08x", gpu->bsc);
	}
}

static void bs_fini(struct msm_gpu *gpu)
{
	if (gpu->bsc) {
		msm_bus_scale_unregister_client(gpu->bsc);
		gpu->bsc = 0;
	}
}

static void bs_set(struct msm_gpu *gpu, int idx)
{
	if (gpu->bsc) {
		DBG("set bus scaling: %d", idx);
		msm_bus_scale_client_update_request(gpu->bsc, idx);
	}
}
#else
static void bs_init(struct msm_gpu *gpu) {}
static void bs_fini(struct msm_gpu *gpu) {}
static void bs_set(struct msm_gpu *gpu, int idx) {}
#endif

static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	struct clk *rate_clk = NULL;
	int i;

	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
		if (gpu->grp_clks[i]) {
			clk_prepare(gpu->grp_clks[i]);
			rate_clk = gpu->grp_clks[i];
		}
	}

	if (rate_clk && gpu->fast_rate)
		clk_set_rate(rate_clk, gpu->fast_rate);

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
		if (gpu->grp_clks[i])
			clk_enable(gpu->grp_clks[i]);

	return 0;
}

static int disable_clk(struct msm_gpu *gpu)
{
	struct clk *rate_clk = NULL;
	int i;

	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
		if (gpu->grp_clks[i]) {
			clk_disable(gpu->grp_clks[i]);
			rate_clk = gpu->grp_clks[i];
		}
	}

	if (rate_clk && gpu->slow_rate)
		clk_set_rate(rate_clk, gpu->slow_rate);

	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
		if (gpu->grp_clks[i])
			clk_unprepare(gpu->grp_clks[i]);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, gpu->bus_freq);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, 0);
	return 0;
}

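/*
 * Power state is refcounted via active_cnt (protected by struct_mutex): the
 * resume and suspend paths below only touch rails, clocks and bus votes on
 * the 0 -> 1 and 1 -> 0 transitions.
 */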
int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (gpu->active_cnt++ > 0)
		return 0;

	if (WARN_ON(gpu->active_cnt <= 0))
		return -EINVAL;

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (--gpu->active_cnt > 0)
		return 0;

	if (WARN_ON(gpu->active_cnt < 0))
		return -EINVAL;

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

/*
 * Inactivity detection (for suspend):
 */

static void inactive_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work);
	struct drm_device *dev = gpu->dev;

	if (gpu->inactive)
		return;

	DBG("%s: inactive!\n", gpu->name);
	mutex_lock(&dev->struct_mutex);
	if (!(msm_gpu_active(gpu) || gpu->inactive)) {
		disable_axi(gpu);
		disable_clk(gpu);
		gpu->inactive = true;
	}
	mutex_unlock(&dev->struct_mutex);
}

static void inactive_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct msm_drm_private *priv = gpu->dev->dev_private;

	queue_work(priv->wq, &gpu->inactive_work);
}

/* cancel inactive timer and make sure we are awake: */
static void inactive_cancel(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	del_timer(&gpu->inactive_timer);
	if (gpu->inactive) {
		enable_clk(gpu);
		enable_axi(gpu);
		gpu->inactive = false;
	}
}

static void inactive_start(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->inactive_timer,
			round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES));
}

/*
 * Hangcheck detection for locked gpu:
 */

static void retire_submits(struct msm_gpu *gpu, uint32_t fence);

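/*
 * Recovery: retire everything up to and including the hung submit
 * (last_fence + 1), let the backend reset the GPU, then replay whatever is
 * still on submit_list so queued work is not silently dropped.
 */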
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	mutex_lock(&dev->struct_mutex);
	if (msm_gpu_active(gpu)) {
		struct msm_gem_submit *submit;
		uint32_t fence = gpu->funcs->last_fence(gpu);

		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu, fence + 1);

		inactive_cancel(gpu);
		gpu->funcs->recover(gpu);

		/* replay the remaining submits after the one that hung: */
		list_for_each_entry(submit, &gpu->submit_list, node) {
			gpu->funcs->submit(gpu, submit, NULL);
		}
	}
	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

static void hangcheck_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	if (fence != gpu->hangcheck_fence) {
		/* some progress has been made.. ya! */
		gpu->hangcheck_fence = fence;
	} else if (fence < gpu->submitted_fence) {
		/* no progress and not done.. hung! */
		gpu->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
				gpu->name);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, gpu->submitted_fence);
		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (gpu->submitted_fence > gpu->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

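/*
 * activetime/totaltime accumulate between calls to msm_gpu_perfcntr_sample(),
 * and the hw counters are reported as deltas since the previous sample (see
 * update_hw_cntrs() above).
 */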
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */

static void retire_submits(struct msm_gpu *gpu, uint32_t fence)
{
	struct drm_device *dev = gpu->dev;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	while (!list_empty(&gpu->submit_list)) {
		struct msm_gem_submit *submit;

		submit = list_first_entry(&gpu->submit_list,
				struct msm_gem_submit, node);

		if (submit->fence <= fence) {
			list_del(&submit->node);
			kfree(submit);
		} else {
			break;
		}
	}
}

static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	msm_update_fence(gpu->dev, fence);

	mutex_lock(&dev->struct_mutex);

	retire_submits(gpu, fence);

	while (!list_empty(&gpu->active_list)) {
		struct msm_gem_object *obj;

		obj = list_first_entry(&gpu->active_list,
				struct msm_gem_object, mm_list);

		if ((obj->read_fence <= fence) &&
				(obj->write_fence <= fence)) {
			/* move to inactive: */
			msm_gem_move_to_inactive(&obj->base);
			msm_gem_put_iova(&obj->base, gpu->id);
			drm_gem_object_unreference(&obj->base);
		} else {
			break;
		}
	}

	mutex_unlock(&dev->struct_mutex);

	if (!msm_gpu_active(gpu))
		inactive_start(gpu);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

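/*
 * The submit path below records submitted_fence, cancels the inactive timer
 * and (re)arms the hangcheck timer; retire_worker unwinds this bookkeeping as
 * fences complete and restarts the inactive timer once the GPU goes idle.
 */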
/* add bo's to gpu's ring, and kick gpu: */
int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	int i, ret;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	submit->fence = ++priv->next_fence;

	gpu->submitted_fence = submit->fence;

	inactive_cancel(gpu);

	list_add_tail(&submit->node, &gpu->submit_list);

	msm_rd_dump_submit(submit);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		if (!is_active(msm_obj)) {
			uint32_t iova;

			/* ring takes a reference to the bo and iova: */
			drm_gem_object_reference(&msm_obj->base);
			msm_gem_get_iova_locked(&msm_obj->base,
					submit->gpu->id, &iova);
		}

		if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
	}

	ret = gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);

	return ret;
}

/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

static const char *clk_names[] = {
	"src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk",
	"alt_mem_iface_clk",
};

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, const char *ioname, const char *irqname, int ringsz)
{
	struct iommu_domain *iommu;
	int i, ret;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;
	gpu->inactive = true;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->inactive_work, inactive_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	INIT_LIST_HEAD(&gpu->submit_list);

	setup_timer(&gpu->inactive_timer, inactive_handler,
			(unsigned long)gpu);
	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
			(unsigned long)gpu);

	spin_lock_init(&gpu->perf_lock);

	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	/* Acquire clocks: */
	for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
		gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
		DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
		if (IS_ERR(gpu->grp_clks[i]))
			gpu->grp_clks[i] = NULL;
	}

	gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	/* Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context. For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (iommu) {
		dev_info(drm->dev, "%s: using IOMMU\n", name);
		gpu->mmu = msm_iommu_new(&pdev->dev, iommu);
	} else {
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	}
	gpu->id = msm_register_mmu(drm, gpu->mmu);


	/* Create ringbuffer: */
	mutex_lock(&drm->struct_mutex);
	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(gpu->rb)) {
		ret = PTR_ERR(gpu->rb);
		gpu->rb = NULL;
		dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
		goto fail;
	}

	bs_init(gpu);

	return 0;

fail:
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	bs_fini(gpu);

	if (gpu->rb) {
		if (gpu->rb_iova)
			msm_gem_put_iova(gpu->rb->bo, gpu->id);
		msm_ringbuffer_destroy(gpu->rb);
	}

	if (gpu->mmu)
		gpu->mmu->funcs->destroy(gpu->mmu);
}