/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"

#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>


/*
 * Power Management:
 */

static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);

	if (IS_ERR(opp))
		return PTR_ERR(opp);

	clk_set_rate(gpu->core_clk, *freq);
	dev_pm_opp_put(opp);

	return 0;
}

static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	u64 cycles;
	u32 freq = ((u32) status->current_frequency) / 1000000;
	ktime_t time;

	status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk);
	gpu->funcs->gpu_busy(gpu, &cycles);

	status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq;

	gpu->devfreq.busy_cycles = cycles;

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}

static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));

	*freq = (unsigned long) clk_get_rate(gpu->core_clk);

	return 0;
}

static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};

static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the table
	 * from OPP
	 */

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, "simple_ondemand", NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
	}
}

static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

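/* Drop the power rails in the reverse order that enable_pwrrail() brought them up. */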
static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	int i;

	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2Mhz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_prepare(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_enable(gpu->grp_clks[i]);

	return 0;
}

static int disable_clk(struct msm_gpu *gpu)
{
	int i;

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_disable(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_unprepare(gpu->grp_clks[i]);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	if (gpu->devfreq.devfreq) {
		gpu->devfreq.busy_cycles = 0;
		gpu->devfreq.time = ktime_get();

		devfreq_resume_device(gpu->devfreq.devfreq);
	}

	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	if (gpu->devfreq.devfreq)
		devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}

/*
 * Hangcheck detection for locked gpu:
 */

static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

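/* Find the in-flight submit on this ring whose sequence number matches @fence. */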
static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);

static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	int i;

	mutex_lock(&dev->struct_mutex);

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			char *cmd;

			/*
			 * So slightly annoying, in other paths like
			 * mmap'ing gem buffers, mmap_sem is acquired
			 * before struct_mutex, which means we can't
			 * hold struct_mutex across the call to
			 * get_cmdline().  But submits are retired
			 * from the same in-order workqueue, so we can
			 * safely drop the lock here without worrying
			 * about the submit going away.
			 */
			mutex_unlock(&dev->struct_mutex);
			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
			mutex_lock(&dev->struct_mutex);

			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, task->comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", task->comm, cmd);

			kfree(cmd);
		} else {
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
		}
		rcu_read_unlock();
	}


	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

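/*
 * Timer callback: if the active ring's completed fence hasn't advanced since
 * the last check and work is still outstanding, schedule recover_worker().
 */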
static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

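/*
 * Start sampling the GPU performance counters.  A runtime PM reference is
 * held while sampling is active and dropped in msm_gpu_perfcntr_stop().
 */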
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */

static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	int i;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_put_iova(&msm_obj->base, gpu->aspace);
		drm_gem_object_unreference(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, submit);
		}
	}
}

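/* Deferred work to update ring fences and retire completed submits outside of IRQ context. */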
static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_reference(&msm_obj->base);
		msm_gem_get_iova(&msm_obj->base,
				submit->gpu->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}

/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

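/*
 * Clocks listed in "clock-names" that fail to look up are treated as optional:
 * a NULL slot in grp_clks[] is simply skipped when enabling/disabling clocks.
 */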
static struct clk *get_clock(struct device *dev, const char *name)
{
	struct clk *clk = devm_clk_get(dev, name);

	return IS_ERR(clk) ? NULL : clk;
}

static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	struct device *dev = &pdev->dev;
	struct property *prop;
	const char *name;
	int i = 0;

	gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names");
	if (gpu->nr_clocks < 1) {
		gpu->nr_clocks = 0;
		return 0;
	}

	gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks,
		GFP_KERNEL);
	if (!gpu->grp_clks)
		return -ENOMEM;

	of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
		gpu->grp_clks[i] = get_clock(dev, name);

		/* Remember the key clocks that we need to control later */
		if (!strcmp(name, "core") || !strcmp(name, "core_clk"))
			gpu->core_clk = gpu->grp_clks[i];
		else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk"))
			gpu->rbbmtimer_clk = gpu->grp_clks[i];

		++i;
	}

	return 0;
}

static struct msm_gem_address_space *
msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
		uint64_t va_start, uint64_t va_end)
{
	struct iommu_domain *iommu;
	struct msm_gem_address_space *aspace;
	int ret;

	/*
	 * Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (!iommu)
		return NULL;

	iommu->geometry.aperture_start = va_start;
	iommu->geometry.aperture_end = va_end;

	dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);

	aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
	if (IS_ERR(aspace)) {
		dev_err(gpu->dev->dev, "failed to init iommu: %ld\n",
			PTR_ERR(aspace));
		iommu_domain_free(iommu);
		return ERR_CAST(aspace);
	}

	ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
	if (ret) {
		msm_gem_address_space_put(aspace);
		return ERR_PTR(ret);
	}

	return aspace;
}

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);


	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);


	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	msm_devfreq_init(gpu);

	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

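	/*
	 * Allocate a single kernel-mapped buffer for the ring memptrs; each
	 * ringbuffer created below gets its own slice of it.
	 */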
	memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			dev_err(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
	}

	platform_set_drvdata(pdev, NULL);
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
	}

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
			NULL, 0);
		msm_gem_address_space_put(gpu->aspace);
	}
}