// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"
#include "adreno/adreno_gpu.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>
#include <linux/sched/task.h>

/*
 * Power Management:
 */

static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);

	if (IS_ERR(opp))
		return PTR_ERR(opp);

	if (gpu->funcs->gpu_set_freq)
		gpu->funcs->gpu_set_freq(gpu, opp);
	else
		clk_set_rate(gpu->core_clk, *freq);

	dev_pm_opp_put(opp);

	return 0;
}

static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	ktime_t time;

	if (gpu->funcs->gpu_get_freq)
		status->current_frequency = gpu->funcs->gpu_get_freq(gpu);
	else
		status->current_frequency = clk_get_rate(gpu->core_clk);

	status->busy_time = gpu->funcs->gpu_busy(gpu);

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}

static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));

	if (gpu->funcs->gpu_get_freq)
		*freq = gpu->funcs->gpu_get_freq(gpu);
	else
		*freq = clk_get_rate(gpu->core_clk);

	return 0;
}

static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};

static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the
	 * table from the OPP table.  After a deferred probe, these may have
	 * been left at non-zero values, so set them back to zero before
	 * creating the devfreq device.
	 */
	msm_devfreq_profile.freq_table = NULL;
	msm_devfreq_profile.max_state = 0;

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
			NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
	}

	devfreq_suspend_device(gpu->devfreq.devfreq);
}

static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
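
	/* the gpu rail goes last, the reverse of enable_pwrrail(): */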
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);

	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2Mhz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
}

static int disable_clk(struct msm_gpu *gpu)
{
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}

void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
{
	gpu->devfreq.busy_cycles = 0;
	gpu->devfreq.time = ktime_get();

	devfreq_resume_device(gpu->devfreq.devfreq);
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	msm_gpu_resume_devfreq(gpu);

	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}

#ifdef CONFIG_DEV_COREDUMP
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
		size_t count, void *data, size_t datalen)
{
	struct msm_gpu *gpu = data;
	struct drm_print_iterator iter;
	struct drm_printer p;
	struct msm_gpu_state *state;

	state = msm_gpu_crashstate_get(gpu);
	if (!state)
		return 0;

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);

	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);

	gpu->funcs->show(gpu, state, &p);

	msm_gpu_crashstate_put(gpu);

	return count - iter.remain;
}

static void msm_gpu_devcoredump_free(void *data)
{
	struct msm_gpu *gpu = data;

	msm_gpu_crashstate_put(gpu);
}

static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
		struct msm_gem_object *obj, u64 iova, u32 flags)
{
	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];

	/* Don't record write only objects */
	state_bo->size = obj->base.size;
	state_bo->iova = iova;

	/* Only store data for non imported buffer objects marked for read */
	if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) {
		void *ptr;

		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
		if (!state_bo->data)
			goto out;

		ptr = msm_gem_get_vaddr_active(&obj->base);
		if (IS_ERR(ptr)) {
			kvfree(state_bo->data);
			state_bo->data = NULL;
			goto out;
		}

		memcpy(state_bo->data, ptr, obj->base.size);
		msm_gem_put_vaddr(&obj->base);
	}
out:
	state->nr_bos++;
}

static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
	struct msm_gpu_state *state;

	/* Check if the target supports capturing crash state */
	if (!gpu->funcs->gpu_state_get)
		return;

	/* Only save one crash state at a time */
	if (gpu->crashstate)
		return;

	state = gpu->funcs->gpu_state_get(gpu);
	if (IS_ERR_OR_NULL(state))
		return;

	/* Fill in the additional crash state information */
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);

	if (submit) {
		int i, nr = 0;

		/* count # of buffers to dump: */
		for (i = 0; i < submit->nr_bos; i++)
			if (should_dump(submit, i))
				nr++;
		/* always dump cmd bo's, but don't double count them: */
		for (i = 0; i < submit->nr_cmds; i++)
			if (!should_dump(submit, submit->cmd[i].idx))
				nr++;

		state->bos = kcalloc(nr,
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

		for (i = 0; i < submit->nr_bos; i++) {
			if (should_dump(submit, i)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
					submit->bos[i].iova, submit->bos[i].flags);
			}
		}

		for (i = 0; state->bos && i < submit->nr_cmds; i++) {
			int idx = submit->cmd[i].idx;

			if (!should_dump(submit, submit->cmd[i].idx)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj,
					submit->bos[idx].iova, submit->bos[idx].flags);
			}
		}
	}

	/* Set the active crash state to be dumped on failure */
	gpu->crashstate = state;

	/* FIXME: Release the crashstate if this errors out? */
	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
}
#else
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
}
#endif

/*
 * Hangcheck detection for locked gpu:
 */

static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);

static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		/* Increment the fault counts */
		gpu->global_faults++;
		submit->queue->faults++;

		task = get_pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_KERNEL);
			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
			put_task_struct(task);
		}

		if (comm && cmd) {
			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", comm, cmd);
		} else
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

	/* Record the crash state */
	pm_runtime_get_sync(&gpu->pdev->dev);
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
	pm_runtime_put_sync(&gpu->pdev->dev);

	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		DRM_DEV_ERROR(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */

static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		struct msm_gem_submit *submit)
{
	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	volatile struct msm_gpu_submit_stats *stats;
	u64 elapsed, clock = 0;
	int i;

	stats = &ring->memptrs->stats[index];
	/* Convert 19.2Mhz alwayson ticks to nanoseconds for elapsed time */
	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
	do_div(elapsed, 192);

	/* Calculate the clock frequency from the number of CP cycles */
	if (elapsed) {
		clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
		do_div(clock, elapsed);
	}

	trace_msm_gpu_submit_retired(submit, elapsed, clock,
		stats->alwayson_start, stats->alwayson_end);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
		drm_gem_object_put_locked(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, ring, submit);
		}
	}
}

static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
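	/* also refresh the sw perf counters from the irq path: */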
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}

/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);

	if (ret < 1) {
		gpu->nr_clocks = 0;
		return ret;
	}

	gpu->nr_clocks = ret;

	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");

	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");

	return 0;
}

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq(pdev, 0);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

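	/* Optional bus clock; if missing, enable_axi()/disable_axi() are no-ops: */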
	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	msm_devfreq_init(gpu);

	gpu->aspace = gpu->funcs->create_address_space(gpu, pdev);

	if (gpu->aspace == NULL)
		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	memptrs = msm_gem_kernel_new(drm,
		sizeof(struct msm_rbmemptrs) * nr_rings,
		check_apriv(gpu, MSM_BO_UNCACHED), gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			DRM_DEV_ERROR(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	platform_set_drvdata(pdev, NULL);
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
		msm_gem_address_space_put(gpu->aspace);
	}
}