// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
#include "msm_gpu_trace.h"
#include "adreno/adreno_gpu.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>
#include <linux/sched/task.h>

/*
 * Power Management:
 */

static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = dev_to_gpu(dev);
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);

	if (IS_ERR(opp))
		return PTR_ERR(opp);

	trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp));

	if (gpu->funcs->gpu_set_freq)
		gpu->funcs->gpu_set_freq(gpu, opp);
	else
		clk_set_rate(gpu->core_clk, *freq);

	dev_pm_opp_put(opp);

	return 0;
}

static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = dev_to_gpu(dev);
	ktime_t time;

	if (gpu->funcs->gpu_get_freq)
		status->current_frequency = gpu->funcs->gpu_get_freq(gpu);
	else
		status->current_frequency = clk_get_rate(gpu->core_clk);

	status->busy_time = gpu->funcs->gpu_busy(gpu);

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}

static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = dev_to_gpu(dev);

	if (gpu->funcs->gpu_get_freq)
		*freq = gpu->funcs->gpu_get_freq(gpu);
	else
		*freq = clk_get_rate(gpu->core_clk);

	return 0;
}

static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};

static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support (at least a gpu_busy callback) to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the
	 * table from the OPP table.
	 * After a deferred probe, these may have been left at non-zero values,
	 * so set them back to zero before creating the devfreq device.
	 */
	msm_devfreq_profile.freq_table = NULL;
	msm_devfreq_profile.max_state = 0;

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
			NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
	}

	devfreq_suspend_device(gpu->devfreq.devfreq);
}
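
/*
 * Note: msm_devfreq_get_dev_status() reports total_time in microseconds
 * (ktime_us_delta()), so the backend's gpu_busy() callback is expected to
 * report busy time in the same unit; the simple_ondemand governor then
 * derives GPU load as busy_time / total_time.
 */
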
static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
}

static int disable_clk(struct msm_gpu *gpu)
{
	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non-zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}

void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
{
	gpu->devfreq.busy_cycles = 0;
	gpu->devfreq.time = ktime_get();

	devfreq_resume_device(gpu->devfreq.devfreq);
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);
	trace_msm_gpu_resume(0);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	msm_gpu_resume_devfreq(gpu);

	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);
	trace_msm_gpu_suspend(0);

	devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}
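
/*
 * Note: msm_gpu_pm_resume() powers things up in the order power rails ->
 * clocks -> AXI, and msm_gpu_pm_suspend() tears them down in the reverse
 * order.  Resume also sets needs_hw_init, so the next submission re-runs
 * the target's hw_init() (via msm_gpu_hw_init() above) before any commands
 * are issued.
 */
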
#ifdef CONFIG_DEV_COREDUMP
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
		size_t count, void *data, size_t datalen)
{
	struct msm_gpu *gpu = data;
	struct drm_print_iterator iter;
	struct drm_printer p;
	struct msm_gpu_state *state;

	state = msm_gpu_crashstate_get(gpu);
	if (!state)
		return 0;

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);

	drm_printf(&p, "---\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n",
		state->time.tv_sec, state->time.tv_nsec);
	if (state->comm)
		drm_printf(&p, "comm: %s\n", state->comm);
	if (state->cmd)
		drm_printf(&p, "cmdline: %s\n", state->cmd);

	gpu->funcs->show(gpu, state, &p);

	msm_gpu_crashstate_put(gpu);

	return count - iter.remain;
}

static void msm_gpu_devcoredump_free(void *data)
{
	struct msm_gpu *gpu = data;

	msm_gpu_crashstate_put(gpu);
}
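
/*
 * The resulting dump (read by userspace from the devcoredump device,
 * /sys/class/devcoredump/devcdN/data) starts with a small header:
 *
 *	---
 *	kernel: <UTS_RELEASE>
 *	module: <KBUILD_MODNAME>
 *	time: <sec>.<nsec>
 *	comm: <comm of the offending task, if known>
 *	cmdline: <cmdline of the offending task, if known>
 *
 * followed by whatever target-specific state gpu->funcs->show() prints.
 */
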
static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
		struct msm_gem_object *obj, u64 iova, u32 flags)
{
	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];

	/* Don't record write-only objects */
	state_bo->size = obj->base.size;
	state_bo->iova = iova;

	/* Only store data for non-imported buffer objects marked for read */
	if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) {
		void *ptr;

		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
		if (!state_bo->data)
			goto out;

		ptr = msm_gem_get_vaddr_active(&obj->base);
		if (IS_ERR(ptr)) {
			kvfree(state_bo->data);
			state_bo->data = NULL;
			goto out;
		}

		memcpy(state_bo->data, ptr, obj->base.size);
		msm_gem_put_vaddr(&obj->base);
	}
out:
	state->nr_bos++;
}

static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
	struct msm_gpu_state *state;

	/* Check if the target supports capturing crash state */
	if (!gpu->funcs->gpu_state_get)
		return;

	/* Only save one crash state at a time */
	if (gpu->crashstate)
		return;

	state = gpu->funcs->gpu_state_get(gpu);
	if (IS_ERR_OR_NULL(state))
		return;

	/* Fill in the additional crash state information */
	state->comm = kstrdup(comm, GFP_KERNEL);
	state->cmd = kstrdup(cmd, GFP_KERNEL);

	if (submit) {
		int i, nr = 0;

		/* count # of buffers to dump: */
		for (i = 0; i < submit->nr_bos; i++)
			if (should_dump(submit, i))
				nr++;
		/* always dump cmd bo's, but don't double count them: */
		for (i = 0; i < submit->nr_cmds; i++)
			if (!should_dump(submit, submit->cmd[i].idx))
				nr++;

		state->bos = kcalloc(nr,
			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

		for (i = 0; state->bos && i < submit->nr_bos; i++) {
			if (should_dump(submit, i)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
					submit->bos[i].iova, submit->bos[i].flags);
			}
		}

		for (i = 0; state->bos && i < submit->nr_cmds; i++) {
			int idx = submit->cmd[i].idx;

			if (!should_dump(submit, submit->cmd[i].idx)) {
				msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj,
					submit->bos[idx].iova, submit->bos[idx].flags);
			}
		}
	}

	/* Set the active crash state to be dumped on failure */
	gpu->crashstate = state;

	/* FIXME: Release the crashstate if this errors out? */
	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
		msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
}
#else
static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
		struct msm_gem_submit *submit, char *comm, char *cmd)
{
}
#endif

/*
 * Hangcheck detection for locked gpu:
 */

static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);
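
/*
 * Recovery (recover_worker() below), in outline: snapshot the crash state
 * of the offending submit (for devcoredump), advance each ring's fences
 * past anything that has completed (plus one extra on the hung ring, to
 * clear the faulting submit), reset the GPU via funcs->recover(), and then
 * replay the remaining submits from every ring, highest priority first.
 */
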
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		/* Increment the fault counts */
		gpu->global_faults++;
		submit->queue->faults++;

		task = get_pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_KERNEL);
			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
			put_task_struct(task);
		}

		if (comm && cmd) {
			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", comm, cmd);
		} else
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

	/* Record the crash state */
	pm_runtime_get_sync(&gpu->pdev->dev);
	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
	pm_runtime_put_sync(&gpu->pdev->dev);

	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		DRM_DEV_ERROR(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}
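
/*
 * In other words: a lockup is only declared when the ring's completed fence
 * has not advanced between two consecutive hangcheck periods while submitted
 * work is still outstanding (fence < ring->seqno).  Mere slowness just
 * re-arms the timer.
 */
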
/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}
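
/*
 * Usage sketch (hypothetical caller; the msm_perf debugfs code is the
 * in-tree user of this API):
 *
 *	uint32_t active, total, cntrs[5];
 *	int n;
 *
 *	msm_gpu_perfcntr_start(gpu);
 *	...
 *	n = msm_gpu_perfcntr_sample(gpu, &active, &total,
 *			ARRAY_SIZE(cntrs), cntrs);
 *	...
 *	msm_gpu_perfcntr_stop(gpu);
 *
 * On success, 'active'/'total' are the active/elapsed time in usecs since
 * the previous sample, and cntrs[0..n-1] hold the deltas of the first n
 * hardware counters.
 */
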
/*
 * Cmdstream submission/retirement:
 */

static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		struct msm_gem_submit *submit)
{
	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
	volatile struct msm_gpu_submit_stats *stats;
	u64 elapsed, clock = 0;
	int i;

	stats = &ring->memptrs->stats[index];
	/* Convert 19.2MHz alwayson ticks to nanoseconds for elapsed time */
	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
	do_div(elapsed, 192);

	/* Calculate the clock frequency from the number of CP cycles */
	if (elapsed) {
		clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
		do_div(clock, elapsed);
	}

	trace_msm_gpu_submit_retired(submit, elapsed, clock,
		stats->alwayson_start, stats->alwayson_end);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;

		msm_gem_active_put(&msm_obj->base);
		msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
		drm_gem_object_put_locked(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, ring, submit);
		}
	}
}

static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		struct drm_gem_object *drm_obj = &msm_obj->base;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			dma_resv_add_excl_fence(drm_obj->resv, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			dma_resv_add_shared_fence(drm_obj->resv, submit->fence);

		msm_gem_active_get(drm_obj, gpu);
	}

	gpu->funcs->submit(gpu, submit);
	priv->lastctx = submit->queue->ctx;

	hangcheck_timer_reset(gpu);
}
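
/*
 * Note: each submitted bo holds a GEM reference, a pinned iova and an
 * "active" reference (plus a runtime-PM reference on the GPU) from
 * msm_gpu_submit() until the corresponding retire_submit() drops them,
 * so nothing a submit references can go away while the GPU may still be
 * accessing it.
 */
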
/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);

	if (ret < 1) {
		gpu->nr_clocks = 0;
		return ret;
	}

	gpu->nr_clocks = ret;

	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "core");

	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
		gpu->nr_clocks, "rbbmtimer");

	return 0;
}

/* Return a new address space for a msm_drm_private instance */
struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task)
{
	struct msm_gem_address_space *aspace = NULL;

	if (!gpu)
		return NULL;

	/*
	 * If the target doesn't support private address spaces then return
	 * the global one
	 */
	if (gpu->funcs->create_private_address_space) {
		aspace = gpu->funcs->create_private_address_space(gpu);
		if (!IS_ERR(aspace))
			aspace->pid = get_pid(task_pid(task));
	}

	if (IS_ERR_OR_NULL(aspace))
		aspace = msm_gem_address_space_get(gpu->aspace);

	return aspace;
}

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq(pdev, 0);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, &gpu->adreno_smmu);

	msm_devfreq_init(gpu);

	gpu->aspace = gpu->funcs->create_address_space(gpu, pdev);

	if (gpu->aspace == NULL)
		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	memptrs = msm_gem_kernel_new(drm,
		sizeof(struct msm_rbmemptrs) * nr_rings,
		check_apriv(gpu, MSM_BO_UNCACHED), gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			DRM_DEV_ERROR(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	platform_set_drvdata(pdev, NULL);
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
		msm_gem_address_space_put(gpu->aspace);
	}
}
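
/*
 * Note: msm_gpu_cleanup() only tears down what msm_gpu_init() allocated
 * explicitly (ringbuffers, the memptrs bo and the GPU address space);
 * resources acquired with the devm_* helpers above (IRQ, bulk clocks,
 * regulators) are released automatically by the driver core.
 */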