// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/dma-resv.h>
#include <drm/gpu_scheduler.h>
#include <drm/panfrost_drm.h>

#include "panfrost_device.h"
#include "panfrost_devfreq.h"
#include "panfrost_job.h"
#include "panfrost_features.h"
#include "panfrost_issues.h"
#include "panfrost_gem.h"
#include "panfrost_regs.h"
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"

#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))

struct panfrost_queue_state {
	struct drm_gpu_scheduler sched;
	bool stopped;
	struct mutex lock;
	u64 fence_context;
	u64 emit_seqno;
};

struct panfrost_job_slot {
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	spinlock_t job_lock;
};

static struct panfrost_job *
to_panfrost_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panfrost_job, base);
}

struct panfrost_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	int queue;
};

static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence *fence)
{
	return (struct panfrost_fence *)fence;
}

static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}

static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
{
	struct panfrost_fence *f = to_panfrost_fence(fence);

	switch (f->queue) {
	case 0:
		return "panfrost-js-0";
	case 1:
		return "panfrost-js-1";
	case 2:
		return "panfrost-js-2";
	default:
		return NULL;
	}
}

static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};

static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
{
	struct panfrost_fence *fence;
	struct panfrost_job_slot *js = pfdev->js;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	fence->dev = pfdev->ddev;
	fence->queue = js_num;
	fence->seqno = ++js->queue[js_num].emit_seqno;
	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
		       js->queue[js_num].fence_context, fence->seqno);

	return &fence->base;
}

static int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* JS0: fragment jobs.
	 * JS1: vertex/tiler jobs
	 * JS2: compute jobs
	 */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif
	return 1;
}
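
/*
 * Illustrative sketch, not part of the driver: the slot picked above is
 * driven entirely by the requirements field of the SUBMIT ioctl.  Assuming
 * the UAPI in include/uapi/drm/panfrost_drm.h and hypothetical variable
 * names, a userspace fragment-job submission looks roughly like this:
 *
 *	struct drm_panfrost_submit submit = {
 *		.jc = fragment_job_gpu_va,
 *		.bo_handles = (__u64)(uintptr_t)bo_handles,
 *		.bo_handle_count = bo_handle_count,
 *		.requirements = PANFROST_JD_REQ_FS,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
 *
 * PANFROST_JD_REQ_FS routes the job to JS0; anything submitted without it
 * currently lands on JS1.
 */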

static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	u64 affinity;

	/*
	 * Use all cores for now.
	 * Eventually we may need to support tiler only jobs and h/w with
	 * multiple (2) coherent core groups
	 */
	affinity = pfdev->features.shader_present;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity & 0xFFFFFFFF);
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32);
}

static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
{
	struct panfrost_device *pfdev = job->pfdev;
	u32 cfg;
	u64 jc_head = job->jc;
	int ret;

	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);

	ret = pm_runtime_get_sync(pfdev->dev);
	if (ret < 0)
		return;

	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
		return;

	cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu);

	job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF);
	job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32);

	panfrost_job_write_affinity(pfdev, job->requirements, js);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
	 * start */
	cfg |= JS_CONFIG_THREAD_PRI(8) |
		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
		cfg |= JS_CONFIG_START_MMU;

	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);

	/* GO ! */
	dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx",
		job, js, jc_head);

	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
}

static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
					   int bo_count,
					   struct dma_fence **implicit_fences)
{
	int i;

	for (i = 0; i < bo_count; i++)
		implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
}

static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_excl_fence(bos[i]->resv, fence);
}

int panfrost_job_push(struct panfrost_job *job)
{
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot];
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	mutex_lock(&pfdev->sched_lock);

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		return ret;
	}

	ret = drm_sched_job_init(&job->base, entity, NULL);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);

	kref_get(&job->refcount); /* put by scheduler job completion */

	panfrost_acquire_object_fences(job->bos, job->bo_count,
				       job->implicit_fences);

	drm_sched_entity_push_job(&job->base, entity);

	mutex_unlock(&pfdev->sched_lock);

	panfrost_attach_object_fences(job->bos, job->bo_count,
				      job->render_done_fence);

unlock:
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

	return ret;
}
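
/*
 * Submission flow, summarized for orientation (this describes what the code
 * in this file already does, it does not add behaviour):
 *
 *   1. panfrost_job_push() locks the BO reservations, initializes the
 *      drm_sched job, snapshots the per-BO exclusive fences as implicit
 *      dependencies, pushes the job to the scheduler entity of its slot and
 *      attaches render_done_fence as the new exclusive fence on each BO.
 *   2. The scheduler drains dependencies via panfrost_job_dependency() and
 *      then calls panfrost_job_run(), which creates done_fence and writes
 *      the job chain to the hardware in panfrost_job_hw_submit().
 *   3. On completion the job IRQ handler signals done_fence; on a hang, the
 *      500ms scheduler timeout ends up in panfrost_job_timedout(), which
 *      resets the GPU and resubmits the remaining jobs.
 *   4. panfrost_job_free() drops the scheduler's reference; the final
 *      panfrost_job_put() releases everything in panfrost_job_cleanup().
 */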

static void panfrost_job_cleanup(struct kref *ref)
{
	struct panfrost_job *job = container_of(ref, struct panfrost_job,
						refcount);
	unsigned int i;

	if (job->in_fences) {
		for (i = 0; i < job->in_fence_count; i++)
			dma_fence_put(job->in_fences[i]);
		kvfree(job->in_fences);
	}
	if (job->implicit_fences) {
		for (i = 0; i < job->bo_count; i++)
			dma_fence_put(job->implicit_fences[i]);
		kvfree(job->implicit_fences);
	}
	dma_fence_put(job->done_fence);
	dma_fence_put(job->render_done_fence);

	if (job->mappings) {
		for (i = 0; i < job->bo_count; i++) {
			if (!job->mappings[i])
				break;

			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
			panfrost_gem_mapping_put(job->mappings[i]);
		}
		kvfree(job->mappings);
	}

	if (job->bos) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bos[i]);

		kvfree(job->bos);
	}

	kfree(job);
}

void panfrost_job_put(struct panfrost_job *job)
{
	kref_put(&job->refcount, panfrost_job_cleanup);
}

static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}

static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job,
						 struct drm_sched_entity *s_entity)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct dma_fence *fence;
	unsigned int i;

	/* Explicit fences */
	for (i = 0; i < job->in_fence_count; i++) {
		if (job->in_fences[i]) {
			fence = job->in_fences[i];
			job->in_fences[i] = NULL;
			return fence;
		}
	}

	/* Implicit fences, max. one per BO */
	for (i = 0; i < job->bo_count; i++) {
		if (job->implicit_fences[i]) {
			fence = job->implicit_fences[i];
			job->implicit_fences[i] = NULL;
			return fence;
		}
	}

	return NULL;
}

static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct dma_fence *fence = NULL;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	pfdev->jobs[slot] = job;

	fence = panfrost_fence_create(pfdev, slot);
	if (IS_ERR(fence))
		return NULL;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	panfrost_job_hw_submit(job, slot);

	return fence;
}

void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
{
	int j;
	u32 irq_mask = 0;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		irq_mask |= MK_JS_MASK(j);
	}

	job_write(pfdev, JOB_INT_CLEAR, irq_mask);
	job_write(pfdev, JOB_INT_MASK, irq_mask);
}
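
/*
 * Note on the mask used above and in the IRQ handler below: the handler
 * tests both JOB_INT_MASK_DONE(j) and JOB_INT_MASK_ERR(j) against the
 * per-slot mask, so MK_JS_MASK(j) is assumed to cover both the "done" and
 * the "fault" interrupt bit of job slot j (the DONE bits sit in the low
 * half and the ERR bits in the high half of JOB_INT_STAT).  See
 * panfrost_regs.h for the actual bit definitions.
 */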

static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue,
				    struct drm_sched_job *bad)
{
	bool stopped = false;

	mutex_lock(&queue->lock);
	if (!queue->stopped) {
		drm_sched_stop(&queue->sched, bad);
		if (bad)
			drm_sched_increase_karma(bad);
		queue->stopped = true;
		stopped = true;
	}
	mutex_unlock(&queue->lock);

	return stopped;
}

static void panfrost_job_timedout(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int js = panfrost_job_get_slot(job);
	unsigned long flags;
	int i;

	/*
	 * If the GPU managed to complete this job's fence, the timeout is
	 * spurious. Bail out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return;

	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
		job_read(pfdev, JS_CONFIG(js)),
		job_read(pfdev, JS_STATUS(js)),
		job_read(pfdev, JS_HEAD_LO(js)),
		job_read(pfdev, JS_TAIL_LO(js)),
		sched_job);

	/* Scheduler is already stopped, nothing to do. */
	if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job))
		return;

	if (!mutex_trylock(&pfdev->reset_lock))
		return;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;

		/*
		 * If the queue is still active, make sure we wait for any
		 * pending timeouts.
		 */
		if (!pfdev->js->queue[i].stopped)
			cancel_delayed_work_sync(&sched->work_tdr);

		/*
		 * If the scheduler was not already stopped, there's a tiny
		 * chance a timeout has expired just before we stopped it, and
		 * drm_sched_stop() does not flush pending works. Let's flush
		 * them now so the timeout handler doesn't get called in the
		 * middle of a reset.
		 */
		if (panfrost_scheduler_stop(&pfdev->js->queue[i], NULL))
			cancel_delayed_work_sync(&sched->work_tdr);

		/*
		 * Now that we cancelled the pending timeouts, we can safely
		 * reset the stopped state.
		 */
		pfdev->js->queue[i].stopped = false;
	}

	spin_lock_irqsave(&pfdev->js->job_lock, flags);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		if (pfdev->jobs[i]) {
			pm_runtime_put_noidle(pfdev->dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
			pfdev->jobs[i] = NULL;
		}
	}
	spin_unlock_irqrestore(&pfdev->js->job_lock, flags);

	panfrost_device_reset(pfdev);

	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);

	mutex_unlock(&pfdev->reset_lock);

	/* restart scheduler after GPU is usable again */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, true);
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.dependency = panfrost_job_dependency,
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};

static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;
	u32 status = job_read(pfdev, JOB_INT_STAT);
	int j;

	dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status);

	if (!status)
		return IRQ_NONE;

	pm_runtime_mark_last_busy(pfdev->dev);

	for (j = 0; status; j++) {
		u32 mask = MK_JS_MASK(j);

		if (!(status & mask))
			continue;

		job_write(pfdev, JOB_INT_CLEAR, mask);

		if (status & JOB_INT_MASK_ERR(j)) {
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);

			dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
				j,
				panfrost_exception_name(pfdev, job_read(pfdev, JS_STATUS(j))),
				job_read(pfdev, JS_HEAD_LO(j)),
				job_read(pfdev, JS_TAIL_LO(j)));

			drm_sched_fault(&pfdev->js->queue[j].sched);
		}

		if (status & JOB_INT_MASK_DONE(j)) {
			struct panfrost_job *job;

			spin_lock(&pfdev->js->job_lock);
			job = pfdev->jobs[j];
			/* Only NULL if job timeout occurred */
			if (job) {
				pfdev->jobs[j] = NULL;

				panfrost_mmu_as_put(pfdev, &job->file_priv->mmu);
				panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

				dma_fence_signal_locked(job->done_fence);
				pm_runtime_put_autosuspend(pfdev->dev);
			}
			spin_unlock(&pfdev->js->job_lock);
		}

		status &= ~mask;
	}

	return IRQ_HANDLED;
}
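
/*
 * Locking note: pfdev->jobs[] is the hand-off point between
 * panfrost_job_run() (which installs a job), the IRQ handler above (which
 * clears it on completion) and panfrost_job_timedout() (which clears it
 * before a reset).  The js->job_lock spinlock is what prevents the IRQ path
 * and the timeout/reset path from both releasing the same job; the timeout
 * path takes it with interrupts disabled because the handler runs in
 * hard-IRQ context.
 */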

int panfrost_job_init(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js;
	int ret, j, irq;

	pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
	if (!js)
		return -ENOMEM;

	spin_lock_init(&js->job_lock);

	irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
	if (irq <= 0)
		return -ENODEV;

	ret = devm_request_irq(pfdev->dev, irq, panfrost_job_irq_handler,
			       IRQF_SHARED, KBUILD_MODNAME "-job", pfdev);
	if (ret) {
		dev_err(pfdev->dev, "failed to request job irq");
		return ret;
	}

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		mutex_init(&js->queue[j].lock);

		js->queue[j].fence_context = dma_fence_context_alloc(1);

		ret = drm_sched_init(&js->queue[j].sched,
				     &panfrost_sched_ops,
				     1, 0, msecs_to_jiffies(500),
				     "pan_js");
		if (ret) {
			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
			goto err_sched;
		}
	}

	panfrost_job_enable_interrupts(pfdev);

	return 0;

err_sched:
	for (j--; j >= 0; j--)
		drm_sched_fini(&js->queue[j].sched);

	return ret;
}

void panfrost_job_fini(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int j;

	job_write(pfdev, JOB_INT_MASK, 0);

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		drm_sched_fini(&js->queue[j].sched);
		mutex_destroy(&js->queue[j].lock);
	}
}

int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	struct panfrost_job_slot *js = pfdev->js;
	struct drm_gpu_scheduler *sched;
	int ret, i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		sched = &js->queue[i].sched;
		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
					    DRM_SCHED_PRIORITY_NORMAL, &sched,
					    1, NULL);
		if (WARN_ON(ret))
			return ret;
	}
	return 0;
}

void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
{
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
}

int panfrost_job_is_idle(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* If there are any jobs in the HW queue, we're not idle */
		if (atomic_read(&js->queue[i].sched.hw_rq_count))
			return false;
	}

	return true;
}
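
/*
 * Expected wiring of the entry points above, sketched here for orientation
 * (assumptions about the rest of the driver, not definitions made in this
 * file):
 *
 *   - panfrost_job_init()/panfrost_job_fini() pair with device init/teardown;
 *     init requests the "job" IRQ and creates one drm_gpu_scheduler per job
 *     slot with a 500ms job timeout.
 *   - panfrost_job_open()/panfrost_job_close() pair with DRM file open/close
 *     and create one scheduler entity per slot for that client.
 *   - panfrost_job_is_idle() is meant for power-management code to check that
 *     no job is queued on any slot before the GPU is suspended.
 */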