// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/dma-resv.h>
#include <drm/gpu_scheduler.h>
#include <drm/panfrost_drm.h>

#include "panfrost_device.h"
#include "panfrost_devfreq.h"
#include "panfrost_job.h"
#include "panfrost_features.h"
#include "panfrost_issues.h"
#include "panfrost_gem.h"
#include "panfrost_regs.h"
#include "panfrost_gpu.h"
#include "panfrost_mmu.h"

#define JOB_TIMEOUT_MS 500

#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
#define job_read(dev, reg) readl(dev->iomem + (reg))

struct panfrost_queue_state {
	struct drm_gpu_scheduler sched;
	u64 fence_context;
	u64 emit_seqno;
};

struct panfrost_job_slot {
	struct panfrost_queue_state queue[NUM_JOB_SLOTS];
	spinlock_t job_lock;
	int irq;
};

static struct panfrost_job *
to_panfrost_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct panfrost_job, base);
}

struct panfrost_fence {
	struct dma_fence base;
	struct drm_device *dev;
	/* panfrost seqno for signaled() test */
	u64 seqno;
	int queue;
};

static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence *fence)
{
	return (struct panfrost_fence *)fence;
}

static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	return "panfrost";
}

static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
{
	struct panfrost_fence *f = to_panfrost_fence(fence);

	switch (f->queue) {
	case 0:
		return "panfrost-js-0";
	case 1:
		return "panfrost-js-1";
	case 2:
		return "panfrost-js-2";
	default:
		return NULL;
	}
}

static const struct dma_fence_ops panfrost_fence_ops = {
	.get_driver_name = panfrost_fence_get_driver_name,
	.get_timeline_name = panfrost_fence_get_timeline_name,
};

static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
{
	struct panfrost_fence *fence;
	struct panfrost_job_slot *js = pfdev->js;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return ERR_PTR(-ENOMEM);

	fence->dev = pfdev->ddev;
	fence->queue = js_num;
	fence->seqno = ++js->queue[js_num].emit_seqno;
	dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
		       js->queue[js_num].fence_context, fence->seqno);

	return &fence->base;
}

static int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* JS0: fragment jobs.
	 * JS1: vertex/tiler jobs
	 * JS2: compute jobs
	 */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

	/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif
	return 1;
}

static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	u64 affinity;

	/*
	 * Use all cores for now.
	 * Eventually we may need to support tiler only jobs and h/w with
	 * multiple (2) coherent core groups
	 */
	affinity = pfdev->features.shader_present;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity & 0xFFFFFFFF);
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32);
}

static u32
panfrost_get_job_chain_flag(const struct panfrost_job *job)
{
	struct panfrost_fence *f = to_panfrost_fence(job->done_fence);

	if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		return 0;

	return (f->seqno & 1) ? JS_CONFIG_JOB_CHAIN_FLAG : 0;
}

static struct panfrost_job *
panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
{
	struct panfrost_job *job = pfdev->jobs[slot][0];

	WARN_ON(!job);
	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
	pfdev->jobs[slot][1] = NULL;

	return job;
}

static unsigned int
panfrost_enqueue_job(struct panfrost_device *pfdev, int slot,
		     struct panfrost_job *job)
{
	if (WARN_ON(!job))
		return 0;

	if (!pfdev->jobs[slot][0]) {
		pfdev->jobs[slot][0] = job;
		return 0;
	}

	WARN_ON(pfdev->jobs[slot][1]);
	pfdev->jobs[slot][1] = job;
	WARN_ON(panfrost_get_job_chain_flag(job) ==
		panfrost_get_job_chain_flag(pfdev->jobs[slot][0]));
	return 1;
}

static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
{
	struct panfrost_device *pfdev = job->pfdev;
	unsigned int subslot;
	u32 cfg;
	u64 jc_head = job->jc;
	int ret;

	panfrost_devfreq_record_busy(&pfdev->pfdevfreq);

	ret = pm_runtime_get_sync(pfdev->dev);
	if (ret < 0)
		return;

	if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js))))
		return;

	cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu);

	job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF);
	job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32);

	panfrost_job_write_affinity(pfdev, job->requirements, js);

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
	 * start */
	cfg |= JS_CONFIG_THREAD_PRI(8) |
	       JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
	       JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
	       panfrost_get_job_chain_flag(job);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;

	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
		cfg |= JS_CONFIG_START_MMU;

	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);

	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
		job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);

	/* GO ! */
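
	/* Enqueue the job and, if no reset is pending, kick JS_COMMAND_NEXT
	 * under job_lock so the interrupt handler and the reset path see a
	 * consistent pfdev->jobs[] state.
	 */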
	spin_lock(&pfdev->js->job_lock);
	subslot = panfrost_enqueue_job(pfdev, js, job);
	/* Don't queue the job if a reset is in progress */
	if (!atomic_read(&pfdev->reset.pending)) {
		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
		dev_dbg(pfdev->dev,
			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
			job, js, subslot, jc_head, cfg & 0xf);
	}
	spin_unlock(&pfdev->js->job_lock);
}

static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct xarray *deps)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		/* panfrost always uses write mode in its current uapi */
		ret = drm_gem_fence_array_add_implicit(deps, bos[i], true);
		if (ret)
			return ret;
	}

	return 0;
}

static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_excl_fence(bos[i]->resv, fence);
}

int panfrost_job_push(struct panfrost_job *job)
{
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot];
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	if (ret)
		return ret;

	mutex_lock(&pfdev->sched_lock);

	ret = drm_sched_job_init(&job->base, entity, NULL);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);

	ret = panfrost_acquire_object_fences(job->bos, job->bo_count,
					     &job->deps);
	if (ret) {
		mutex_unlock(&pfdev->sched_lock);
		goto unlock;
	}

	kref_get(&job->refcount); /* put by scheduler job completion */

	drm_sched_entity_push_job(&job->base, entity);

	mutex_unlock(&pfdev->sched_lock);

	panfrost_attach_object_fences(job->bos, job->bo_count,
				      job->render_done_fence);

unlock:
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

	return ret;
}

static void panfrost_job_cleanup(struct kref *ref)
{
	struct panfrost_job *job = container_of(ref, struct panfrost_job,
						refcount);
	struct dma_fence *fence;
	unsigned long index;
	unsigned int i;

	xa_for_each(&job->deps, index, fence)
		dma_fence_put(fence);
	xa_destroy(&job->deps);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->render_done_fence);

	if (job->mappings) {
		for (i = 0; i < job->bo_count; i++) {
			if (!job->mappings[i])
				break;

			atomic_dec(&job->mappings[i]->obj->gpu_usecount);
			panfrost_gem_mapping_put(job->mappings[i]);
		}
		kvfree(job->mappings);
	}

	if (job->bos) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bos[i]);

		kvfree(job->bos);
	}

	kfree(job);
}

void panfrost_job_put(struct panfrost_job *job)
{
	kref_put(&job->refcount, panfrost_job_cleanup);
}

static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(job);
}

static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job,
						 struct drm_sched_entity *s_entity)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
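
	/* Dependencies were collected in job->deps at submit time. Hand them
	 * to the scheduler one at a time; xa_erase() transfers the fence
	 * reference, which the scheduler drops once the dependency is
	 * resolved.
	 */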
	if (!xa_empty(&job->deps))
		return xa_erase(&job->deps, job->last_dep++);

	return NULL;
}

static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int slot = panfrost_job_get_slot(job);
	struct dma_fence *fence = NULL;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Nothing to execute: can happen if the job has finished while
	 * we were resetting the GPU.
	 */
	if (!job->jc)
		return NULL;

	fence = panfrost_fence_create(pfdev, slot);
	if (IS_ERR(fence))
		return fence;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	panfrost_job_hw_submit(job, slot);

	return fence;
}

void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
{
	int j;
	u32 irq_mask = 0;

	for (j = 0; j < NUM_JOB_SLOTS; j++)
		irq_mask |= MK_JS_MASK(j);

	job_write(pfdev, JOB_INT_CLEAR, irq_mask);
	job_write(pfdev, JOB_INT_MASK, irq_mask);
}

static void panfrost_job_handle_err(struct panfrost_device *pfdev,
				    struct panfrost_job *job,
				    unsigned int js)
{
	u32 js_status = job_read(pfdev, JS_STATUS(js));
	const char *exception_name = panfrost_exception_name(js_status);
	bool signal_fence = true;

	if (!panfrost_exception_is_fault(js_status)) {
		dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	} else {
		dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
			js, exception_name,
			job_read(pfdev, JS_HEAD_LO(js)),
			job_read(pfdev, JS_TAIL_LO(js)));
	}

	if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) {
		/* Update the job head so we can resume */
		job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
			  ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);

		/* The job will be resumed, don't signal the fence */
		signal_fence = false;
	} else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) {
		/* Job has been hard-stopped, flag it as canceled */
		dma_fence_set_error(job->done_fence, -ECANCELED);
		job->jc = 0;
	} else if (panfrost_exception_is_fault(js_status)) {
		/* We might want to provide finer-grained error code based on
		 * the exception type, but unconditionally setting to EINVAL
		 * is good enough for now.
		 */
		dma_fence_set_error(job->done_fence, -EINVAL);
		job->jc = 0;
	}

	panfrost_mmu_as_put(pfdev, job->file_priv->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	if (signal_fence)
		dma_fence_signal_locked(job->done_fence);

	pm_runtime_put_autosuspend(pfdev->dev);

	if (panfrost_exception_needs_reset(pfdev, js_status)) {
		atomic_set(&pfdev->reset.pending, 1);
		drm_sched_fault(&pfdev->js->queue[js].sched);
	}
}

static void panfrost_job_handle_done(struct panfrost_device *pfdev,
				     struct panfrost_job *job)
{
	/* Set ->jc to 0 to avoid re-submitting an already finished job (can
	 * happen when we receive the DONE interrupt while doing a GPU reset).
	 */
	job->jc = 0;
	panfrost_mmu_as_put(pfdev, job->file_priv->mmu);
	panfrost_devfreq_record_idle(&pfdev->pfdevfreq);

	dma_fence_signal_locked(job->done_fence);
	pm_runtime_put_autosuspend(pfdev->dev);
}

static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status)
{
	struct panfrost_job *done[NUM_JOB_SLOTS][2] = {};
	struct panfrost_job *failed[NUM_JOB_SLOTS] = {};
	u32 js_state = 0, js_events = 0;
	unsigned int i, j;

	/* First we collect all failed/done jobs. */
	while (status) {
		u32 js_state_mask = 0;

		for (j = 0; j < NUM_JOB_SLOTS; j++) {
			if (status & MK_JS_MASK(j))
				js_state_mask |= MK_JS_MASK(j);

			if (status & JOB_INT_MASK_DONE(j)) {
				if (done[j][0])
					done[j][1] = panfrost_dequeue_job(pfdev, j);
				else
					done[j][0] = panfrost_dequeue_job(pfdev, j);
			}

			if (status & JOB_INT_MASK_ERR(j)) {
				/* Cancel the next submission. Will be submitted
				 * after we're done handling this failure if
				 * there's no reset pending.
				 */
				job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
				failed[j] = panfrost_dequeue_job(pfdev, j);
			}
		}

		/* JS_STATE is sampled when JOB_INT_CLEAR is written.
		 * For each BIT(slot) or BIT(slot + 16) bit written to
		 * JOB_INT_CLEAR, the corresponding bits in JS_STATE
		 * (BIT(slot) and BIT(slot + 16)) are updated, but this
		 * is racy. If we only have one job done at the time we
		 * read JOB_INT_RAWSTAT but the second job fails before we
		 * clear the status, we end up with a status containing
		 * only the DONE bit and consider both jobs as DONE since
		 * JS_STATE reports both NEXT and CURRENT as inactive.
		 * To prevent that, let's repeat these clear+read steps
		 * until status is 0.
		 */
		job_write(pfdev, JOB_INT_CLEAR, status);
		js_state &= ~js_state_mask;
		js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
		js_events |= status;
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}

	/* Then we handle the dequeued jobs. */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (failed[j]) {
			panfrost_job_handle_err(pfdev, failed[j], j);
		} else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) {
			/* When the current job doesn't fail, the JM dequeues
			 * the next job without waiting for an ACK; this means
			 * we can have 2 jobs dequeued and only catch the
			 * interrupt when the second one is done. If both slots
			 * are inactive, but one job remains in pfdev->jobs[j],
			 * consider it done. Of course that doesn't apply if a
			 * failure happened since we cancelled execution of the
			 * job in _NEXT (see above).
			 */
			if (WARN_ON(!done[j][0]))
				done[j][0] = panfrost_dequeue_job(pfdev, j);
			else
				done[j][1] = panfrost_dequeue_job(pfdev, j);
		}

		for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
			panfrost_job_handle_done(pfdev, done[j][i]);
	}

	/* And finally we requeue jobs that were waiting in the second slot
	 * and have been stopped if we detected a failure on the first slot.
	 */
	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		if (!(js_events & MK_JS_MASK(j)))
			continue;

		if (!failed[j] || !pfdev->jobs[j][0])
			continue;

		if (pfdev->jobs[j][0]->jc == 0) {
			/* The job was cancelled, signal the fence now */
			struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);

			dma_fence_set_error(canceled->done_fence, -ECANCELED);
			panfrost_job_handle_done(pfdev, canceled);
		} else if (!atomic_read(&pfdev->reset.pending)) {
			/* Requeue the job we removed if no reset is pending */
			job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
		}
	}
}

static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
{
	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);

	while (status) {
		pm_runtime_mark_last_busy(pfdev->dev);

		spin_lock(&pfdev->js->job_lock);
		panfrost_job_handle_irq(pfdev, status);
		spin_unlock(&pfdev->js->job_lock);
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}
}

static u32 panfrost_active_slots(struct panfrost_device *pfdev,
				 u32 *js_state_mask, u32 js_state)
{
	u32 rawstat;

	if (!(js_state & *js_state_mask))
		return 0;

	rawstat = job_read(pfdev, JOB_INT_RAWSTAT);
	if (rawstat) {
		unsigned int i;

		for (i = 0; i < NUM_JOB_SLOTS; i++) {
			if (rawstat & MK_JS_MASK(i))
				*js_state_mask &= ~MK_JS_MASK(i);
		}
	}

	return js_state & *js_state_mask;
}

static void
panfrost_reset(struct panfrost_device *pfdev,
	       struct drm_sched_job *bad)
{
	u32 js_state, js_state_mask = 0xffffffff;
	unsigned int i, j;
	bool cookie;
	int ret;

	if (!atomic_read(&pfdev->reset.pending))
		return;

	/* Stop the schedulers.
	 *
	 * FIXME: We temporarily get out of the dma_fence_signalling section
	 * because the cleanup path generates lockdep splats when taking locks
	 * to release job resources. We should rework the code to follow this
	 * pattern:
	 *
	 *	try_lock
	 *	if (locked)
	 *		release
	 *	else
	 *		schedule_work_to_release_later
	 */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_stop(&pfdev->js->queue[i].sched, bad);

	cookie = dma_fence_begin_signalling();

	if (bad)
		drm_sched_increase_karma(bad);

	/* Mask job interrupts and synchronize to make sure we won't be
	 * interrupted during our reset.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);
	synchronize_irq(pfdev->js->irq);

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* Cancel the next job and soft-stop the running job. */
		job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
		job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
	}

	/* Wait at most 10ms for soft-stops to complete */
	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
				 !panfrost_active_slots(pfdev, &js_state_mask, js_state),
				 10, 10000);

	if (ret)
		dev_err(pfdev->dev, "Soft-stop failed\n");

	/* Handle the remaining interrupts before we reset. */
	panfrost_job_handle_irqs(pfdev);

	/* Remaining interrupts have been handled, but we might still have
	 * stuck jobs. Let's make sure the PM counters stay balanced by
	 * manually calling pm_runtime_put_noidle() and
	 * panfrost_devfreq_record_idle() for each stuck job.
	 */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
			pm_runtime_put_noidle(pfdev->dev);
			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
		}
	}
	memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
	spin_unlock(&pfdev->js->job_lock);

	/* Proceed with reset now. */
	panfrost_device_reset(pfdev);

	/* panfrost_device_reset() unmasks job interrupts, but we want to
	 * keep them masked a bit longer.
	 */
	job_write(pfdev, JOB_INT_MASK, 0);

	/* GPU has been reset, we can clear the reset pending bit. */
	atomic_set(&pfdev->reset.pending, 0);

	/* Now resubmit jobs that were previously queued but didn't have a
	 * chance to finish.
	 * FIXME: We temporarily get out of the DMA fence signalling section
	 * while resubmitting jobs because the job submission logic will
	 * allocate memory with the GFP_KERNEL flag which can trigger memory
	 * reclaim and exposes a lock ordering issue.
	 */
	dma_fence_end_signalling(cookie);
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
	cookie = dma_fence_begin_signalling();

	/* Restart the schedulers */
	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_start(&pfdev->js->queue[i].sched, true);

	/* Re-enable job interrupts now that everything has been restarted. */
	job_write(pfdev, JOB_INT_MASK,
		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
		  GENMASK(NUM_JOB_SLOTS - 1, 0));

	dma_fence_end_signalling(cookie);
}

static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
						     *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
	int js = panfrost_job_get_slot(job);

	/*
	 * If the GPU managed to complete this job's fence, the timeout is
	 * spurious. Bail out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return DRM_GPU_SCHED_STAT_NOMINAL;

	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
		job_read(pfdev, JS_CONFIG(js)),
		job_read(pfdev, JS_STATUS(js)),
		job_read(pfdev, JS_HEAD_LO(js)),
		job_read(pfdev, JS_TAIL_LO(js)),
		sched_job);

	atomic_set(&pfdev->reset.pending, 1);
	panfrost_reset(pfdev, sched_job);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void panfrost_reset_work(struct work_struct *work)
{
	struct panfrost_device *pfdev;

	pfdev = container_of(work, struct panfrost_device, reset.work);
	panfrost_reset(pfdev, NULL);
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
	.dependency = panfrost_job_dependency,
	.run_job = panfrost_job_run,
	.timedout_job = panfrost_job_timedout,
	.free_job = panfrost_job_free
};

static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data)
{
	struct panfrost_device *pfdev = data;

	panfrost_job_handle_irqs(pfdev);
	job_write(pfdev, JOB_INT_MASK,
		  GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
		  GENMASK(NUM_JOB_SLOTS - 1, 0));
	return IRQ_HANDLED;
}

static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
{
	struct panfrost_device *pfdev = data;
	u32 status = job_read(pfdev, JOB_INT_STAT);

	if (!status)
		return IRQ_NONE;

	job_write(pfdev, JOB_INT_MASK, 0);
	return IRQ_WAKE_THREAD;
}

int panfrost_job_init(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js;
	unsigned int nentries = 2;
	int ret, j;

	/* All GPUs have two entries per queue, but without jobchain
	 * disambiguation stopping the right job in the close path is tricky,
	 * so let's just advertise one entry in that case.
	 */
	if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
		nentries = 1;

	pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
	if (!js)
		return -ENOMEM;

	INIT_WORK(&pfdev->reset.work, panfrost_reset_work);
	spin_lock_init(&js->job_lock);

	js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
	if (js->irq <= 0)
		return -ENODEV;

	ret = devm_request_threaded_irq(pfdev->dev, js->irq,
					panfrost_job_irq_handler,
					panfrost_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME "-job",
					pfdev);
	if (ret) {
		dev_err(pfdev->dev, "failed to request job irq");
		return ret;
	}

	pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0);
	if (!pfdev->reset.wq)
		return -ENOMEM;

	for (j = 0; j < NUM_JOB_SLOTS; j++) {
		js->queue[j].fence_context = dma_fence_context_alloc(1);

		ret = drm_sched_init(&js->queue[j].sched,
				     &panfrost_sched_ops,
				     nentries, 0,
				     msecs_to_jiffies(JOB_TIMEOUT_MS),
				     pfdev->reset.wq,
				     NULL, "pan_js");
		if (ret) {
			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
			goto err_sched;
		}
	}

	panfrost_job_enable_interrupts(pfdev);

	return 0;

err_sched:
	for (j--; j >= 0; j--)
		drm_sched_fini(&js->queue[j].sched);

	destroy_workqueue(pfdev->reset.wq);
	return ret;
}

void panfrost_job_fini(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int j;

	job_write(pfdev, JOB_INT_MASK, 0);

	for (j = 0; j < NUM_JOB_SLOTS; j++)
		drm_sched_fini(&js->queue[j].sched);

	cancel_work_sync(&pfdev->reset.work);
	destroy_workqueue(pfdev->reset.wq);
}

int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	struct panfrost_job_slot *js = pfdev->js;
	struct drm_gpu_scheduler *sched;
	int ret, i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		sched = &js->queue[i].sched;
		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
					    DRM_SCHED_PRIORITY_NORMAL, &sched,
					    1, NULL);
		if (WARN_ON(ret))
			return ret;
	}
	return 0;
}

void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
{
	struct panfrost_device *pfdev = panfrost_priv->pfdev;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++)
		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);

	/* Kill in-flight jobs */
	spin_lock(&pfdev->js->job_lock);
	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
		int j;

		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
			struct panfrost_job *job = pfdev->jobs[i][j];
			u32 cmd;

			if (!job || job->base.entity != entity)
				continue;

			if (j == 1) {
				/* Try to cancel the job before it starts */
				job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
				/* Reset the job head so it doesn't get restarted if
				 * the job in the first slot failed.
				 */
				job->jc = 0;
			}

			if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
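				/* With jobchain disambiguation, HARD_STOP_0/1
				 * only stop the job that was submitted with the
				 * matching job chain flag (see
				 * panfrost_get_job_chain_flag()), so we target
				 * exactly the job we queued on this slot.
				 */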
				cmd = panfrost_get_job_chain_flag(job) ?
				      JS_COMMAND_HARD_STOP_1 :
				      JS_COMMAND_HARD_STOP_0;
			} else {
				cmd = JS_COMMAND_HARD_STOP;
			}

			job_write(pfdev, JS_COMMAND(i), cmd);
		}
	}
	spin_unlock(&pfdev->js->job_lock);
}

int panfrost_job_is_idle(struct panfrost_device *pfdev)
{
	struct panfrost_job_slot *js = pfdev->js;
	int i;

	for (i = 0; i < NUM_JOB_SLOTS; i++) {
		/* If there are any jobs in the HW queue, we're not idle */
		if (atomic_read(&js->queue[i].sched.hw_rq_count))
			return false;
	}

	return true;
}