// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

/**
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
 *
 * Motivation:
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
 * These expanded contexts enable a number of new abilities, especially
 * "Execlists" (also implemented in this file).
 *
 * One of the main differences with the legacy HW contexts is that logical
 * ring contexts incorporate many more things into the context's state, like
 * PDPs or ringbuffer control registers:
 *
 * The reason why PDPs are included in the context is straightforward: as
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
 * contained there means you don't need to do a ppgtt->switch_mm yourself,
 * instead, the GPU will do it for you on the context switch.
 *
 * But, what about the ringbuffer control registers (head, tail, etc..)?
 * shouldn't we just need a set of those per engine command streamer? This is
 * where the name "Logical Rings" starts to make sense: by virtualizing the
 * rings, the engine cs shifts to a new "ring buffer" with every context
 * switch. When you want to submit a workload to the GPU you: A) choose your
 * context, B) find its appropriate virtualized ring, C) write commands to it
 * and then, finally, D) tell the GPU to switch to that context.
 *
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
 * to a context is via a context execution list, ergo "Execlists".
 *
 * LRC implementation:
 * Regarding the creation of contexts, we have:
 *
 * - One global default context.
 * - One local default context for each opened fd.
 * - One local extra context for each context create ioctl call.
 *
 * Now that ringbuffers belong per-context (and not per-engine, like before)
 * and that contexts are uniquely tied to a given engine (and not reusable,
 * like before) we need:
 *
 * - One ringbuffer per-engine inside each context.
 * - One backing object per-engine inside each context.
 *
 * The global default context starts its life with these new objects fully
 * allocated and populated. The local default context for each opened fd is
 * more complex, because we don't know at creation time which engine is going
 * to use them. To handle this, we have implemented a deferred creation of LR
 * contexts:
 *
 * The local context starts its life as a hollow or blank holder, that only
 * gets populated for a given engine once we receive an execbuffer. If later
 * on we receive another execbuffer ioctl for the same context but a different
 * engine, we allocate/populate a new ringbuffer and context backing object and
 * so on.
 *
 * Finally, regarding local contexts created using the ioctl call: as they are
 * only allowed with the render ring, we can allocate & populate them right
 * away (no need to defer anything, at least for now).
 *
 * Execlists implementation:
 * Execlists are the new method by which, on gen8+ hardware, workloads are
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
 * This method works as follows:
 *
 * When a request is committed, its commands (the BB start and any leading or
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
 * for the appropriate context.
 * The tail pointer in the hardware context is not updated at this time, but
 * instead, kept by the driver in the ringbuffer structure. A structure
 * representing this request is added to a request queue for the appropriate
 * engine: this structure contains a copy of the context's tail after the
 * request was written to the ring buffer and a pointer to the context itself.
 *
 * If the engine's request queue was empty before the request was added, the
 * queue is processed immediately. Otherwise the queue will be processed during
 * a context switch interrupt. In any case, elements on the queue will get sent
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 * globally unique 20-bit submission ID.
 *
 * When execution of a request completes, the GPU updates the context status
 * buffer with a context complete event and generates a context switch interrupt.
 * During the interrupt handling, the driver examines the events in the buffer:
 * for each context complete event, if the announced ID matches that on the head
 * of the request queue, then that request is retired and removed from the queue.
 *
 * After processing, if any requests were retired and the queue is not empty
 * then a new execution list can be submitted. The two requests at the front of
 * the queue are next to be submitted but since a context may not occur twice in
 * an execution list, if subsequent requests have the same ID as the first then
 * the two requests must be combined. This is done simply by discarding requests
 * at the head of the queue until either only one request is left (in which case
 * we use a NULL second context) or the first two requests have unique IDs.
 *
 * By always executing the first two requests in the queue the driver ensures
 * that the GPU is kept as busy as possible. In the case where a single context
 * completes but a second context is still executing, the request for this second
 * context will be at the head of the queue when we remove the first one. This
 * request will then be resubmitted along with a new request for a different context,
 * which will cause the hardware to continue executing the second request and queue
 * the new request (the GPU detects the condition of a context getting preempted
 * with the same context and optimizes the context switch flow by not doing
 * preemption, but just sampling the new tail pointer).
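 *
 * As a rough sketch (the names refer to this file's implementation below;
 * the flow is deliberately simplified), a submission round trip looks like:
 *
 *	execlists_dequeue()       - pick the next pair of runnable contexts
 *	execlists_submit_ports()  - write their descriptors to the ELSP
 *	  ... the GPU executes and raises a context-switch interrupt ...
 *	process_csb()             - decode events from the context status
 *	                            buffer and retire/promote requests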
 *
 */
#include <linux/interrupt.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "gen8_engine_cs.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_stats.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
#include "shmem_utils.h"

#define RING_EXECLIST_QFULL		(1 << 0x2)
#define RING_EXECLIST1_VALID		(1 << 0x3)
#define RING_EXECLIST0_VALID		(1 << 0x4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)

#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)

#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
#define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
#define GEN12_IDLE_CTX_ID		0x7FF
#define GEN12_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)

/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */

struct virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
	struct rcu_work rcu;

	/*
	 * We allow only a single request through the virtual engine at a time
	 * (each request in the timeline waits for the completion fence of
	 * the previous before being submitted). By restricting ourselves to
	 * only submitting a single request, each request is placed on to a
	 * physical engine to maximise load spreading (by virtue of the late
	 * greedy scheduling -- each real engine takes the next available
	 * request upon idling).
	 */
	struct i915_request *request;

	/*
	 * We keep a rbtree of available virtual engines inside each physical
	 * engine, sorted by priority. Here we preallocate the nodes we need
	 * for the virtual engine, indexed by physical_engine->id.
	 */
	struct ve_node {
		struct rb_node rb;
		int prio;
	} nodes[I915_NUM_ENGINES];

	/*
	 * Keep track of bonded pairs -- restrictions upon our selection of
	 * physical engines any particular request may be submitted to.
	 * If we receive a submit-fence from a master engine, we will only
	 * use one of sibling_mask physical engines.
	 */
	struct ve_bond {
		const struct intel_engine_cs *master;
		intel_engine_mask_t sibling_mask;
	} *bonds;
	unsigned int num_bonds;

	/*
	 * And finally, which physical engines this virtual engine maps onto.
	 */
	unsigned int num_siblings;
	struct intel_engine_cs *siblings[];
};

static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!intel_engine_is_virtual(engine));
	return container_of(engine, struct virtual_engine, base);
}

static struct i915_request *
__active_request(const struct intel_timeline * const tl,
		 struct i915_request *rq,
		 int error)
{
	struct i915_request *active = rq;

	list_for_each_entry_from_reverse(rq, &tl->requests, link) {
		if (__i915_request_is_complete(rq))
			break;

		if (error) {
			i915_request_set_error_once(rq, error);
			__i915_request_skip(rq);
		}
		active = rq;
	}

	return active;
}

static struct i915_request *
active_request(const struct intel_timeline * const tl, struct i915_request *rq)
{
	return __active_request(tl, rq, 0);
}

static void ring_set_paused(const struct intel_engine_cs *engine, int state)
{
	/*
	 * We inspect HWS_PREEMPT with a semaphore inside
	 * engine->emit_fini_breadcrumb. If the dword is true,
	 * the ring is paused as the semaphore will busywait
	 * until the dword is false.
	 */
	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
	if (state)
		wmb();
}

static struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

static int rq_prio(const struct i915_request *rq)
{
	return READ_ONCE(rq->sched.attr.priority);
}

static int effective_prio(const struct i915_request *rq)
{
	int prio = rq_prio(rq);

	/*
	 * If this request is special and must not be interrupted at any
	 * cost, so be it. Note we are only checking the most recent request
	 * in the context and so may be masking an earlier vip request. It
	 * is hoped that under the conditions where nopreempt is used, this
	 * will not matter (i.e. all requests to that context will be
	 * nopreempt for as long as desired).
	 */
	if (i915_request_has_nopreempt(rq))
		prio = I915_PRIORITY_UNPREEMPTABLE;

	return prio;
}

static int queue_prio(const struct intel_engine_execlists *execlists)
{
	struct rb_node *rb;

	rb = rb_first_cached(&execlists->queue);
	if (!rb)
		return INT_MIN;

	return to_priolist(rb)->priority;
}

static int virtual_prio(const struct intel_engine_execlists *el)
{
	struct rb_node *rb = rb_first_cached(&el->virtual);

	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
}

static bool need_preempt(const struct intel_engine_cs *engine,
			 const struct i915_request *rq)
{
	int last_prio;

	if (!intel_engine_has_semaphores(engine))
		return false;

	/*
	 * Check if the current priority hint merits a preemption attempt.
	 *
	 * We record the highest value priority we saw during rescheduling
	 * prior to this dequeue, therefore we know that if it is strictly
	 * less than the current tail of ELSP[0], we do not need to force
	 * a preempt-to-idle cycle.
	 *
	 * However, the priority hint is a mere hint that we may need to
	 * preempt. If that hint is stale or we may be trying to preempt
	 * ourselves, ignore the request.
	 *
	 * More naturally we would write
	 *	prio >= max(0, last);
	 * except that we wish to prevent triggering preemption at the same
	 * priority level: the task that is running should remain running
	 * to preserve FIFO ordering of dependencies.
	 */
	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
	if (engine->execlists.queue_priority_hint <= last_prio)
		return false;

	/*
	 * Check against the first request in ELSP[1], it will, thanks to the
	 * power of PI, be the highest priority of that context.
	 */
	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
		return true;

	/*
	 * If the inflight context did not trigger the preemption, then maybe
	 * it was the set of queued requests? Pick the highest priority in
	 * the queue (the first active priolist) and see if it deserves to be
	 * running instead of ELSP[0].
	 *
	 * The highest priority request in the queue cannot be either
	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
	 * context, its priority would not exceed ELSP[0] aka last_prio.
	 */
	return max(virtual_prio(&engine->execlists),
		   queue_prio(&engine->execlists)) > last_prio;
}

__maybe_unused static bool
assert_priority_queue(const struct i915_request *prev,
		      const struct i915_request *next)
{
	/*
	 * Without preemption, the prev may refer to the still active element
	 * which we refuse to let go.
	 *
	 * Even with preemption, there are times when we think it is better not
	 * to preempt and leave an ostensibly lower priority request in flight.
	 */
	if (i915_request_is_active(prev))
		return true;

	return rq_prio(prev) >= rq_prio(next);
}

static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
	struct i915_request *rq, *rn, *active = NULL;
	struct list_head *pl;
	int prio = I915_PRIORITY_INVALID;

	lockdep_assert_held(&engine->active.lock);

	list_for_each_entry_safe_reverse(rq, rn,
					 &engine->active.requests,
					 sched.link) {
		if (__i915_request_is_complete(rq)) {
			list_del_init(&rq->sched.link);
			continue;
		}

		__i915_request_unsubmit(rq);

		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
		if (rq_prio(rq) != prio) {
			prio = rq_prio(rq);
			pl = i915_sched_lookup_priolist(engine, prio);
		}
		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));

		list_move(&rq->sched.link, pl);
		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);

		/* Check in case we rollback so far we wrap [size/2] */
		if (intel_ring_direction(rq->ring,
					 rq->tail,
					 rq->ring->tail + 8) > 0)
			rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;

		active = rq;
	}

	return active;
}

struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
{
	struct intel_engine_cs *engine =
		container_of(execlists, typeof(*engine), execlists);

	return __unwind_incomplete_requests(engine);
}

static void
execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
	/*
	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
	 * the compiler should eliminate this function as dead-code.
419 */ 420 if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) 421 return; 422 423 atomic_notifier_call_chain(&rq->engine->context_status_notifier, 424 status, rq); 425 } 426 427 static void reset_active(struct i915_request *rq, 428 struct intel_engine_cs *engine) 429 { 430 struct intel_context * const ce = rq->context; 431 u32 head; 432 433 /* 434 * The executing context has been cancelled. We want to prevent 435 * further execution along this context and propagate the error on 436 * to anything depending on its results. 437 * 438 * In __i915_request_submit(), we apply the -EIO and remove the 439 * requests' payloads for any banned requests. But first, we must 440 * rewind the context back to the start of the incomplete request so 441 * that we do not jump back into the middle of the batch. 442 * 443 * We preserve the breadcrumbs and semaphores of the incomplete 444 * requests so that inter-timeline dependencies (i.e other timelines) 445 * remain correctly ordered. And we defer to __i915_request_submit() 446 * so that all asynchronous waits are correctly handled. 447 */ 448 ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n", 449 rq->fence.context, rq->fence.seqno); 450 451 /* On resubmission of the active request, payload will be scrubbed */ 452 if (__i915_request_is_complete(rq)) 453 head = rq->tail; 454 else 455 head = __active_request(ce->timeline, rq, -EIO)->head; 456 head = intel_ring_wrap(ce->ring, head); 457 458 /* Scrub the context image to prevent replaying the previous batch */ 459 lrc_init_regs(ce, engine, true); 460 461 /* We've switched away, so this should be a no-op, but intent matters */ 462 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 463 } 464 465 static bool bad_request(const struct i915_request *rq) 466 { 467 return rq->fence.error && i915_request_started(rq); 468 } 469 470 static struct intel_engine_cs * 471 __execlists_schedule_in(struct i915_request *rq) 472 { 473 struct intel_engine_cs * const engine = rq->engine; 474 struct intel_context * const ce = rq->context; 475 476 intel_context_get(ce); 477 478 if (unlikely(intel_context_is_closed(ce) && 479 !intel_engine_has_heartbeat(engine))) 480 intel_context_set_banned(ce); 481 482 if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) 483 reset_active(rq, engine); 484 485 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 486 lrc_check_regs(ce, engine, "before"); 487 488 if (ce->tag) { 489 /* Use a fixed tag for OA and friends */ 490 GEM_BUG_ON(ce->tag <= BITS_PER_LONG); 491 ce->lrc.ccid = ce->tag; 492 } else { 493 /* We don't need a strict matching tag, just different values */ 494 unsigned int tag = __ffs(engine->context_tag); 495 496 GEM_BUG_ON(tag >= BITS_PER_LONG); 497 __clear_bit(tag, &engine->context_tag); 498 ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32); 499 500 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 501 } 502 503 ce->lrc.ccid |= engine->execlists.ccid; 504 505 __intel_gt_pm_get(engine->gt); 506 if (engine->fw_domain && !engine->fw_active++) 507 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); 508 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); 509 intel_engine_context_in(engine); 510 511 CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid); 512 513 return engine; 514 } 515 516 static void execlists_schedule_in(struct i915_request *rq, int idx) 517 { 518 struct intel_context * const ce = rq->context; 519 struct intel_engine_cs *old; 520 521 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); 522 trace_i915_request_in(rq, idx); 523 524 old = ce->inflight; 525 if 
		old = __execlists_schedule_in(rq);
	WRITE_ONCE(ce->inflight, ptr_inc(old));

	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
}

static void
resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
{
	struct intel_engine_cs *engine = rq->engine;

	spin_lock_irq(&engine->active.lock);

	clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
	WRITE_ONCE(rq->engine, &ve->base);
	ve->base.submit_request(rq);

	spin_unlock_irq(&engine->active.lock);
}

static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
	struct intel_engine_cs *engine = rq->engine;

	/*
	 * After this point, the rq may be transferred to a new sibling, so
	 * before we clear ce->inflight make sure that the context has been
	 * removed from the b->signalers and furthermore we need to make sure
	 * that the concurrent iterator in signal_irq_work is no longer
	 * following ce->signal_link.
	 */
	if (!list_empty(&ce->signals))
		intel_context_remove_breadcrumbs(ce, engine->breadcrumbs);

	/*
	 * This engine is now too busy to run this virtual request, so
	 * see if we can find an alternative engine for it to execute on.
	 * Once a request has become bonded to this engine, we treat it the
	 * same as other native requests.
	 */
	if (i915_request_in_priority_queue(rq) &&
	    rq->execution_mask != engine->mask)
		resubmit_virtual_request(rq, ve);

	if (READ_ONCE(ve->request))
		tasklet_hi_schedule(&ve->base.execlists.tasklet);
}

static void __execlists_schedule_out(struct i915_request * const rq,
				     struct intel_context * const ce)
{
	struct intel_engine_cs * const engine = rq->engine;
	unsigned int ccid;

	/*
	 * NB process_csb() is not under the engine->active.lock and hence
	 * schedule_out can race with schedule_in meaning that we should
	 * refrain from doing non-trivial work here.
	 */

	CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
	GEM_BUG_ON(ce->inflight != engine);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		lrc_check_regs(ce, engine, "after");

	/*
	 * If we have just completed this context, the engine may now be
	 * idle and we want to re-enter powersaving.
	 */
	if (intel_timeline_is_last(ce->timeline, rq) &&
	    __i915_request_is_complete(rq))
		intel_engine_add_retire(engine, ce->timeline);

	ccid = ce->lrc.ccid;
	ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
	ccid &= GEN12_MAX_CONTEXT_HW_ID;
	if (ccid < BITS_PER_LONG) {
		GEM_BUG_ON(ccid == 0);
		GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
		__set_bit(ccid - 1, &engine->context_tag);
	}

	lrc_update_runtime(ce);
	intel_engine_context_out(engine);
	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
	if (engine->fw_domain && !--engine->fw_active)
		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
	intel_gt_pm_put_async(engine->gt);

	/*
	 * If this is part of a virtual engine, its next request may
	 * have been blocked waiting for access to the active context.
	 * We have to kick all the siblings again in case we need to
	 * switch (e.g. the next request is not runnable on this
	 * engine).
	 * Hopefully, we will already have submitted the next
	 * request before the tasklet runs and do not need to rebuild
	 * each virtual tree and kick everyone again.
	 */
	if (ce->engine != engine)
		kick_siblings(rq, ce);

	WRITE_ONCE(ce->inflight, NULL);
	intel_context_put(ce);
}

static inline void execlists_schedule_out(struct i915_request *rq)
{
	struct intel_context * const ce = rq->context;

	trace_i915_request_out(rq);

	GEM_BUG_ON(!ce->inflight);
	ce->inflight = ptr_dec(ce->inflight);
	if (!__intel_context_inflight_count(ce->inflight))
		__execlists_schedule_out(rq, ce);

	i915_request_put(rq);
}

static u64 execlists_update_context(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	u64 desc = ce->lrc.desc;
	u32 tail, prev;

	/*
	 * WaIdleLiteRestore:bdw,skl
	 *
	 * We should never submit the context with the same RING_TAIL twice
	 * just in case we submit an empty ring, which confuses the HW.
	 *
	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
	 * the normal request to be able to always advance the RING_TAIL on
	 * subsequent resubmissions (for lite restore). Should that fail us,
	 * and we try and submit the same tail again, force the context
	 * reload.
	 *
	 * If we need to return to a preempted context, we need to skip the
	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
	 * HW has a tendency to ignore us rewinding the TAIL to the end of
	 * an earlier request.
	 */
	GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
	prev = rq->ring->tail;
	tail = intel_ring_set_tail(rq->ring, rq->tail);
	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
		desc |= CTX_DESC_FORCE_RESTORE;
	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
	rq->tail = rq->wa_tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access, but the
	 * empirical evidence (esp. on Braswell) suggests that the WC write
	 * into memory may not be visible to the HW prior to the completion
	 * of the UC register write and that we may begin execution from the
	 * context before its image is complete leading to invalid PD chasing.
	 */
	wmb();

	ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
	return desc;
}

static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
{
	if (execlists->ctrl_reg) {
		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
	} else {
		writel(upper_32_bits(desc), execlists->submit_reg);
		writel(lower_32_bits(desc), execlists->submit_reg);
	}
}

static __maybe_unused char *
dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
{
	if (!rq)
		return "";

	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
		 prefix,
		 rq->context->lrc.ccid,
		 rq->fence.context, rq->fence.seqno,
		 __i915_request_is_complete(rq) ? "!" :
		 __i915_request_has_started(rq) ? "*" :
"*" : 717 "", 718 rq_prio(rq)); 719 720 return buf; 721 } 722 723 static __maybe_unused noinline void 724 trace_ports(const struct intel_engine_execlists *execlists, 725 const char *msg, 726 struct i915_request * const *ports) 727 { 728 const struct intel_engine_cs *engine = 729 container_of(execlists, typeof(*engine), execlists); 730 char __maybe_unused p0[40], p1[40]; 731 732 if (!ports[0]) 733 return; 734 735 ENGINE_TRACE(engine, "%s { %s%s }\n", msg, 736 dump_port(p0, sizeof(p0), "", ports[0]), 737 dump_port(p1, sizeof(p1), ", ", ports[1])); 738 } 739 740 static bool 741 reset_in_progress(const struct intel_engine_execlists *execlists) 742 { 743 return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); 744 } 745 746 static __maybe_unused noinline bool 747 assert_pending_valid(const struct intel_engine_execlists *execlists, 748 const char *msg) 749 { 750 struct intel_engine_cs *engine = 751 container_of(execlists, typeof(*engine), execlists); 752 struct i915_request * const *port, *rq, *prev = NULL; 753 struct intel_context *ce = NULL; 754 u32 ccid = -1; 755 756 trace_ports(execlists, msg, execlists->pending); 757 758 /* We may be messing around with the lists during reset, lalala */ 759 if (reset_in_progress(execlists)) 760 return true; 761 762 if (!execlists->pending[0]) { 763 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", 764 engine->name); 765 return false; 766 } 767 768 if (execlists->pending[execlists_num_ports(execlists)]) { 769 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", 770 engine->name, execlists_num_ports(execlists)); 771 return false; 772 } 773 774 for (port = execlists->pending; (rq = *port); port++) { 775 unsigned long flags; 776 bool ok = true; 777 778 GEM_BUG_ON(!kref_read(&rq->fence.refcount)); 779 GEM_BUG_ON(!i915_request_is_active(rq)); 780 781 if (ce == rq->context) { 782 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", 783 engine->name, 784 ce->timeline->fence_context, 785 port - execlists->pending); 786 return false; 787 } 788 ce = rq->context; 789 790 if (ccid == ce->lrc.ccid) { 791 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", 792 engine->name, 793 ccid, ce->timeline->fence_context, 794 port - execlists->pending); 795 return false; 796 } 797 ccid = ce->lrc.ccid; 798 799 /* 800 * Sentinels are supposed to be the last request so they flush 801 * the current execution off the HW. Check that they are the only 802 * request in the pending submission. 803 * 804 * NB: Due to the async nature of preempt-to-busy and request 805 * cancellation we need to handle the case where request 806 * becomes a sentinel in parallel to CSB processing. 807 */ 808 if (prev && i915_request_has_sentinel(prev) && 809 !READ_ONCE(prev->fence.error)) { 810 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", 811 engine->name, 812 ce->timeline->fence_context, 813 port - execlists->pending); 814 return false; 815 } 816 prev = rq; 817 818 /* 819 * We want virtual requests to only be in the first slot so 820 * that they are never stuck behind a hog and can be immediately 821 * transferred onto the next idle engine. 822 */ 823 if (rq->execution_mask != engine->mask && 824 port != execlists->pending) { 825 GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n", 826 engine->name, 827 ce->timeline->fence_context, 828 port - execlists->pending); 829 return false; 830 } 831 832 /* Hold tightly onto the lock to prevent concurrent retires! 
		 */
		if (!spin_trylock_irqsave(&rq->lock, flags))
			continue;

		if (__i915_request_is_complete(rq))
			goto unlock;

		if (i915_active_is_idle(&ce->active) &&
		    !intel_context_is_barrier(ce)) {
			GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
				      engine->name,
				      ce->timeline->fence_context,
				      port - execlists->pending);
			ok = false;
			goto unlock;
		}

		if (!i915_vma_is_pinned(ce->state)) {
			GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
				      engine->name,
				      ce->timeline->fence_context,
				      port - execlists->pending);
			ok = false;
			goto unlock;
		}

		if (!i915_vma_is_pinned(ce->ring->vma)) {
			GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
				      engine->name,
				      ce->timeline->fence_context,
				      port - execlists->pending);
			ok = false;
			goto unlock;
		}

unlock:
		spin_unlock_irqrestore(&rq->lock, flags);
		if (!ok)
			return false;
	}

	return ce;
}

static void execlists_submit_ports(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	unsigned int n;

	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));

	/*
	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
	 * not be relinquished until the device is idle (see
	 * i915_gem_idle_work_handler()). As a precaution, we make sure
	 * that all ELSP are drained i.e. we have processed the CSB,
	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
	 */
	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));

	/*
	 * ELSQ note: the submit queue is not cleared after being submitted
	 * to the HW so we need to make sure we always clean it up. This is
	 * currently ensured by the fact that we always write the same number
	 * of elsq entries, keep this in mind before changing the loop below.
	 */
	for (n = execlists_num_ports(execlists); n--; ) {
		struct i915_request *rq = execlists->pending[n];

		write_desc(execlists,
			   rq ? execlists_update_context(rq) : 0,
			   n);
	}

	/* we need to manually load the submit queue */
	if (execlists->ctrl_reg)
		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
}

static bool ctx_single_port_submission(const struct intel_context *ce)
{
	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
		intel_context_force_single_submission(ce));
}

static bool can_merge_ctx(const struct intel_context *prev,
			  const struct intel_context *next)
{
	if (prev != next)
		return false;

	if (ctx_single_port_submission(prev))
		return false;

	return true;
}

static unsigned long i915_request_flags(const struct i915_request *rq)
{
	return READ_ONCE(rq->fence.flags);
}

static bool can_merge_rq(const struct i915_request *prev,
			 const struct i915_request *next)
{
	GEM_BUG_ON(prev == next);
	GEM_BUG_ON(!assert_priority_queue(prev, next));

	/*
	 * We do not submit known completed requests. Therefore if the next
	 * request is already completed, we can pretend to merge it in
	 * with the previous context (and we will skip updating the ELSP
	 * and tracking). Thus hopefully keeping the ELSP full with active
	 * contexts, despite the best efforts of preempt-to-busy to confuse
	 * us.
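	 *
	 * Conversely (see the checks below), a NOPREEMPT or SENTINEL flag
	 * on either request forbids sharing a port, and requests from
	 * different contexts can never be merged.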
	 */
	if (__i915_request_is_complete(next))
		return true;

	if (unlikely((i915_request_flags(prev) | i915_request_flags(next)) &
		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
		      BIT(I915_FENCE_FLAG_SENTINEL))))
		return false;

	if (!can_merge_ctx(prev->context, next->context))
		return false;

	GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
	return true;
}

static bool virtual_matches(const struct virtual_engine *ve,
			    const struct i915_request *rq,
			    const struct intel_engine_cs *engine)
{
	const struct intel_engine_cs *inflight;

	if (!rq)
		return false;

	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
		return false;

	/*
	 * We track when the HW has completed saving the context image
	 * (i.e. when we have seen the final CS event switching out of
	 * the context) and must not overwrite the context image before
	 * then. This restricts us to only using the active engine
	 * while the previous virtualized request is inflight (so
	 * we reuse the register offsets). This is a very small
	 * hysteresis on the greedy selection algorithm.
	 */
	inflight = intel_context_inflight(&ve->context);
	if (inflight && inflight != engine)
		return false;

	return true;
}

static struct virtual_engine *
first_virtual_engine(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *el = &engine->execlists;
	struct rb_node *rb = rb_first_cached(&el->virtual);

	while (rb) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq = READ_ONCE(ve->request);

		/* lazily cleanup after another engine handled rq */
		if (!rq || !virtual_matches(ve, rq, engine)) {
			rb_erase_cached(rb, &el->virtual);
			RB_CLEAR_NODE(rb);
			rb = rb_first_cached(&el->virtual);
			continue;
		}

		return ve;
	}

	return NULL;
}

static void virtual_xfer_context(struct virtual_engine *ve,
				 struct intel_engine_cs *engine)
{
	unsigned int n;

	if (likely(engine == ve->siblings[0]))
		return;

	GEM_BUG_ON(READ_ONCE(ve->context.inflight));
	if (!intel_engine_has_relative_mmio(engine))
		lrc_update_offsets(&ve->context, engine);

	/*
	 * Move the bound engine to the top of the list for
	 * future execution. We then kick this tasklet first
	 * before checking others, so that we preferentially
	 * reuse this set of bound registers.
	 */
	for (n = 1; n < ve->num_siblings; n++) {
		if (ve->siblings[n] == engine) {
			swap(ve->siblings[n], ve->siblings[0]);
			break;
		}
	}
}

static void defer_request(struct i915_request *rq, struct list_head * const pl)
{
	LIST_HEAD(list);

	/*
	 * We want to move the interrupted request to the back of
	 * the round-robin list (i.e. its priority level), but
	 * in doing so, we must then move all requests that were in
	 * flight and were waiting for the interrupted request to
	 * be run after it again.
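	 *
	 * For example, if deferred request A has a waiter B on this engine,
	 * B must be moved behind A as well; the loop below walks the waiter
	 * lists breadth-first to preserve that ordering.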
	 */
	do {
		struct i915_dependency *p;

		GEM_BUG_ON(i915_request_is_active(rq));
		list_move_tail(&rq->sched.link, pl);

		for_each_waiter(p, rq) {
			struct i915_request *w =
				container_of(p->waiter, typeof(*w), sched);

			if (p->flags & I915_DEPENDENCY_WEAK)
				continue;

			/* Leave semaphores spinning on the other engines */
			if (w->engine != rq->engine)
				continue;

			/* No waiter should start before its signaler */
			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
				   __i915_request_has_started(w) &&
				   !__i915_request_is_complete(rq));

			if (!i915_request_is_ready(w))
				continue;

			if (rq_prio(w) < rq_prio(rq))
				continue;

			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
			GEM_BUG_ON(i915_request_is_active(w));
			list_move_tail(&w->sched.link, &list);
		}

		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
	} while (rq);
}

static void defer_active(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = __unwind_incomplete_requests(engine);
	if (!rq)
		return;

	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
}

static bool
timeslice_yield(const struct intel_engine_execlists *el,
		const struct i915_request *rq)
{
	/*
	 * Once bitten, forever smitten!
	 *
	 * If the active context ever busy-waited on a semaphore,
	 * it will be treated as a hog until the end of its timeslice (i.e.
	 * until it is scheduled out and replaced by a new submission,
	 * possibly even its own lite-restore). The HW only sends an interrupt
	 * on the first miss, and we do not know if that semaphore has been
	 * signaled, or even if it is now stuck on another semaphore. Play
	 * safe, yield if it might be stuck -- it will be given a fresh
	 * timeslice in the near future.
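	 *
	 * (el->yield is expected to carry the ccid latched from the
	 * semaphore-wait interrupt, hence the comparison below.)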
	 */
	return rq->context->lrc.ccid == READ_ONCE(el->yield);
}

static bool needs_timeslice(const struct intel_engine_cs *engine,
			    const struct i915_request *rq)
{
	if (!intel_engine_has_timeslices(engine))
		return false;

	/* If not currently active, or about to switch, wait for next event */
	if (!rq || __i915_request_is_complete(rq))
		return false;

	/* We do not need to start the timeslice until after the ACK */
	if (READ_ONCE(engine->execlists.pending[0]))
		return false;

	/* If ELSP[1] is occupied, always check to see if worth slicing */
	if (!list_is_last_rcu(&rq->sched.link, &engine->active.requests)) {
		ENGINE_TRACE(engine, "timeslice required for second inflight context\n");
		return true;
	}

	/* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */
	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) {
		ENGINE_TRACE(engine, "timeslice required for queue\n");
		return true;
	}

	if (!RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root)) {
		ENGINE_TRACE(engine, "timeslice required for virtual\n");
		return true;
	}

	return false;
}

static bool
timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq)
{
	const struct intel_engine_execlists *el = &engine->execlists;

	if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq))
		return false;

	if (!needs_timeslice(engine, rq))
		return false;

	return timer_expired(&el->timer) || timeslice_yield(el, rq);
}

static unsigned long timeslice(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->props.timeslice_duration_ms);
}

static void start_timeslice(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *el = &engine->execlists;
	unsigned long duration;

	/* Disable the timer if there is nothing to switch to */
	duration = 0;
	if (needs_timeslice(engine, *el->active)) {
		/* Avoid continually prolonging an active timeslice */
		if (timer_active(&el->timer)) {
			/*
			 * If we just submitted a new ELSP after an old
			 * context, that context may have already consumed
			 * its timeslice, so recheck.
			 */
			if (!timer_pending(&el->timer))
				tasklet_hi_schedule(&el->tasklet);
			return;
		}

		duration = timeslice(engine);
	}

	set_timer_ms(&el->timer, duration);
}

static void record_preemption(struct intel_engine_execlists *execlists)
{
	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}

static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
					    const struct i915_request *rq)
{
	if (!rq)
		return 0;

	/*
	 * Force a fast reset for terminated contexts (ignoring sysfs!)
	 */
	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
		return 1;

	return READ_ONCE(engine->props.preempt_timeout_ms);
}

static void set_preempt_timeout(struct intel_engine_cs *engine,
				const struct i915_request *rq)
{
	if (!intel_engine_has_preempt_reset(engine))
		return;

	set_timer_ms(&engine->execlists.preempt,
		     active_preempt_timeout(engine, rq));
}

static bool completed(const struct i915_request *rq)
{
	if (i915_request_has_sentinel(rq))
		return false;

	return __i915_request_is_complete(rq);
}

static void execlists_dequeue(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_request **port = execlists->pending;
	struct i915_request ** const last_port = port + execlists->port_mask;
	struct i915_request *last, * const *active;
	struct virtual_engine *ve;
	struct rb_node *rb;
	bool submit = false;

	/*
	 * Hardware submission is through 2 ports. Conceptually each port
	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
	 * static for a context, and unique to each, so we only execute
	 * requests belonging to a single context from each ring. RING_HEAD
	 * is maintained by the CS in the context image, it marks the place
	 * where it got up to last time, and through RING_TAIL we tell the CS
	 * where we want to execute up to this time.
	 *
	 * In this list the requests are in order of execution. Consecutive
	 * requests from the same context are adjacent in the ringbuffer. We
	 * can combine these requests into a single RING_TAIL update:
	 *
	 *	RING_HEAD...req1...req2
	 *	                  ^- RING_TAIL
	 * since to execute req2 the CS must first execute req1.
	 *
	 * Our goal then is to point each port to the end of a consecutive
	 * sequence of requests as being the most optimal (fewest wake ups
	 * and context switches) submission.
	 */

	spin_lock(&engine->active.lock);

	/*
	 * If the queue is higher priority than the last
	 * request in the currently active context, submit afresh.
	 * We will resubmit again afterwards in case we need to split
	 * the active context to interject the preemption request,
	 * i.e. we will retrigger preemption following the ack in case
	 * of trouble.
	 */
	active = execlists->active;
	while ((last = *active) && completed(last))
		active++;

	if (last) {
		if (need_preempt(engine, last)) {
			ENGINE_TRACE(engine,
				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
				     last->fence.context,
				     last->fence.seqno,
				     last->sched.attr.priority,
				     execlists->queue_priority_hint);
			record_preemption(execlists);

			/*
			 * Don't let the RING_HEAD advance past the breadcrumb
			 * as we unwind (and until we resubmit) so that we do
			 * not accidentally tell it to go backwards.
			 */
			ring_set_paused(engine, 1);

			/*
			 * Note that we have not stopped the GPU at this point,
			 * so we are unwinding the incomplete requests as they
			 * remain inflight and so by the time we do complete
			 * the preemption, some of the unwound requests may
			 * complete!
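			 *
			 * (The ring_set_paused() above parks the engine on
			 * the fini-breadcrumb semaphore, so RING_HEAD cannot
			 * advance past the paused point while we rebuild
			 * the ELSP.)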
			 */
			__unwind_incomplete_requests(engine);

			last = NULL;
		} else if (timeslice_expired(engine, last)) {
			ENGINE_TRACE(engine,
				     "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
				     yesno(timer_expired(&execlists->timer)),
				     last->fence.context, last->fence.seqno,
				     rq_prio(last),
				     execlists->queue_priority_hint,
				     yesno(timeslice_yield(execlists, last)));

			/*
			 * Consume this timeslice; ensure we start a new one.
			 *
			 * The timeslice expired, and we will unwind the
			 * running contexts and recompute the next ELSP.
			 * If that submit will be the same pair of contexts
			 * (due to dependency ordering), we will skip the
			 * submission. If we don't cancel the timer now,
			 * we will see that the timer has expired and
			 * reschedule the tasklet; continually until the
			 * next context switch or other preemption event.
			 *
			 * Since we have decided to reschedule based on
			 * consumption of this timeslice, if we submit the
			 * same context again, grant it a full timeslice.
			 */
			cancel_timer(&execlists->timer);
			ring_set_paused(engine, 1);
			defer_active(engine);

			/*
			 * Unlike for preemption, if we rewind and continue
			 * executing the same context as previously active,
			 * the order of execution will remain the same and
			 * the tail will only advance. We do not need to
			 * force a full context restore, as a lite-restore
			 * is sufficient to resample the monotonic TAIL.
			 *
			 * If we switch to any other context, similarly we
			 * will not rewind TAIL of current context, and
			 * normal save/restore will preserve state and allow
			 * us to later continue executing the same request.
			 */
			last = NULL;
		} else {
			/*
			 * Otherwise if we already have a request pending
			 * for execution after the current one, we can
			 * just wait until the next CS event before
			 * queuing more. In either case we will force a
			 * lite-restore preemption event, but if we wait
			 * we hopefully coalesce several updates into a single
			 * submission.
			 */
			if (active[1]) {
				/*
				 * Even if ELSP[1] is occupied and not worthy
				 * of timeslices, our queue might be.
				 */
				spin_unlock(&engine->active.lock);
				return;
			}
		}
	}

	/* XXX virtual is always taking precedence */
	while ((ve = first_virtual_engine(engine))) {
		struct i915_request *rq;

		spin_lock(&ve->base.active.lock);

		rq = ve->request;
		if (unlikely(!virtual_matches(ve, rq, engine)))
			goto unlock; /* lost the race to a sibling */

		GEM_BUG_ON(rq->engine != &ve->base);
		GEM_BUG_ON(rq->context != &ve->context);

		if (unlikely(rq_prio(rq) < queue_prio(execlists))) {
			spin_unlock(&ve->base.active.lock);
			break;
		}

		if (last && !can_merge_rq(last, rq)) {
			spin_unlock(&ve->base.active.lock);
			spin_unlock(&engine->active.lock);
			return; /* leave this for another sibling */
		}

		ENGINE_TRACE(engine,
			     "virtual rq=%llx:%lld%s, new engine? %s\n",
			     rq->fence.context,
			     rq->fence.seqno,
			     __i915_request_is_complete(rq) ? "!" :
			     __i915_request_has_started(rq) ? "*" :
"*" : 1404 "", 1405 yesno(engine != ve->siblings[0])); 1406 1407 WRITE_ONCE(ve->request, NULL); 1408 WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); 1409 1410 rb = &ve->nodes[engine->id].rb; 1411 rb_erase_cached(rb, &execlists->virtual); 1412 RB_CLEAR_NODE(rb); 1413 1414 GEM_BUG_ON(!(rq->execution_mask & engine->mask)); 1415 WRITE_ONCE(rq->engine, engine); 1416 1417 if (__i915_request_submit(rq)) { 1418 /* 1419 * Only after we confirm that we will submit 1420 * this request (i.e. it has not already 1421 * completed), do we want to update the context. 1422 * 1423 * This serves two purposes. It avoids 1424 * unnecessary work if we are resubmitting an 1425 * already completed request after timeslicing. 1426 * But more importantly, it prevents us altering 1427 * ve->siblings[] on an idle context, where 1428 * we may be using ve->siblings[] in 1429 * virtual_context_enter / virtual_context_exit. 1430 */ 1431 virtual_xfer_context(ve, engine); 1432 GEM_BUG_ON(ve->siblings[0] != engine); 1433 1434 submit = true; 1435 last = rq; 1436 } 1437 1438 i915_request_put(rq); 1439 unlock: 1440 spin_unlock(&ve->base.active.lock); 1441 1442 /* 1443 * Hmm, we have a bunch of virtual engine requests, 1444 * but the first one was already completed (thanks 1445 * preempt-to-busy!). Keep looking at the veng queue 1446 * until we have no more relevant requests (i.e. 1447 * the normal submit queue has higher priority). 1448 */ 1449 if (submit) 1450 break; 1451 } 1452 1453 while ((rb = rb_first_cached(&execlists->queue))) { 1454 struct i915_priolist *p = to_priolist(rb); 1455 struct i915_request *rq, *rn; 1456 1457 priolist_for_each_request_consume(rq, rn, p) { 1458 bool merge = true; 1459 1460 /* 1461 * Can we combine this request with the current port? 1462 * It has to be the same context/ringbuffer and not 1463 * have any exceptions (e.g. GVT saying never to 1464 * combine contexts). 1465 * 1466 * If we can combine the requests, we can execute both 1467 * by updating the RING_TAIL to point to the end of the 1468 * second request, and so we never need to tell the 1469 * hardware about the first. 1470 */ 1471 if (last && !can_merge_rq(last, rq)) { 1472 /* 1473 * If we are on the second port and cannot 1474 * combine this request with the last, then we 1475 * are done. 1476 */ 1477 if (port == last_port) 1478 goto done; 1479 1480 /* 1481 * We must not populate both ELSP[] with the 1482 * same LRCA, i.e. we must submit 2 different 1483 * contexts if we submit 2 ELSP. 1484 */ 1485 if (last->context == rq->context) 1486 goto done; 1487 1488 if (i915_request_has_sentinel(last)) 1489 goto done; 1490 1491 /* 1492 * We avoid submitting virtual requests into 1493 * the secondary ports so that we can migrate 1494 * the request immediately to another engine 1495 * rather than wait for the primary request. 1496 */ 1497 if (rq->execution_mask != engine->mask) 1498 goto done; 1499 1500 /* 1501 * If GVT overrides us we only ever submit 1502 * port[0], leaving port[1] empty. Note that we 1503 * also have to be careful that we don't queue 1504 * the same context (even though a different 1505 * request) to the second port. 
1506 */ 1507 if (ctx_single_port_submission(last->context) || 1508 ctx_single_port_submission(rq->context)) 1509 goto done; 1510 1511 merge = false; 1512 } 1513 1514 if (__i915_request_submit(rq)) { 1515 if (!merge) { 1516 *port++ = i915_request_get(last); 1517 last = NULL; 1518 } 1519 1520 GEM_BUG_ON(last && 1521 !can_merge_ctx(last->context, 1522 rq->context)); 1523 GEM_BUG_ON(last && 1524 i915_seqno_passed(last->fence.seqno, 1525 rq->fence.seqno)); 1526 1527 submit = true; 1528 last = rq; 1529 } 1530 } 1531 1532 rb_erase_cached(&p->node, &execlists->queue); 1533 i915_priolist_free(p); 1534 } 1535 done: 1536 *port++ = i915_request_get(last); 1537 1538 /* 1539 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. 1540 * 1541 * We choose the priority hint such that if we add a request of greater 1542 * priority than this, we kick the submission tasklet to decide on 1543 * the right order of submitting the requests to hardware. We must 1544 * also be prepared to reorder requests as they are in-flight on the 1545 * HW. We derive the priority hint then as the first "hole" in 1546 * the HW submission ports and if there are no available slots, 1547 * the priority of the lowest executing request, i.e. last. 1548 * 1549 * When we do receive a higher priority request ready to run from the 1550 * user, see queue_request(), the priority hint is bumped to that 1551 * request triggering preemption on the next dequeue (or subsequent 1552 * interrupt for secondary ports). 1553 */ 1554 execlists->queue_priority_hint = queue_prio(execlists); 1555 spin_unlock(&engine->active.lock); 1556 1557 /* 1558 * We can skip poking the HW if we ended up with exactly the same set 1559 * of requests as currently running, e.g. trying to timeslice a pair 1560 * of ordered contexts. 1561 */ 1562 if (submit && 1563 memcmp(active, 1564 execlists->pending, 1565 (port - execlists->pending) * sizeof(*port))) { 1566 *port = NULL; 1567 while (port-- != execlists->pending) 1568 execlists_schedule_in(*port, port - execlists->pending); 1569 1570 WRITE_ONCE(execlists->yield, -1); 1571 set_preempt_timeout(engine, *active); 1572 execlists_submit_ports(engine); 1573 } else { 1574 ring_set_paused(engine, 0); 1575 while (port-- != execlists->pending) 1576 i915_request_put(*port); 1577 *execlists->pending = NULL; 1578 } 1579 } 1580 1581 static void execlists_dequeue_irq(struct intel_engine_cs *engine) 1582 { 1583 local_irq_disable(); /* Suspend interrupts across request submission */ 1584 execlists_dequeue(engine); 1585 local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ 1586 } 1587 1588 static void clear_ports(struct i915_request **ports, int count) 1589 { 1590 memset_p((void **)ports, NULL, count); 1591 } 1592 1593 static void 1594 copy_ports(struct i915_request **dst, struct i915_request **src, int count) 1595 { 1596 /* A memcpy_p() would be very useful here! 
	 */
	while (count--)
		WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
}

static struct i915_request **
cancel_port_requests(struct intel_engine_execlists * const execlists,
		     struct i915_request **inactive)
{
	struct i915_request * const *port;

	for (port = execlists->pending; *port; port++)
		*inactive++ = *port;
	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));

	/* Mark the end of active before we overwrite *active */
	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
		*inactive++ = *port;
	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));

	smp_wmb(); /* complete the seqlock for execlists_active() */
	WRITE_ONCE(execlists->active, execlists->inflight);

	/* Having cancelled all outstanding process_csb(), stop their timers */
	GEM_BUG_ON(execlists->pending[0]);
	cancel_timer(&execlists->timer);
	cancel_timer(&execlists->preempt);

	return inactive;
}

static void invalidate_csb_entries(const u64 *first, const u64 *last)
{
	clflush((void *)first);
	clflush((void *)last);
}

/*
 * Starting with Gen12, the status has a new format:
 *
 *     bit  0:     switched to new queue
 *     bit  1:     reserved
 *     bit  2:     semaphore wait mode (poll or signal), only valid when
 *                 switch detail is set to "wait on semaphore"
 *     bits 3-5:   engine class
 *     bits 6-11:  engine instance
 *     bits 12-14: reserved
 *     bits 15-25: sw context id of the lrc the GT switched to
 *     bits 26-31: sw counter of the lrc the GT switched to
 *     bits 32-35: context switch detail
 *                  - 0: ctx complete
 *                  - 1: wait on sync flip
 *                  - 2: wait on vblank
 *                  - 3: wait on scanline
 *                  - 4: wait on semaphore
 *                  - 5: context preempted (not on SEMAPHORE_WAIT or
 *                       WAIT_FOR_EVENT)
 *     bit  36:    reserved
 *     bits 37-43: wait detail (for switch detail 1 to 4)
 *     bits 44-46: reserved
 *     bits 47-57: sw context id of the lrc the GT switched away from
 *     bits 58-63: sw counter of the lrc the GT switched away from
 */
static bool gen12_csb_parse(const u64 csb)
{
	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
	bool new_queue =
		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;

	/*
	 * The context switch detail is not guaranteed to be 5 when a preemption
	 * occurs, so we can't just check for that. The check below works for
	 * all the cases we care about, including preemptions of WAIT
	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
	 * would require some extra handling, but we don't support that.
	 */
	if (!ctx_away_valid || new_queue) {
		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
		return true;
	}

	/*
	 * switch detail = 5 is covered by the case above and we do not expect a
	 * context switch on an unsuccessful wait instruction since we always
	 * use polling mode.
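	 *
	 * To summarise: an invalid "switched away" context id or the
	 * "switched to new queue" bit is treated as promotion of a new
	 * submission (return true); anything else is a plain completion
	 * event with nothing new to run (return false).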
1681 */ 1682 GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); 1683 return false; 1684 } 1685 1686 static bool gen8_csb_parse(const u64 csb) 1687 { 1688 return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); 1689 } 1690 1691 static noinline u64 1692 wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1693 { 1694 u64 entry; 1695 1696 /* 1697 * Reading from the HWSP has one particular advantage: we can detect 1698 * a stale entry. Since the write into HWSP is broken, we have no reason 1699 * to trust the HW at all, the mmio entry may equally be unordered, so 1700 * we prefer the path that is self-checking and as a last resort, 1701 * return the mmio value. 1702 * 1703 * tgl,dg1:HSDES#22011327657 1704 */ 1705 preempt_disable(); 1706 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) { 1707 int idx = csb - engine->execlists.csb_status; 1708 int status; 1709 1710 status = GEN8_EXECLISTS_STATUS_BUF; 1711 if (idx >= 6) { 1712 status = GEN11_EXECLISTS_STATUS_BUF2; 1713 idx -= 6; 1714 } 1715 status += sizeof(u64) * idx; 1716 1717 entry = intel_uncore_read64(engine->uncore, 1718 _MMIO(engine->mmio_base + status)); 1719 } 1720 preempt_enable(); 1721 1722 return entry; 1723 } 1724 1725 static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1726 { 1727 u64 entry = READ_ONCE(*csb); 1728 1729 /* 1730 * Unfortunately, the GPU does not always serialise its write 1731 * of the CSB entries before its write of the CSB pointer, at least 1732 * from the perspective of the CPU, using what is known as a Global 1733 * Observation Point. We may read a new CSB tail pointer, but then 1734 * read the stale CSB entries, causing us to misinterpret the 1735 * context-switch events, and eventually declare the GPU hung. 1736 * 1737 * icl:HSDES#1806554093 1738 * tgl:HSDES#22011248461 1739 */ 1740 if (unlikely(entry == -1)) 1741 entry = wa_csb_read(engine, csb); 1742 1743 /* Consume this entry so that we can spot its future reuse. */ 1744 WRITE_ONCE(*csb, -1); 1745 1746 /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */ 1747 return entry; 1748 } 1749 1750 static void new_timeslice(struct intel_engine_execlists *el) 1751 { 1752 /* By cancelling, we will start afresh in start_timeslice() */ 1753 cancel_timer(&el->timer); 1754 } 1755 1756 static struct i915_request ** 1757 process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) 1758 { 1759 struct intel_engine_execlists * const execlists = &engine->execlists; 1760 u64 * const buf = execlists->csb_status; 1761 const u8 num_entries = execlists->csb_size; 1762 struct i915_request **prev; 1763 u8 head, tail; 1764 1765 /* 1766 * As we modify our execlists state tracking we require exclusive 1767 * access. Either we are inside the tasklet, or the tasklet is disabled 1768 * and we assume that is only inside the reset paths and so serialised. 1769 */ 1770 GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && 1771 !reset_in_progress(execlists)); 1772 1773 /* 1774 * Note that csb_write, csb_status may be either in HWSP or mmio. 1775 * When reading from the csb_write mmio register, we have to be 1776 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is 1777 * the low 4bits. 
As it happens we know the next 4bits are always
1778 * zero and so we can simply mask off the low u8 of the register
1779 * and treat it identically to reading from the HWSP (without having
1780 * to use explicit shifting and masking, and probably bifurcating
1781 * the code to handle the legacy mmio read).
1782 */
1783 head = execlists->csb_head;
1784 tail = READ_ONCE(*execlists->csb_write);
1785 if (unlikely(head == tail))
1786 return inactive;
1787
1788 /*
1789 * We will consume all events from HW, or at least pretend to.
1790 *
1791 * The sequence of events from the HW is deterministic, and derived
1792 * from our writes to the ELSP, with a smidgen of variability for
1793 * the arrival of the asynchronous requests wrt the inflight
1794 * execution. If the HW sends an event that does not correspond with
1795 * the one we are expecting, we have to abandon all hope as we lose
1796 * all tracking of what the engine is actually executing. We will
1797 * only detect we are out of sequence with the HW when we get an
1798 * 'impossible' event because we have already drained our own
1799 * preemption/promotion queue. If this occurs, we know that we likely
1800 * lost track of execution earlier and must unwind and restart; the
1801 * simplest way is to stop processing the event queue and force the
1802 * engine to reset.
1803 */
1804 execlists->csb_head = tail;
1805 ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
1806
1807 /*
1808 * Hopefully paired with a wmb() in HW!
1809 *
1810 * We must complete the read of the write pointer before any reads
1811 * from the CSB, so that we do not see stale values. Without an rmb
1812 * (lfence) the HW may speculatively perform the CSB[] reads *before*
1813 * we perform the READ_ONCE(*csb_write).
1814 */
1815 rmb();
1816
1817 /* Remember who was last running under the timer */
1818 prev = inactive;
1819 *prev = NULL;
1820
1821 do {
1822 bool promote;
1823 u64 csb;
1824
1825 if (++head == num_entries)
1826 head = 0;
1827
1828 /*
1829 * We are flying near dragons again.
1830 *
1831 * We hold a reference to the request in execlist_port[]
1832 * but no more than that. We are operating in softirq
1833 * context and so cannot hold any mutex or sleep. That
1834 * prevents us stopping the requests we are processing
1835 * in port[] from being retired simultaneously (the
1836 * breadcrumb will be complete before we see the
1837 * context-switch). As we only hold the reference to the
1838 * request, any pointer chasing underneath the request
1839 * is subject to a potential use-after-free. Thus we
1840 * store all of the bookkeeping within port[] as
1841 * required, and avoid using unguarded pointers beneath
1842 * the request itself. The same applies to the atomic
1843 * status notifier.
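 *
 * As a purely illustrative sketch of the hazard described above (not
 * driver code): the reference pins the request structure itself, not
 * what it points to, so after
 *
 *	struct i915_request *rq = execlists->active[0];
 *	struct intel_context *ce = rq->context;
 *
 * the request may be retired at any moment and the pointer chase
 * through ce becomes a potential use-after-free; anything needed
 * later must be copied into the port[] bookkeeping first.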
1844 */
1845
1846 csb = csb_read(engine, buf + head);
1847 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
1848 head, upper_32_bits(csb), lower_32_bits(csb));
1849
1850 if (GRAPHICS_VER(engine->i915) >= 12)
1851 promote = gen12_csb_parse(csb);
1852 else
1853 promote = gen8_csb_parse(csb);
1854 if (promote) {
1855 struct i915_request * const *old = execlists->active;
1856
1857 if (GEM_WARN_ON(!*execlists->pending)) {
1858 execlists->error_interrupt |= ERROR_CSB;
1859 break;
1860 }
1861
1862 ring_set_paused(engine, 0);
1863
1864 /* Point active to the new ELSP; prevent overwriting */
1865 WRITE_ONCE(execlists->active, execlists->pending);
1866 smp_wmb(); /* notify execlists_active() */
1867
1868 /* cancel old inflight, prepare for switch */
1869 trace_ports(execlists, "preempted", old);
1870 while (*old)
1871 *inactive++ = *old++;
1872
1873 /* switch pending to inflight */
1874 GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
1875 copy_ports(execlists->inflight,
1876 execlists->pending,
1877 execlists_num_ports(execlists));
1878 smp_wmb(); /* complete the seqlock */
1879 WRITE_ONCE(execlists->active, execlists->inflight);
1880
1881 /* XXX Magic delay for tgl */
1882 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
1883
1884 WRITE_ONCE(execlists->pending[0], NULL);
1885 } else {
1886 if (GEM_WARN_ON(!*execlists->active)) {
1887 execlists->error_interrupt |= ERROR_CSB;
1888 break;
1889 }
1890
1891 /* port0 completed, advanced to port1 */
1892 trace_ports(execlists, "completed", execlists->active);
1893
1894 /*
1895 * We rely on the hardware being strongly
1896 * ordered, that the breadcrumb write is
1897 * coherent (visible from the CPU) before the
1898 * user interrupt is processed. One might assume
1899 * that the breadcrumb write being before the
1900 * user interrupt and the CS event for the context
1901 * switch would therefore be before the CS event
1902 * itself...
1903 */
1904 if (GEM_SHOW_DEBUG() &&
1905 !__i915_request_is_complete(*execlists->active)) {
1906 struct i915_request *rq = *execlists->active;
1907 const u32 *regs __maybe_unused =
1908 rq->context->lrc_reg_state;
1909
1910 ENGINE_TRACE(engine,
1911 "context completed before request!\n");
1912 ENGINE_TRACE(engine,
1913 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
1914 ENGINE_READ(engine, RING_START),
1915 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
1916 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
1917 ENGINE_READ(engine, RING_CTL),
1918 ENGINE_READ(engine, RING_MI_MODE));
1919 ENGINE_TRACE(engine,
1920 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
1921 i915_ggtt_offset(rq->ring->vma),
1922 rq->head, rq->tail,
1923 rq->fence.context,
1924 lower_32_bits(rq->fence.seqno),
1925 hwsp_seqno(rq));
1926 ENGINE_TRACE(engine,
1927 "ctx:{start:%08x, head:%04x, tail:%04x}, ",
1928 regs[CTX_RING_START],
1929 regs[CTX_RING_HEAD],
1930 regs[CTX_RING_TAIL]);
1931 }
1932
1933 *inactive++ = *execlists->active++;
1934
1935 GEM_BUG_ON(execlists->active - execlists->inflight >
1936 execlists_num_ports(execlists));
1937 }
1938 } while (head != tail);
1939
1940 /*
1941 * Gen11 has proven to fail wrt the global observation point between
1942 * entry and tail update, failing on the ordering and thus
1943 * we see an old entry in the context status buffer.
1944 *
1945 * Forcibly evict the entries before the next gpu csb update,
1946 * to increase the odds that we get fresh entries even with
1947 * non-working hardware.
The cost of doing so mostly comes out in
1948 * the wash, as the hardware, working or not, will need to do the
1949 * invalidation beforehand anyway.
1950 */
1951 invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
1952
1953 /*
1954 * We assume that any event reflects a change in context flow
1955 * and merits a fresh timeslice. We reinstall the timer after
1956 * inspecting the queue to see if we need to resubmit.
1957 */
1958 if (*prev != *execlists->active) /* elide lite-restores */
1959 new_timeslice(execlists);
1960
1961 return inactive;
1962 }
1963
1964 static void post_process_csb(struct i915_request **port,
1965 struct i915_request **last)
1966 {
1967 while (port != last)
1968 execlists_schedule_out(*port++);
1969 }
1970
1971 static void __execlists_hold(struct i915_request *rq)
1972 {
1973 LIST_HEAD(list);
1974
1975 do {
1976 struct i915_dependency *p;
1977
1978 if (i915_request_is_active(rq))
1979 __i915_request_unsubmit(rq);
1980
1981 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1982 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
1983 i915_request_set_hold(rq);
1984 RQ_TRACE(rq, "on hold\n");
1985
1986 for_each_waiter(p, rq) {
1987 struct i915_request *w =
1988 container_of(p->waiter, typeof(*w), sched);
1989
1990 if (p->flags & I915_DEPENDENCY_WEAK)
1991 continue;
1992
1993 /* Leave semaphores spinning on the other engines */
1994 if (w->engine != rq->engine)
1995 continue;
1996
1997 if (!i915_request_is_ready(w))
1998 continue;
1999
2000 if (__i915_request_is_complete(w))
2001 continue;
2002
2003 if (i915_request_on_hold(w))
2004 continue;
2005
2006 list_move_tail(&w->sched.link, &list);
2007 }
2008
2009 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2010 } while (rq);
2011 }
2012
2013 static bool execlists_hold(struct intel_engine_cs *engine,
2014 struct i915_request *rq)
2015 {
2016 if (i915_request_on_hold(rq))
2017 return false;
2018
2019 spin_lock_irq(&engine->active.lock);
2020
2021 if (__i915_request_is_complete(rq)) { /* too late! */
2022 rq = NULL;
2023 goto unlock;
2024 }
2025
2026 /*
2027 * Transfer this request onto the hold queue to prevent it
2028 * being resubmitted to HW (and potentially completed) before we have
2029 * released it. Since we may have already submitted following
2030 * requests, we need to remove those as well.
2031 */
2032 GEM_BUG_ON(i915_request_on_hold(rq));
2033 GEM_BUG_ON(rq->engine != engine);
2034 __execlists_hold(rq);
2035 GEM_BUG_ON(list_empty(&engine->active.hold));
2036
2037 unlock:
2038 spin_unlock_irq(&engine->active.lock);
2039 return rq;
2040 }
2041
2042 static bool hold_request(const struct i915_request *rq)
2043 {
2044 struct i915_dependency *p;
2045 bool result = false;
2046
2047 /*
2048 * If one of our ancestors is on hold, we must also be on hold,
2049 * otherwise we will bypass it and execute before it.
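 *
 * For example (illustrative): if request A is on hold and request B
 * on the same engine has A as a signaler, moving B into the priority
 * queue now would let B execute before A is released, inverting the
 * fence ordering; so B must be reported as needing to be held too.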
2050 */ 2051 rcu_read_lock(); 2052 for_each_signaler(p, rq) { 2053 const struct i915_request *s = 2054 container_of(p->signaler, typeof(*s), sched); 2055 2056 if (s->engine != rq->engine) 2057 continue; 2058 2059 result = i915_request_on_hold(s); 2060 if (result) 2061 break; 2062 } 2063 rcu_read_unlock(); 2064 2065 return result; 2066 } 2067 2068 static void __execlists_unhold(struct i915_request *rq) 2069 { 2070 LIST_HEAD(list); 2071 2072 do { 2073 struct i915_dependency *p; 2074 2075 RQ_TRACE(rq, "hold release\n"); 2076 2077 GEM_BUG_ON(!i915_request_on_hold(rq)); 2078 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); 2079 2080 i915_request_clear_hold(rq); 2081 list_move_tail(&rq->sched.link, 2082 i915_sched_lookup_priolist(rq->engine, 2083 rq_prio(rq))); 2084 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2085 2086 /* Also release any children on this engine that are ready */ 2087 for_each_waiter(p, rq) { 2088 struct i915_request *w = 2089 container_of(p->waiter, typeof(*w), sched); 2090 2091 if (p->flags & I915_DEPENDENCY_WEAK) 2092 continue; 2093 2094 /* Propagate any change in error status */ 2095 if (rq->fence.error) 2096 i915_request_set_error_once(w, rq->fence.error); 2097 2098 if (w->engine != rq->engine) 2099 continue; 2100 2101 if (!i915_request_on_hold(w)) 2102 continue; 2103 2104 /* Check that no other parents are also on hold */ 2105 if (hold_request(w)) 2106 continue; 2107 2108 list_move_tail(&w->sched.link, &list); 2109 } 2110 2111 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 2112 } while (rq); 2113 } 2114 2115 static void execlists_unhold(struct intel_engine_cs *engine, 2116 struct i915_request *rq) 2117 { 2118 spin_lock_irq(&engine->active.lock); 2119 2120 /* 2121 * Move this request back to the priority queue, and all of its 2122 * children and grandchildren that were suspended along with it. 2123 */ 2124 __execlists_unhold(rq); 2125 2126 if (rq_prio(rq) > engine->execlists.queue_priority_hint) { 2127 engine->execlists.queue_priority_hint = rq_prio(rq); 2128 tasklet_hi_schedule(&engine->execlists.tasklet); 2129 } 2130 2131 spin_unlock_irq(&engine->active.lock); 2132 } 2133 2134 struct execlists_capture { 2135 struct work_struct work; 2136 struct i915_request *rq; 2137 struct i915_gpu_coredump *error; 2138 }; 2139 2140 static void execlists_capture_work(struct work_struct *work) 2141 { 2142 struct execlists_capture *cap = container_of(work, typeof(*cap), work); 2143 const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; 2144 struct intel_engine_cs *engine = cap->rq->engine; 2145 struct intel_gt_coredump *gt = cap->error->gt; 2146 struct intel_engine_capture_vma *vma; 2147 2148 /* Compress all the objects attached to the request, slow! 
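 *
 * (We are in the worker here, so sleeping GFP_KERNEL allocations and
 * the slow compression are permitted; contrast with capture_regs(),
 * which runs in atomic context and must use GFP_ATOMIC.)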
*/ 2149 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); 2150 if (vma) { 2151 struct i915_vma_compress *compress = 2152 i915_vma_capture_prepare(gt); 2153 2154 intel_engine_coredump_add_vma(gt->engine, vma, compress); 2155 i915_vma_capture_finish(gt, compress); 2156 } 2157 2158 gt->simulated = gt->engine->simulated; 2159 cap->error->simulated = gt->simulated; 2160 2161 /* Publish the error state, and announce it to the world */ 2162 i915_error_state_store(cap->error); 2163 i915_gpu_coredump_put(cap->error); 2164 2165 /* Return this request and all that depend upon it for signaling */ 2166 execlists_unhold(engine, cap->rq); 2167 i915_request_put(cap->rq); 2168 2169 kfree(cap); 2170 } 2171 2172 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) 2173 { 2174 const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 2175 struct execlists_capture *cap; 2176 2177 cap = kmalloc(sizeof(*cap), gfp); 2178 if (!cap) 2179 return NULL; 2180 2181 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); 2182 if (!cap->error) 2183 goto err_cap; 2184 2185 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); 2186 if (!cap->error->gt) 2187 goto err_gpu; 2188 2189 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); 2190 if (!cap->error->gt->engine) 2191 goto err_gt; 2192 2193 cap->error->gt->engine->hung = true; 2194 2195 return cap; 2196 2197 err_gt: 2198 kfree(cap->error->gt); 2199 err_gpu: 2200 kfree(cap->error); 2201 err_cap: 2202 kfree(cap); 2203 return NULL; 2204 } 2205 2206 static struct i915_request * 2207 active_context(struct intel_engine_cs *engine, u32 ccid) 2208 { 2209 const struct intel_engine_execlists * const el = &engine->execlists; 2210 struct i915_request * const *port, *rq; 2211 2212 /* 2213 * Use the most recent result from process_csb(), but just in case 2214 * we trigger an error (via interrupt) before the first CS event has 2215 * been written, peek at the next submission. 2216 */ 2217 2218 for (port = el->active; (rq = *port); port++) { 2219 if (rq->context->lrc.ccid == ccid) { 2220 ENGINE_TRACE(engine, 2221 "ccid:%x found at active:%zd\n", 2222 ccid, port - el->active); 2223 return rq; 2224 } 2225 } 2226 2227 for (port = el->pending; (rq = *port); port++) { 2228 if (rq->context->lrc.ccid == ccid) { 2229 ENGINE_TRACE(engine, 2230 "ccid:%x found at pending:%zd\n", 2231 ccid, port - el->pending); 2232 return rq; 2233 } 2234 } 2235 2236 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); 2237 return NULL; 2238 } 2239 2240 static u32 active_ccid(struct intel_engine_cs *engine) 2241 { 2242 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); 2243 } 2244 2245 static void execlists_capture(struct intel_engine_cs *engine) 2246 { 2247 struct execlists_capture *cap; 2248 2249 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) 2250 return; 2251 2252 /* 2253 * We need to _quickly_ capture the engine state before we reset. 2254 * We are inside an atomic section (softirq) here and we are delaying 2255 * the forced preemption event. 2256 */ 2257 cap = capture_regs(engine); 2258 if (!cap) 2259 return; 2260 2261 spin_lock_irq(&engine->active.lock); 2262 cap->rq = active_context(engine, active_ccid(engine)); 2263 if (cap->rq) { 2264 cap->rq = active_request(cap->rq->context->timeline, cap->rq); 2265 cap->rq = i915_request_get_rcu(cap->rq); 2266 } 2267 spin_unlock_irq(&engine->active.lock); 2268 if (!cap->rq) 2269 goto err_free; 2270 2271 /* 2272 * Remove the request from the execlists queue, and take ownership 2273 * of the request. 
We pass it to our worker who will _slowly_ compress
2274 * all the pages the _user_ requested for debugging their batch, after
2275 * which we return it to the queue for signaling.
2276 *
2277 * By removing them from the execlists queue, we also remove the
2278 * requests from being processed by __unwind_incomplete_requests()
2279 * during the intel_engine_reset(), and so they will *not* be replayed
2280 * afterwards.
2281 *
2282 * Note that because we have not yet reset the engine at this point,
2283 * it is possible that the request we have identified as being
2284 * guilty did in fact complete, and we will then hit an arbitration
2285 * point allowing the outstanding preemption to succeed. The likelihood
2286 * of that is very low (as capturing of the engine registers should be
2287 * fast enough to run inside an irq-off atomic section!), so we will
2288 * simply hold that request accountable for being non-preemptible
2289 * long enough to force the reset.
2290 */
2291 if (!execlists_hold(engine, cap->rq))
2292 goto err_rq;
2293
2294 INIT_WORK(&cap->work, execlists_capture_work);
2295 schedule_work(&cap->work);
2296 return;
2297
2298 err_rq:
2299 i915_request_put(cap->rq);
2300 err_free:
2301 i915_gpu_coredump_put(cap->error);
2302 kfree(cap);
2303 }
2304
2305 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
2306 {
2307 const unsigned int bit = I915_RESET_ENGINE + engine->id;
2308 unsigned long *lock = &engine->gt->reset.flags;
2309
2310 if (!intel_has_reset_engine(engine->gt))
2311 return;
2312
2313 if (test_and_set_bit(bit, lock))
2314 return;
2315
2316 ENGINE_TRACE(engine, "reset for %s\n", msg);
2317
2318 /* Mark this tasklet as disabled to avoid waiting for it to complete */
2319 tasklet_disable_nosync(&engine->execlists.tasklet);
2320
2321 ring_set_paused(engine, 1); /* Freeze the current request in place */
2322 execlists_capture(engine);
2323 intel_engine_reset(engine, msg);
2324
2325 tasklet_enable(&engine->execlists.tasklet);
2326 clear_and_wake_up_bit(bit, lock);
2327 }
2328
2329 static bool preempt_timeout(const struct intel_engine_cs *const engine)
2330 {
2331 const struct timer_list *t = &engine->execlists.preempt;
2332
2333 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2334 return false;
2335
2336 if (!timer_expired(t))
2337 return false;
2338
2339 return engine->execlists.pending[0];
2340 }
2341
2342 /*
2343 * Check the unread Context Status Buffers and manage the submission of new
2344 * contexts to the ELSP accordingly.
2345 */
2346 static void execlists_submission_tasklet(struct tasklet_struct *t)
2347 {
2348 struct intel_engine_cs * const engine =
2349 from_tasklet(engine, t, execlists.tasklet);
2350 struct i915_request *post[2 * EXECLIST_MAX_PORTS];
2351 struct i915_request **inactive;
2352
2353 rcu_read_lock();
2354 inactive = process_csb(engine, post);
2355 GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
2356
2357 if (unlikely(preempt_timeout(engine))) {
2358 cancel_timer(&engine->execlists.preempt);
2359 engine->execlists.error_interrupt |= ERROR_PREEMPT;
2360 }
2361
2362 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
2363 const char *msg;
2364
2365 /* Generate the error message in priority order wrt the user!
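 *
 * (The low 16 bits of error_interrupt hold the EIR value raised by
 * the user's payload, see execlists_irq_handler(), so a CS error is
 * reported in preference to our internal ERROR_CSB/ERROR_PREEMPT.)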
*/ 2366 if (engine->execlists.error_interrupt & GENMASK(15, 0)) 2367 msg = "CS error"; /* thrown by a user payload */ 2368 else if (engine->execlists.error_interrupt & ERROR_CSB) 2369 msg = "invalid CSB event"; 2370 else if (engine->execlists.error_interrupt & ERROR_PREEMPT) 2371 msg = "preemption time out"; 2372 else 2373 msg = "internal error"; 2374 2375 engine->execlists.error_interrupt = 0; 2376 execlists_reset(engine, msg); 2377 } 2378 2379 if (!engine->execlists.pending[0]) { 2380 execlists_dequeue_irq(engine); 2381 start_timeslice(engine); 2382 } 2383 2384 post_process_csb(post, inactive); 2385 rcu_read_unlock(); 2386 } 2387 2388 static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) 2389 { 2390 bool tasklet = false; 2391 2392 if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { 2393 u32 eir; 2394 2395 /* Upper 16b are the enabling mask, rsvd for internal errors */ 2396 eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0); 2397 ENGINE_TRACE(engine, "CS error: %x\n", eir); 2398 2399 /* Disable the error interrupt until after the reset */ 2400 if (likely(eir)) { 2401 ENGINE_WRITE(engine, RING_EMR, ~0u); 2402 ENGINE_WRITE(engine, RING_EIR, eir); 2403 WRITE_ONCE(engine->execlists.error_interrupt, eir); 2404 tasklet = true; 2405 } 2406 } 2407 2408 if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { 2409 WRITE_ONCE(engine->execlists.yield, 2410 ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); 2411 ENGINE_TRACE(engine, "semaphore yield: %08x\n", 2412 engine->execlists.yield); 2413 if (del_timer(&engine->execlists.timer)) 2414 tasklet = true; 2415 } 2416 2417 if (iir & GT_CONTEXT_SWITCH_INTERRUPT) 2418 tasklet = true; 2419 2420 if (iir & GT_RENDER_USER_INTERRUPT) 2421 intel_engine_signal_breadcrumbs(engine); 2422 2423 if (tasklet) 2424 tasklet_hi_schedule(&engine->execlists.tasklet); 2425 } 2426 2427 static void __execlists_kick(struct intel_engine_execlists *execlists) 2428 { 2429 /* Kick the tasklet for some interrupt coalescing and reset handling */ 2430 tasklet_hi_schedule(&execlists->tasklet); 2431 } 2432 2433 #define execlists_kick(t, member) \ 2434 __execlists_kick(container_of(t, struct intel_engine_execlists, member)) 2435 2436 static void execlists_timeslice(struct timer_list *timer) 2437 { 2438 execlists_kick(timer, timer); 2439 } 2440 2441 static void execlists_preempt(struct timer_list *timer) 2442 { 2443 execlists_kick(timer, preempt); 2444 } 2445 2446 static void queue_request(struct intel_engine_cs *engine, 2447 struct i915_request *rq) 2448 { 2449 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2450 list_add_tail(&rq->sched.link, 2451 i915_sched_lookup_priolist(engine, rq_prio(rq))); 2452 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2453 } 2454 2455 static bool submit_queue(struct intel_engine_cs *engine, 2456 const struct i915_request *rq) 2457 { 2458 struct intel_engine_execlists *execlists = &engine->execlists; 2459 2460 if (rq_prio(rq) <= execlists->queue_priority_hint) 2461 return false; 2462 2463 execlists->queue_priority_hint = rq_prio(rq); 2464 return true; 2465 } 2466 2467 static bool ancestor_on_hold(const struct intel_engine_cs *engine, 2468 const struct i915_request *rq) 2469 { 2470 GEM_BUG_ON(i915_request_on_hold(rq)); 2471 return !list_empty(&engine->active.hold) && hold_request(rq); 2472 } 2473 2474 static void execlists_submit_request(struct i915_request *request) 2475 { 2476 struct intel_engine_cs *engine = request->engine; 2477 unsigned long flags; 2478 2479 /* Will be called from irq-context when using foreign fences. 
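 *
 * (Hence spin_lock_irqsave() below rather than spin_lock_irq(): a
 * foreign fence callback may already be running with interrupts
 * disabled, so we must restore, not unconditionally re-enable, the
 * irq state.)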
*/ 2480 spin_lock_irqsave(&engine->active.lock, flags); 2481 2482 if (unlikely(ancestor_on_hold(engine, request))) { 2483 RQ_TRACE(request, "ancestor on hold\n"); 2484 list_add_tail(&request->sched.link, &engine->active.hold); 2485 i915_request_set_hold(request); 2486 } else { 2487 queue_request(engine, request); 2488 2489 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); 2490 GEM_BUG_ON(list_empty(&request->sched.link)); 2491 2492 if (submit_queue(engine, request)) 2493 __execlists_kick(&engine->execlists); 2494 } 2495 2496 spin_unlock_irqrestore(&engine->active.lock, flags); 2497 } 2498 2499 static int 2500 __execlists_context_pre_pin(struct intel_context *ce, 2501 struct intel_engine_cs *engine, 2502 struct i915_gem_ww_ctx *ww, void **vaddr) 2503 { 2504 int err; 2505 2506 err = lrc_pre_pin(ce, engine, ww, vaddr); 2507 if (err) 2508 return err; 2509 2510 if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { 2511 lrc_init_state(ce, engine, *vaddr); 2512 2513 __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); 2514 } 2515 2516 return 0; 2517 } 2518 2519 static int execlists_context_pre_pin(struct intel_context *ce, 2520 struct i915_gem_ww_ctx *ww, 2521 void **vaddr) 2522 { 2523 return __execlists_context_pre_pin(ce, ce->engine, ww, vaddr); 2524 } 2525 2526 static int execlists_context_pin(struct intel_context *ce, void *vaddr) 2527 { 2528 return lrc_pin(ce, ce->engine, vaddr); 2529 } 2530 2531 static int execlists_context_alloc(struct intel_context *ce) 2532 { 2533 return lrc_alloc(ce, ce->engine); 2534 } 2535 2536 static const struct intel_context_ops execlists_context_ops = { 2537 .flags = COPS_HAS_INFLIGHT, 2538 2539 .alloc = execlists_context_alloc, 2540 2541 .pre_pin = execlists_context_pre_pin, 2542 .pin = execlists_context_pin, 2543 .unpin = lrc_unpin, 2544 .post_unpin = lrc_post_unpin, 2545 2546 .enter = intel_context_enter_engine, 2547 .exit = intel_context_exit_engine, 2548 2549 .reset = lrc_reset, 2550 .destroy = lrc_destroy, 2551 }; 2552 2553 static int emit_pdps(struct i915_request *rq) 2554 { 2555 const struct intel_engine_cs * const engine = rq->engine; 2556 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); 2557 int err, i; 2558 u32 *cs; 2559 2560 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); 2561 2562 /* 2563 * Beware ye of the dragons, this sequence is magic! 2564 * 2565 * Small changes to this sequence can cause anything from 2566 * GPU hangs to forcewake errors and machine lockups! 2567 */ 2568 2569 cs = intel_ring_begin(rq, 2); 2570 if (IS_ERR(cs)) 2571 return PTR_ERR(cs); 2572 2573 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 2574 *cs++ = MI_NOOP; 2575 intel_ring_advance(rq, cs); 2576 2577 /* Flush any residual operations from the context load */ 2578 err = engine->emit_flush(rq, EMIT_FLUSH); 2579 if (err) 2580 return err; 2581 2582 /* Magic required to prevent forcewake errors! 
*/
2583 err = engine->emit_flush(rq, EMIT_INVALIDATE);
2584 if (err)
2585 return err;
2586
2587 cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
2588 if (IS_ERR(cs))
2589 return PTR_ERR(cs);
2590
2591 /* Ensure the LRI have landed before we invalidate & continue */
2592 *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
2593 for (i = GEN8_3LVL_PDPES; i--; ) {
2594 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
2595 u32 base = engine->mmio_base;
2596
2597 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
2598 *cs++ = upper_32_bits(pd_daddr);
2599 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
2600 *cs++ = lower_32_bits(pd_daddr);
2601 }
2602 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2603 intel_ring_advance(rq, cs);
2604
2605
2606
2607 return 0;
2608 }
2609
2610 static int execlists_request_alloc(struct i915_request *request)
2611 {
2612 int ret;
2613
2614 GEM_BUG_ON(!intel_context_is_pinned(request->context));
2615
2616 /*
2617 * Flush enough space to reduce the likelihood of waiting after
2618 * we start building the request - in which case we will just
2619 * have to repeat work.
2620 */
2621 request->reserved_space += EXECLISTS_REQUEST_SIZE;
2622
2623 /*
2624 * Note that after this point, we have committed to using
2625 * this request as it is being used to both track the
2626 * state of engine initialisation and liveness of the
2627 * golden renderstate above. Think twice before you try
2628 * to cancel/unwind this request now.
2629 */
2630
2631 if (!i915_vm_is_4lvl(request->context->vm)) {
2632 ret = emit_pdps(request);
2633 if (ret)
2634 return ret;
2635 }
2636
2637 /* Unconditionally invalidate GPU caches and TLBs. */
2638 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
2639 if (ret)
2640 return ret;
2641
2642 request->reserved_space -= EXECLISTS_REQUEST_SIZE;
2643 return 0;
2644 }
2645
2646 static void reset_csb_pointers(struct intel_engine_cs *engine)
2647 {
2648 struct intel_engine_execlists * const execlists = &engine->execlists;
2649 const unsigned int reset_value = execlists->csb_size - 1;
2650
2651 ring_set_paused(engine, 0);
2652
2653 /*
2654 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
2655 * Bludgeon them with a mmio update to be sure.
2656 */
2657 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
2658 0xffff << 16 | reset_value << 8 | reset_value);
2659 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2660
2661 /*
2662 * After a reset, the HW starts writing into CSB entry [0]. We
2663 * therefore have to set our HEAD pointer back one entry so that
2664 * the *first* entry we check is entry 0. To complicate this further,
2665 * as we don't wait for the first interrupt after reset, we have to
2666 * fake the HW write to point back to the last entry so that our
2667 * inline comparison of our cached head position against the last HW
2668 * write works even before the first interrupt.
2669 */
2670 execlists->csb_head = reset_value;
2671 WRITE_ONCE(*execlists->csb_write, reset_value);
2672 wmb(); /* Make sure this is visible to HW (paranoia?) */
2673
2674 /* Check that the GPU does indeed update the CSB entries!
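 *
 * (Each entry is poisoned with -1 below; csb_read() writes -1 back
 * after consuming an entry, so a slot that still reads -1 was never
 * (re)written by the HW, which is exactly what wa_csb_read() checks
 * for.)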
*/
2675 memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
2676 invalidate_csb_entries(&execlists->csb_status[0],
2677 &execlists->csb_status[reset_value]);
2678
2679 /* Once more for luck and our trusty paranoia */
2680 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
2681 0xffff << 16 | reset_value << 8 | reset_value);
2682 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2683
2684 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
2685 }
2686
2687 static void sanitize_hwsp(struct intel_engine_cs *engine)
2688 {
2689 struct intel_timeline *tl;
2690
2691 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
2692 intel_timeline_reset_seqno(tl);
2693 }
2694
2695 static void execlists_sanitize(struct intel_engine_cs *engine)
2696 {
2697 GEM_BUG_ON(execlists_active(&engine->execlists));
2698
2699 /*
2700 * Poison residual state on resume, in case the suspend didn't!
2701 *
2702 * We have to assume that across suspend/resume (or other loss
2703 * of control) that the contents of our pinned buffers have been
2704 * lost, replaced by garbage. Since this doesn't always happen,
2705 * let's poison such state so that we more quickly spot when
2706 * we falsely assume it has been preserved.
2707 */
2708 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
2709 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
2710
2711 reset_csb_pointers(engine);
2712
2713 /*
2714 * The kernel_context HWSP is stored in the status_page. As above,
2715 * that may be lost on resume/initialisation, and so we need to
2716 * reset the value in the HWSP.
2717 */
2718 sanitize_hwsp(engine);
2719
2720 /* And scrub the dirty cachelines for the HWSP */
2721 clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
2722 }
2723
2724 static void enable_error_interrupt(struct intel_engine_cs *engine)
2725 {
2726 u32 status;
2727
2728 engine->execlists.error_interrupt = 0;
2729 ENGINE_WRITE(engine, RING_EMR, ~0u);
2730 ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
2731
2732 status = ENGINE_READ(engine, RING_ESR);
2733 if (unlikely(status)) {
2734 drm_err(&engine->i915->drm,
2735 "engine '%s' resumed still in error: %08x\n",
2736 engine->name, status);
2737 __intel_gt_reset(engine->gt, engine->mask);
2738 }
2739
2740 /*
2741 * On current gen8+, we have 2 signals to play with
2742 *
2743 * - I915_ERROR_INSTRUCTION (bit 0)
2744 *
2745 * Generate an error if the command parser encounters an invalid
2746 * instruction
2747 *
2748 * This is a fatal error.
2749 *
2750 * - CP_PRIV (bit 2)
2751 *
2752 * Generate an error on privilege violation (where the CP replaces
2753 * the instruction with a no-op). This also fires for writes into
2754 * read-only scratch pages.
2755 *
2756 * This is a non-fatal error; parsing continues.
2757 *
2758 * * there are a few others defined for odd HW that we do not use
2759 *
2760 * Since CP_PRIV fires for cases where we have chosen to ignore the
2761 * error (as the HW is validating and suppressing the mistakes), we
2762 * only unmask the instruction error bit.
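 *
 * Concretely, with I915_ERROR_INSTRUCTION at bit 0 as listed above,
 * the write below is EMR = ~BIT(0) = 0xfffffffe: every error source
 * masked except the fatal instruction error.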
2763 */
2764 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
2765 }
2766
2767 static void enable_execlists(struct intel_engine_cs *engine)
2768 {
2769 u32 mode;
2770
2771 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
2772
2773 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
2774
2775 if (GRAPHICS_VER(engine->i915) >= 11)
2776 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
2777 else
2778 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
2779 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
2780
2781 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
2782
2783 ENGINE_WRITE_FW(engine,
2784 RING_HWS_PGA,
2785 i915_ggtt_offset(engine->status_page.vma));
2786 ENGINE_POSTING_READ(engine, RING_HWS_PGA);
2787
2788 enable_error_interrupt(engine);
2789 }
2790
2791 static int execlists_resume(struct intel_engine_cs *engine)
2792 {
2793 intel_mocs_init_engine(engine);
2794 intel_breadcrumbs_reset(engine->breadcrumbs);
2795
2796 enable_execlists(engine);
2797
2798 return 0;
2799 }
2800
2801 static void execlists_reset_prepare(struct intel_engine_cs *engine)
2802 {
2803 struct intel_engine_execlists * const execlists = &engine->execlists;
2804
2805 ENGINE_TRACE(engine, "depth<-%d\n",
2806 atomic_read(&execlists->tasklet.count));
2807
2808 /*
2809 * Prevent request submission to the hardware until we have
2810 * completed the reset in i915_gem_reset_finish(). If a request
2811 * is completed by one engine, it may then queue a request
2812 * to a second via its execlists->tasklet *just* as we are
2813 * calling engine->resume() and also writing the ELSP.
2814 * Turning off the execlists->tasklet until the reset is over
2815 * prevents the race.
2816 */
2817 __tasklet_disable_sync_once(&execlists->tasklet);
2818 GEM_BUG_ON(!reset_in_progress(execlists));
2819
2820 /*
2821 * We stop the engines, otherwise we might get a failed reset and a
2822 * dead gpu (on elk). Also, a modern gpu such as kbl can suffer
2823 * from a system hang if a batchbuffer is progressing when
2824 * the reset is issued, regardless of the READY_TO_RESET ack.
2825 * Thus assume it is best to stop the engines on all gens
2826 * where we have a gpu reset.
2827 *
2828 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
2829 *
2830 * FIXME: Wa for more modern gens needs to be validated
2831 */
2832 ring_set_paused(engine, 1);
2833 intel_engine_stop_cs(engine);
2834
2835 engine->execlists.reset_ccid = active_ccid(engine);
2836 }
2837
2838 static struct i915_request **
2839 reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
2840 {
2841 struct intel_engine_execlists * const execlists = &engine->execlists;
2842
2843 mb(); /* paranoia: read the CSB pointers from after the reset */
2844 clflush(execlists->csb_write);
2845 mb();
2846
2847 inactive = process_csb(engine, inactive); /* drain preemption events */
2848
2849 /* Following the reset, we need to reload the CSB read/write pointers */
2850 reset_csb_pointers(engine);
2851
2852 return inactive;
2853 }
2854
2855 static void
2856 execlists_reset_active(struct intel_engine_cs *engine, bool stalled)
2857 {
2858 struct intel_context *ce;
2859 struct i915_request *rq;
2860 u32 head;
2861
2862 /*
2863 * Save the currently executing context; even if we completed
2864 * its request, it was still running at the time of the
2865 * reset and will have been clobbered.
2866 */ 2867 rq = active_context(engine, engine->execlists.reset_ccid); 2868 if (!rq) 2869 return; 2870 2871 ce = rq->context; 2872 GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); 2873 2874 if (__i915_request_is_complete(rq)) { 2875 /* Idle context; tidy up the ring so we can restart afresh */ 2876 head = intel_ring_wrap(ce->ring, rq->tail); 2877 goto out_replay; 2878 } 2879 2880 /* We still have requests in-flight; the engine should be active */ 2881 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 2882 2883 /* Context has requests still in-flight; it should not be idle! */ 2884 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 2885 2886 rq = active_request(ce->timeline, rq); 2887 head = intel_ring_wrap(ce->ring, rq->head); 2888 GEM_BUG_ON(head == ce->ring->tail); 2889 2890 /* 2891 * If this request hasn't started yet, e.g. it is waiting on a 2892 * semaphore, we need to avoid skipping the request or else we 2893 * break the signaling chain. However, if the context is corrupt 2894 * the request will not restart and we will be stuck with a wedged 2895 * device. It is quite often the case that if we issue a reset 2896 * while the GPU is loading the context image, that the context 2897 * image becomes corrupt. 2898 * 2899 * Otherwise, if we have not started yet, the request should replay 2900 * perfectly and we do not need to flag the result as being erroneous. 2901 */ 2902 if (!__i915_request_has_started(rq)) 2903 goto out_replay; 2904 2905 /* 2906 * If the request was innocent, we leave the request in the ELSP 2907 * and will try to replay it on restarting. The context image may 2908 * have been corrupted by the reset, in which case we may have 2909 * to service a new GPU hang, but more likely we can continue on 2910 * without impact. 2911 * 2912 * If the request was guilty, we presume the context is corrupt 2913 * and have to at least restore the RING register in the context 2914 * image back to the expected values to skip over the guilty request. 2915 */ 2916 __i915_request_reset(rq, stalled); 2917 2918 /* 2919 * We want a simple context + ring to execute the breadcrumb update. 2920 * We cannot rely on the context being intact across the GPU hang, 2921 * so clear it and rebuild just what we need for the breadcrumb. 2922 * All pending requests for this context will be zapped, and any 2923 * future request will be after userspace has had the opportunity 2924 * to recreate its own state. 2925 */ 2926 out_replay: 2927 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", 2928 head, ce->ring->tail); 2929 lrc_reset_regs(ce, engine); 2930 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 2931 } 2932 2933 static void execlists_reset_csb(struct intel_engine_cs *engine, bool stalled) 2934 { 2935 struct intel_engine_execlists * const execlists = &engine->execlists; 2936 struct i915_request *post[2 * EXECLIST_MAX_PORTS]; 2937 struct i915_request **inactive; 2938 2939 rcu_read_lock(); 2940 inactive = reset_csb(engine, post); 2941 2942 execlists_reset_active(engine, true); 2943 2944 inactive = cancel_port_requests(execlists, inactive); 2945 post_process_csb(post, inactive); 2946 rcu_read_unlock(); 2947 } 2948 2949 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) 2950 { 2951 unsigned long flags; 2952 2953 ENGINE_TRACE(engine, "\n"); 2954 2955 /* Process the csb, find the guilty context and throw away */ 2956 execlists_reset_csb(engine, stalled); 2957 2958 /* Push back any incomplete requests for replay after the reset. 
*/ 2959 rcu_read_lock(); 2960 spin_lock_irqsave(&engine->active.lock, flags); 2961 __unwind_incomplete_requests(engine); 2962 spin_unlock_irqrestore(&engine->active.lock, flags); 2963 rcu_read_unlock(); 2964 } 2965 2966 static void nop_submission_tasklet(struct tasklet_struct *t) 2967 { 2968 struct intel_engine_cs * const engine = 2969 from_tasklet(engine, t, execlists.tasklet); 2970 2971 /* The driver is wedged; don't process any more events. */ 2972 WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); 2973 } 2974 2975 static void execlists_reset_cancel(struct intel_engine_cs *engine) 2976 { 2977 struct intel_engine_execlists * const execlists = &engine->execlists; 2978 struct i915_request *rq, *rn; 2979 struct rb_node *rb; 2980 unsigned long flags; 2981 2982 ENGINE_TRACE(engine, "\n"); 2983 2984 /* 2985 * Before we call engine->cancel_requests(), we should have exclusive 2986 * access to the submission state. This is arranged for us by the 2987 * caller disabling the interrupt generation, the tasklet and other 2988 * threads that may then access the same state, giving us a free hand 2989 * to reset state. However, we still need to let lockdep be aware that 2990 * we know this state may be accessed in hardirq context, so we 2991 * disable the irq around this manipulation and we want to keep 2992 * the spinlock focused on its duties and not accidentally conflate 2993 * coverage to the submission's irq state. (Similarly, although we 2994 * shouldn't need to disable irq around the manipulation of the 2995 * submission's irq state, we also wish to remind ourselves that 2996 * it is irq state.) 2997 */ 2998 execlists_reset_csb(engine, true); 2999 3000 rcu_read_lock(); 3001 spin_lock_irqsave(&engine->active.lock, flags); 3002 3003 /* Mark all executing requests as skipped. */ 3004 list_for_each_entry(rq, &engine->active.requests, sched.link) 3005 i915_request_put(i915_request_mark_eio(rq)); 3006 intel_engine_signal_breadcrumbs(engine); 3007 3008 /* Flush the queued requests to the timeline list (for retiring). 
*/ 3009 while ((rb = rb_first_cached(&execlists->queue))) { 3010 struct i915_priolist *p = to_priolist(rb); 3011 3012 priolist_for_each_request_consume(rq, rn, p) { 3013 if (i915_request_mark_eio(rq)) { 3014 __i915_request_submit(rq); 3015 i915_request_put(rq); 3016 } 3017 } 3018 3019 rb_erase_cached(&p->node, &execlists->queue); 3020 i915_priolist_free(p); 3021 } 3022 3023 /* On-hold requests will be flushed to timeline upon their release */ 3024 list_for_each_entry(rq, &engine->active.hold, sched.link) 3025 i915_request_put(i915_request_mark_eio(rq)); 3026 3027 /* Cancel all attached virtual engines */ 3028 while ((rb = rb_first_cached(&execlists->virtual))) { 3029 struct virtual_engine *ve = 3030 rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 3031 3032 rb_erase_cached(rb, &execlists->virtual); 3033 RB_CLEAR_NODE(rb); 3034 3035 spin_lock(&ve->base.active.lock); 3036 rq = fetch_and_zero(&ve->request); 3037 if (rq) { 3038 if (i915_request_mark_eio(rq)) { 3039 rq->engine = engine; 3040 __i915_request_submit(rq); 3041 i915_request_put(rq); 3042 } 3043 i915_request_put(rq); 3044 3045 ve->base.execlists.queue_priority_hint = INT_MIN; 3046 } 3047 spin_unlock(&ve->base.active.lock); 3048 } 3049 3050 /* Remaining _unready_ requests will be nop'ed when submitted */ 3051 3052 execlists->queue_priority_hint = INT_MIN; 3053 execlists->queue = RB_ROOT_CACHED; 3054 3055 GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); 3056 execlists->tasklet.callback = nop_submission_tasklet; 3057 3058 spin_unlock_irqrestore(&engine->active.lock, flags); 3059 rcu_read_unlock(); 3060 } 3061 3062 static void execlists_reset_finish(struct intel_engine_cs *engine) 3063 { 3064 struct intel_engine_execlists * const execlists = &engine->execlists; 3065 3066 /* 3067 * After a GPU reset, we may have requests to replay. Do so now while 3068 * we still have the forcewake to be sure that the GPU is not allowed 3069 * to sleep before we restart and reload a context. 3070 * 3071 * If the GPU reset fails, the engine may still be alive with requests 3072 * inflight. We expect those to complete, or for the device to be 3073 * reset as the next level of recovery, and as a final resort we 3074 * will declare the device wedged. 3075 */ 3076 GEM_BUG_ON(!reset_in_progress(execlists)); 3077 3078 /* And kick in case we missed a new request submission. 
*/
3079 if (__tasklet_enable(&execlists->tasklet))
3080 __execlists_kick(execlists);
3081
3082 ENGINE_TRACE(engine, "depth->%d\n",
3083 atomic_read(&execlists->tasklet.count));
3084 }
3085
3086 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3087 {
3088 ENGINE_WRITE(engine, RING_IMR,
3089 ~(engine->irq_enable_mask | engine->irq_keep_mask));
3090 ENGINE_POSTING_READ(engine, RING_IMR);
3091 }
3092
3093 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3094 {
3095 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
3096 }
3097
3098 static void execlists_park(struct intel_engine_cs *engine)
3099 {
3100 cancel_timer(&engine->execlists.timer);
3101 cancel_timer(&engine->execlists.preempt);
3102 }
3103
3104 static bool can_preempt(struct intel_engine_cs *engine)
3105 {
3106 if (GRAPHICS_VER(engine->i915) > 8)
3107 return true;
3108
3109 /* GPGPU on bdw requires extra w/a; not implemented */
3110 return engine->class != RENDER_CLASS;
3111 }
3112
3113 static void execlists_set_default_submission(struct intel_engine_cs *engine)
3114 {
3115 engine->submit_request = execlists_submit_request;
3116 engine->schedule = i915_schedule;
3117 engine->execlists.tasklet.callback = execlists_submission_tasklet;
3118 }
3119
3120 static void execlists_shutdown(struct intel_engine_cs *engine)
3121 {
3122 /* Synchronise with residual timers and any softirq they raise */
3123 del_timer_sync(&engine->execlists.timer);
3124 del_timer_sync(&engine->execlists.preempt);
3125 tasklet_kill(&engine->execlists.tasklet);
3126 }
3127
3128 static void execlists_release(struct intel_engine_cs *engine)
3129 {
3130 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
3131
3132 execlists_shutdown(engine);
3133
3134 intel_engine_cleanup_common(engine);
3135 lrc_fini_wa_ctx(engine);
3136 }
3137
3138 static void
3139 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
3140 {
3141 /* Default vfuncs which can be overridden by each engine. */
3142
3143 engine->resume = execlists_resume;
3144
3145 engine->cops = &execlists_context_ops;
3146 engine->request_alloc = execlists_request_alloc;
3147
3148 engine->reset.prepare = execlists_reset_prepare;
3149 engine->reset.rewind = execlists_reset_rewind;
3150 engine->reset.cancel = execlists_reset_cancel;
3151 engine->reset.finish = execlists_reset_finish;
3152
3153 engine->park = execlists_park;
3154 engine->unpark = NULL;
3155
3156 engine->emit_flush = gen8_emit_flush_xcs;
3157 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
3158 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
3159 if (GRAPHICS_VER(engine->i915) >= 12) {
3160 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
3161 engine->emit_flush = gen12_emit_flush_xcs;
3162 }
3163 engine->set_default_submission = execlists_set_default_submission;
3164
3165 if (GRAPHICS_VER(engine->i915) < 11) {
3166 engine->irq_enable = gen8_logical_ring_enable_irq;
3167 engine->irq_disable = gen8_logical_ring_disable_irq;
3168 } else {
3169 /*
3170 * TODO: On Gen11 interrupt masks need to be clear
3171 * to allow C6 entry. Keep interrupts enabled
3172 * and take the hit of generating extra interrupts
3173 * until a more refined solution exists.
3174 */ 3175 } 3176 intel_engine_set_irq_handler(engine, execlists_irq_handler); 3177 3178 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 3179 if (!intel_vgpu_active(engine->i915)) { 3180 engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3181 if (can_preempt(engine)) { 3182 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3183 if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 3184 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3185 } 3186 } 3187 3188 if (intel_engine_has_preemption(engine)) 3189 engine->emit_bb_start = gen8_emit_bb_start; 3190 else 3191 engine->emit_bb_start = gen8_emit_bb_start_noarb; 3192 } 3193 3194 static void logical_ring_default_irqs(struct intel_engine_cs *engine) 3195 { 3196 unsigned int shift = 0; 3197 3198 if (GRAPHICS_VER(engine->i915) < 11) { 3199 const u8 irq_shifts[] = { 3200 [RCS0] = GEN8_RCS_IRQ_SHIFT, 3201 [BCS0] = GEN8_BCS_IRQ_SHIFT, 3202 [VCS0] = GEN8_VCS0_IRQ_SHIFT, 3203 [VCS1] = GEN8_VCS1_IRQ_SHIFT, 3204 [VECS0] = GEN8_VECS_IRQ_SHIFT, 3205 }; 3206 3207 shift = irq_shifts[engine->id]; 3208 } 3209 3210 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; 3211 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; 3212 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; 3213 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; 3214 } 3215 3216 static void rcs_submission_override(struct intel_engine_cs *engine) 3217 { 3218 switch (GRAPHICS_VER(engine->i915)) { 3219 case 12: 3220 engine->emit_flush = gen12_emit_flush_rcs; 3221 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3222 break; 3223 case 11: 3224 engine->emit_flush = gen11_emit_flush_rcs; 3225 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3226 break; 3227 default: 3228 engine->emit_flush = gen8_emit_flush_rcs; 3229 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3230 break; 3231 } 3232 } 3233 3234 int intel_execlists_submission_setup(struct intel_engine_cs *engine) 3235 { 3236 struct intel_engine_execlists * const execlists = &engine->execlists; 3237 struct drm_i915_private *i915 = engine->i915; 3238 struct intel_uncore *uncore = engine->uncore; 3239 u32 base = engine->mmio_base; 3240 3241 tasklet_setup(&engine->execlists.tasklet, execlists_submission_tasklet); 3242 timer_setup(&engine->execlists.timer, execlists_timeslice, 0); 3243 timer_setup(&engine->execlists.preempt, execlists_preempt, 0); 3244 3245 logical_ring_default_vfuncs(engine); 3246 logical_ring_default_irqs(engine); 3247 3248 if (engine->class == RENDER_CLASS) 3249 rcs_submission_override(engine); 3250 3251 lrc_init_wa_ctx(engine); 3252 3253 if (HAS_LOGICAL_RING_ELSQ(i915)) { 3254 execlists->submit_reg = uncore->regs + 3255 i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); 3256 execlists->ctrl_reg = uncore->regs + 3257 i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); 3258 } else { 3259 execlists->submit_reg = uncore->regs + 3260 i915_mmio_reg_offset(RING_ELSP(base)); 3261 } 3262 3263 execlists->csb_status = 3264 (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; 3265 3266 execlists->csb_write = 3267 &engine->status_page.addr[intel_hws_csb_write_index(i915)]; 3268 3269 if (GRAPHICS_VER(i915) < 11) 3270 execlists->csb_size = GEN8_CSB_ENTRIES; 3271 else 3272 execlists->csb_size = GEN11_CSB_ENTRIES; 3273 3274 engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); 3275 if (GRAPHICS_VER(engine->i915) >= 11) { 3276 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); 3277 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 
32);
3278 }
3279
3280 /* Finally, take ownership and responsibility for cleanup! */
3281 engine->sanitize = execlists_sanitize;
3282 engine->release = execlists_release;
3283
3284 return 0;
3285 }
3286
3287 static struct list_head *virtual_queue(struct virtual_engine *ve)
3288 {
3289 return &ve->base.execlists.default_priolist.requests;
3290 }
3291
3292 static void rcu_virtual_context_destroy(struct work_struct *wrk)
3293 {
3294 struct virtual_engine *ve =
3295 container_of(wrk, typeof(*ve), rcu.work);
3296 unsigned int n;
3297
3298 GEM_BUG_ON(ve->context.inflight);
3299
3300 /* Preempt-to-busy may leave a stale request behind. */
3301 if (unlikely(ve->request)) {
3302 struct i915_request *old;
3303
3304 spin_lock_irq(&ve->base.active.lock);
3305
3306 old = fetch_and_zero(&ve->request);
3307 if (old) {
3308 GEM_BUG_ON(!__i915_request_is_complete(old));
3309 __i915_request_submit(old);
3310 i915_request_put(old);
3311 }
3312
3313 spin_unlock_irq(&ve->base.active.lock);
3314 }
3315
3316 /*
3317 * Flush the tasklet in case it is still running on another core.
3318 *
3319 * This needs to be done before we remove ourselves from the siblings'
3320 * rbtrees as, in the case it is running in parallel, it may reinsert
3321 * the rb_node into a sibling.
3322 */
3323 tasklet_kill(&ve->base.execlists.tasklet);
3324
3325 /* Decouple ourselves from the siblings, no more access allowed. */
3326 for (n = 0; n < ve->num_siblings; n++) {
3327 struct intel_engine_cs *sibling = ve->siblings[n];
3328 struct rb_node *node = &ve->nodes[sibling->id].rb;
3329
3330 if (RB_EMPTY_NODE(node))
3331 continue;
3332
3333 spin_lock_irq(&sibling->active.lock);
3334
3335 /* Detachment is lazily performed in the execlists tasklet */
3336 if (!RB_EMPTY_NODE(node))
3337 rb_erase_cached(node, &sibling->execlists.virtual);
3338
3339 spin_unlock_irq(&sibling->active.lock);
3340 }
3341 GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
3342 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
3343
3344 lrc_fini(&ve->context);
3345 intel_context_fini(&ve->context);
3346
3347 intel_breadcrumbs_free(ve->base.breadcrumbs);
3348 intel_engine_free_request_pool(&ve->base);
3349
3350 kfree(ve->bonds);
3351 kfree(ve);
3352 }
3353
3354 static void virtual_context_destroy(struct kref *kref)
3355 {
3356 struct virtual_engine *ve =
3357 container_of(kref, typeof(*ve), context.ref);
3358
3359 GEM_BUG_ON(!list_empty(&ve->context.signals));
3360
3361 /*
3362 * When destroying the virtual engine, we have to be aware that
3363 * it may still be in use from a hardirq/softirq context causing
3364 * the resubmission of a completed request (background completion
3365 * due to preempt-to-busy). Before we can free the engine, we need
3366 * to flush the submission code and tasklets that are still potentially
3367 * accessing the engine. Flushing the tasklets requires process context,
3368 * and since we can guard the resubmit onto the engine with an RCU read
3369 * lock, we can delegate the free of the engine to an RCU worker.
3370 */
3371 INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
3372 queue_rcu_work(system_wq, &ve->rcu);
3373 }
3374
3375 static void virtual_engine_initial_hint(struct virtual_engine *ve)
3376 {
3377 int swp;
3378
3379 /*
3380 * Pick a random sibling on starting to help spread the load around.
3381 *
3382 * New contexts are typically created with exactly the same order
3383 * of siblings, and often started in batches.
Due to the way we iterate
3384 * the array of siblings when submitting requests, sibling[0] is
3385 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
3386 * randomised across the system, we also help spread the load by the
3387 * first engine we inspect being different each time.
3388 *
3389 * NB This does not force us to execute on this engine; it will just
3390 * typically be the first we inspect for submission.
3391 */
3392 swp = prandom_u32_max(ve->num_siblings);
3393 if (swp)
3394 swap(ve->siblings[swp], ve->siblings[0]);
3395 }
3396
3397 static int virtual_context_alloc(struct intel_context *ce)
3398 {
3399 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3400
3401 return lrc_alloc(ce, ve->siblings[0]);
3402 }
3403
3404 static int virtual_context_pre_pin(struct intel_context *ce,
3405 struct i915_gem_ww_ctx *ww,
3406 void **vaddr)
3407 {
3408 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3409
3410 /* Note: we must use a real engine class for setting up reg state */
3411 return __execlists_context_pre_pin(ce, ve->siblings[0], ww, vaddr);
3412 }
3413
3414 static int virtual_context_pin(struct intel_context *ce, void *vaddr)
3415 {
3416 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3417
3418 return lrc_pin(ce, ve->siblings[0], vaddr);
3419 }
3420
3421 static void virtual_context_enter(struct intel_context *ce)
3422 {
3423 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3424 unsigned int n;
3425
3426 for (n = 0; n < ve->num_siblings; n++)
3427 intel_engine_pm_get(ve->siblings[n]);
3428
3429 intel_timeline_enter(ce->timeline);
3430 }
3431
3432 static void virtual_context_exit(struct intel_context *ce)
3433 {
3434 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3435 unsigned int n;
3436
3437 intel_timeline_exit(ce->timeline);
3438
3439 for (n = 0; n < ve->num_siblings; n++)
3440 intel_engine_pm_put(ve->siblings[n]);
3441 }
3442
3443 static const struct intel_context_ops virtual_context_ops = {
3444 .flags = COPS_HAS_INFLIGHT,
3445
3446 .alloc = virtual_context_alloc,
3447
3448 .pre_pin = virtual_context_pre_pin,
3449 .pin = virtual_context_pin,
3450 .unpin = lrc_unpin,
3451 .post_unpin = lrc_post_unpin,
3452
3453 .enter = virtual_context_enter,
3454 .exit = virtual_context_exit,
3455
3456 .destroy = virtual_context_destroy,
3457 };
3458
3459 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
3460 {
3461 struct i915_request *rq;
3462 intel_engine_mask_t mask;
3463
3464 rq = READ_ONCE(ve->request);
3465 if (!rq)
3466 return 0;
3467
3468 /* The rq is ready for submission; rq->execution_mask is now stable.
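 *
 * (Stable because a ready request has no outstanding signalers: any
 * bond callback, see virtual_bond_execute(), has already run, and
 * nothing else narrows rq->execution_mask afterwards.)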
static void virtual_submission_tasklet(struct tasklet_struct *t)
{
	struct virtual_engine * const ve =
		from_tasklet(ve, t, base.execlists.tasklet);
	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
	intel_engine_mask_t mask;
	unsigned int n;

	rcu_read_lock();
	mask = virtual_submission_mask(ve);
	rcu_read_unlock();
	if (unlikely(!mask))
		return;

	for (n = 0; n < ve->num_siblings; n++) {
		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
		struct ve_node * const node = &ve->nodes[sibling->id];
		struct rb_node **parent, *rb;
		bool first;

		if (!READ_ONCE(ve->request))
			break; /* already handled by a sibling's tasklet */

		spin_lock_irq(&sibling->active.lock);

		if (unlikely(!(mask & sibling->mask))) {
			if (!RB_EMPTY_NODE(&node->rb)) {
				rb_erase_cached(&node->rb,
						&sibling->execlists.virtual);
				RB_CLEAR_NODE(&node->rb);
			}

			goto unlock_engine;
		}

		if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
			/*
			 * Cheat and avoid rebalancing the tree if we can
			 * reuse this node in situ.
			 */
			first = rb_first_cached(&sibling->execlists.virtual) ==
				&node->rb;
			if (prio == node->prio || (prio > node->prio && first))
				goto submit_engine;

			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
		}

		rb = NULL;
		first = true;
		parent = &sibling->execlists.virtual.rb_root.rb_node;
		while (*parent) {
			struct ve_node *other;

			rb = *parent;
			other = rb_entry(rb, typeof(*other), rb);
			if (prio > other->prio) {
				parent = &rb->rb_left;
			} else {
				parent = &rb->rb_right;
				first = false;
			}
		}

		rb_link_node(&node->rb, rb, parent);
		rb_insert_color_cached(&node->rb,
				       &sibling->execlists.virtual,
				       first);

submit_engine:
		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
		node->prio = prio;
		if (first && prio > sibling->execlists.queue_priority_hint)
			tasklet_hi_schedule(&sibling->execlists.tasklet);

unlock_engine:
		spin_unlock_irq(&sibling->active.lock);

		if (intel_context_inflight(&ve->context))
			break;
	}
}

static void virtual_submit_request(struct i915_request *rq)
{
	struct virtual_engine *ve = to_virtual_engine(rq->engine);
	unsigned long flags;

	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
		     rq->fence.context,
		     rq->fence.seqno);

	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);

	spin_lock_irqsave(&ve->base.active.lock, flags);

	/* By the time we resubmit a request, it may be completed */
	if (__i915_request_is_complete(rq)) {
		__i915_request_submit(rq);
		goto unlock;
	}

	if (ve->request) { /* background completion from preempt-to-busy */
		GEM_BUG_ON(!__i915_request_is_complete(ve->request));
		__i915_request_submit(ve->request);
		i915_request_put(ve->request);
	}

	ve->base.execlists.queue_priority_hint = rq_prio(rq);
	ve->request = i915_request_get(rq);

	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
	list_move_tail(&rq->sched.link, virtual_queue(ve));

	tasklet_hi_schedule(&ve->base.execlists.tasklet);

unlock:
	spin_unlock_irqrestore(&ve->base.active.lock, flags);
}
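
/*
 * Bonds restrict where a bonded request may execute once its master has
 * been placed on an engine. Each ve_bond pairs a master engine with a mask
 * of permitted siblings; virtual_bond_execute() below applies that mask to
 * the bonded request when the master's submission fence signals. The pairs
 * are registered from userspace via I915_CONTEXT_ENGINES_EXT_BOND (see
 * intel_virtual_engine_attach_bond() further down).
 */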
static struct ve_bond *
virtual_find_bond(struct virtual_engine *ve,
		  const struct intel_engine_cs *master)
{
	int i;

	for (i = 0; i < ve->num_bonds; i++) {
		if (ve->bonds[i].master == master)
			return &ve->bonds[i];
	}

	return NULL;
}

static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
	struct virtual_engine *ve = to_virtual_engine(rq->engine);
	intel_engine_mask_t allowed, exec;
	struct ve_bond *bond;

	allowed = ~to_request(signal)->engine->mask;

	bond = virtual_find_bond(ve, to_request(signal)->engine);
	if (bond)
		allowed &= bond->sibling_mask;

	/* Restrict the bonded request to run on only the available engines */
	exec = READ_ONCE(rq->execution_mask);
	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
		;

	/* Prevent the master from being re-run on the bonded engines */
	to_request(signal)->execution_mask &= ~allowed;
}
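
/*
 * Note on the try_cmpxchg() loop in virtual_bond_execute(): it is an
 * open-coded atomic AND on rq->execution_mask, i.e. morally
 *
 *	atomic_and(allowed, &rq->execution_mask);
 *
 * (hypothetical -- execution_mask is not an atomic_t), retrying until no
 * concurrent writer has changed the mask between the read and the exchange.
 */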
struct intel_context *
intel_execlists_create_virtual(struct intel_engine_cs **siblings,
			       unsigned int count)
{
	struct virtual_engine *ve;
	unsigned int n;
	int err;

	if (count == 0)
		return ERR_PTR(-EINVAL);

	if (count == 1)
		return intel_context_create(siblings[0]);

	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.class = OTHER_CLASS;
	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;

	/*
	 * The decision on whether to submit a request using semaphores
	 * depends on the saturated state of the engine. We only compute
	 * this during HW submission of the request, and we need this
	 * state to be globally applied to all requests being submitted
	 * to this engine. Virtual engines encompass more than one physical
	 * engine and so we cannot accurately tell in advance if one of those
	 * engines is already saturated and so cannot afford to use a semaphore
	 * and be pessimized in priority for doing so -- if we are the only
	 * context using semaphores after all other clients have stopped, we
	 * will be starved on the saturated system. Such a global switch for
	 * semaphores is less than ideal, but alas is the current compromise.
	 */
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
	intel_engine_init_execlists(&ve->base);

	ve->base.cops = &virtual_context_ops;
	ve->base.request_alloc = execlists_request_alloc;

	ve->base.schedule = i915_schedule;
	ve->base.submit_request = virtual_submit_request;
	ve->base.bond_execute = virtual_bond_execute;

	INIT_LIST_HEAD(virtual_queue(ve));
	ve->base.execlists.queue_priority_hint = INT_MIN;
	tasklet_setup(&ve->base.execlists.tasklet, virtual_submission_tasklet);

	intel_context_init(&ve->context, &ve->base);

	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
	if (!ve->base.breadcrumbs) {
		err = -ENOMEM;
		goto err_put;
	}

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			DRM_DEBUG("duplicate %s entry in load balancer\n",
				  sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		/*
		 * The virtual engine implementation is tightly coupled to
		 * the execlists backend -- we push out requests directly
		 * into a tree inside each physical engine. We could support
		 * layering if we handle cloning of the requests and
		 * submitting a copy into each backend.
		 */
		if (sibling->execlists.tasklet.callback !=
		    execlists_submission_tasklet) {
			err = -ENODEV;
			goto err_put;
		}

		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);

		ve->siblings[ve->num_siblings++] = sibling;
		ve->base.mask |= sibling->mask;

		/*
		 * All physical engines must be compatible for their emission
		 * functions (as we build the instructions during request
		 * construction and do not alter them before submission
		 * on the physical engine). We use the engine class as a guide
		 * here, although that could be refined.
		 */
		if (ve->base.class != OTHER_CLASS) {
			if (ve->base.class != sibling->class) {
				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
					  sibling->class, ve->base.class);
				err = -EINVAL;
				goto err_put;
			}
			continue;
		}

		ve->base.class = sibling->class;
		ve->base.uabi_class = sibling->uabi_class;
		snprintf(ve->base.name, sizeof(ve->base.name),
			 "v%dx%d", ve->base.class, count);
		ve->base.context_size = sibling->context_size;

		ve->base.emit_bb_start = sibling->emit_bb_start;
		ve->base.emit_flush = sibling->emit_flush;
		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
		ve->base.emit_fini_breadcrumb_dw =
			sibling->emit_fini_breadcrumb_dw;

		ve->base.flags = sibling->flags;
	}

	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;

	virtual_engine_initial_hint(ve);
	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}
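
/*
 * For reference, a hedged sketch of the userspace side that lands here; the
 * identifiers are from the uAPI in include/uapi/drm/i915_drm.h, but consult
 * that header rather than this comment for the authoritative layout:
 *
 *	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2);
 *
 *	balance.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
 *	balance.num_siblings = 2;
 *	balance.engines[0] = (struct i915_engine_class_instance)
 *		{ I915_ENGINE_CLASS_VIDEO, 0 };
 *	balance.engines[1] = (struct i915_engine_class_instance)
 *		{ I915_ENGINE_CLASS_VIDEO, 1 };
 *
 * chained into I915_CONTEXT_PARAM_ENGINES and applied with
 * DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM; the decoded siblings[] array is what
 * intel_execlists_create_virtual() receives.
 */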
struct intel_context *
intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
	struct virtual_engine *se = to_virtual_engine(src);
	struct intel_context *dst;

	dst = intel_execlists_create_virtual(se->siblings,
					     se->num_siblings);
	if (IS_ERR(dst))
		return dst;

	if (se->num_bonds) {
		struct virtual_engine *de = to_virtual_engine(dst->engine);

		de->bonds = kmemdup(se->bonds,
				    sizeof(*se->bonds) * se->num_bonds,
				    GFP_KERNEL);
		if (!de->bonds) {
			intel_context_put(dst);
			return ERR_PTR(-ENOMEM);
		}

		de->num_bonds = se->num_bonds;
	}

	return dst;
}

int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
				     const struct intel_engine_cs *master,
				     const struct intel_engine_cs *sibling)
{
	struct virtual_engine *ve = to_virtual_engine(engine);
	struct ve_bond *bond;
	int n;

	/* Sanity check the sibling is part of the virtual engine */
	for (n = 0; n < ve->num_siblings; n++)
		if (sibling == ve->siblings[n])
			break;
	if (n == ve->num_siblings)
		return -EINVAL;

	bond = virtual_find_bond(ve, master);
	if (bond) {
		bond->sibling_mask |= sibling->mask;
		return 0;
	}

	bond = krealloc(ve->bonds,
			sizeof(*bond) * (ve->num_bonds + 1),
			GFP_KERNEL);
	if (!bond)
		return -ENOMEM;

	bond[ve->num_bonds].master = master;
	bond[ve->num_bonds].sibling_mask = sibling->mask;

	ve->bonds = bond;
	ve->num_bonds++;

	return 0;
}
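
/*
 * Debug pretty-printer for the engine dump: under active.lock, walk the
 * three places a request can wait on an execlists engine -- the list of
 * executing requests, the priority queue, and any virtual engines still
 * advertising a request to us -- printing at most @max entries per group
 * and eliding the middle of longer lists.
 */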
void intel_execlists_show_requests(struct intel_engine_cs *engine,
				   struct drm_printer *m,
				   void (*show_request)(struct drm_printer *m,
							const struct i915_request *rq,
							const char *prefix,
							int indent),
				   unsigned int max)
{
	const struct intel_engine_execlists *execlists = &engine->execlists;
	struct i915_request *rq, *last;
	unsigned long flags;
	unsigned int count;
	struct rb_node *rb;

	spin_lock_irqsave(&engine->active.lock, flags);

	last = NULL;
	count = 0;
	list_for_each_entry(rq, &engine->active.requests, sched.link) {
		if (count++ < max - 1)
			show_request(m, rq, "\t\t", 0);
		else
			last = rq;
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d executing requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	if (execlists->queue_priority_hint != INT_MIN)
		drm_printf(m, "\t\tQueue priority hint: %d\n",
			   READ_ONCE(execlists->queue_priority_hint));

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);

		priolist_for_each_request(rq, p) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\t", 0);
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d queued requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq = READ_ONCE(ve->request);

		if (rq) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\t", 0);
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d virtual requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_execlists.c"
#endif