// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

/**
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
 *
 * Motivation:
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
 * These expanded contexts enable a number of new abilities, especially
 * "Execlists" (also implemented in this file).
 *
 * One of the main differences with the legacy HW contexts is that logical
 * ring contexts incorporate many more things into the context's state, like
 * PDPs or ringbuffer control registers:
 *
 * The reason why PDPs are included in the context is straightforward: as
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
 * contained there means you don't need to do a ppgtt->switch_mm yourself;
 * instead, the GPU will do it for you on the context switch.
 *
 * But, what about the ringbuffer control registers (head, tail, etc.)?
 * Shouldn't we just need a set of those per engine command streamer? This is
 * where the name "Logical Rings" starts to make sense: by virtualizing the
 * rings, the engine cs shifts to a new "ring buffer" with every context
 * switch. When you want to submit a workload to the GPU you: A) choose your
 * context, B) find its appropriate virtualized ring, C) write commands to it
 * and then, finally, D) tell the GPU to switch to that context.
 *
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
 * to a context is via a context execution list, ergo "Execlists".
 *
 * LRC implementation:
 * Regarding the creation of contexts, we have:
 *
 * - One global default context.
 * - One local default context for each opened fd.
 * - One local extra context for each context create ioctl call.
 *
 * Now that ringbuffers belong per-context (and not per-engine, like before)
 * and that contexts are uniquely tied to a given engine (and not reusable,
 * like before), we need:
 *
 * - One ringbuffer per-engine inside each context.
 * - One backing object per-engine inside each context.
 *
 * The global default context starts its life with these new objects fully
 * allocated and populated. The local default context for each opened fd is
 * more complex, because we don't know at creation time which engine is going
 * to use them. To handle this, we have implemented a deferred creation of LR
 * contexts:
 *
 * The local context starts its life as a hollow or blank holder that only
 * gets populated for a given engine once we receive an execbuffer. If later
 * on we receive another execbuffer ioctl for the same context but a different
 * engine, we allocate/populate a new ringbuffer and context backing object and
 * so on.
 *
 * Finally, regarding local contexts created using the ioctl call: as they are
 * only allowed with the render ring, we can allocate & populate them right
 * away (no need to defer anything, at least for now).
 *
 * Execlists implementation:
 * Execlists are the new method by which, on gen8+ hardware, workloads are
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
 * This method works as follows:
 *
 * When a request is committed, its commands (the BB start and any leading or
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
 * for the appropriate context. The tail pointer in the hardware context is not
 * updated at this time, but instead, kept by the driver in the ringbuffer
 * structure. A structure representing this request is added to a request queue
 * for the appropriate engine: this structure contains a copy of the context's
 * tail after the request was written to the ring buffer and a pointer to the
 * context itself.
 *
 * If the engine's request queue was empty before the request was added, the
 * queue is processed immediately. Otherwise the queue will be processed during
 * a context switch interrupt. In any case, elements on the queue will get sent
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 * globally unique 20-bit submission ID.
 *
 * When execution of a request completes, the GPU updates the context status
 * buffer with a context complete event and generates a context switch interrupt.
 * During the interrupt handling, the driver examines the events in the buffer:
 * for each context complete event, if the announced ID matches that on the head
 * of the request queue, then that request is retired and removed from the queue.
 *
 * After processing, if any requests were retired and the queue is not empty
 * then a new execution list can be submitted. The two requests at the front of
 * the queue are next to be submitted but since a context may not occur twice in
 * an execution list, if subsequent requests have the same ID as the first then
 * the two requests must be combined. This is done simply by discarding requests
 * at the head of the queue until either only one request is left (in which case
 * we use a NULL second context) or the first two requests have unique IDs.
 *
 * By always executing the first two requests in the queue the driver ensures
 * that the GPU is kept as busy as possible. In the case where a single context
 * completes but a second context is still executing, the request for this second
 * context will be at the head of the queue when we remove the first one. This
 * request will then be resubmitted along with a new request for a different context,
 * which will cause the hardware to continue executing the second request and queue
 * the new request (the GPU detects the condition of a context getting preempted
 * with the same context and optimizes the context switch flow by not doing
 * preemption, but just sampling the new tail pointer).
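 *
 * As an illustrative sketch of the coalescing rule described above (not a
 * trace of real hardware state): given a per-engine queue of requests tagged
 * by context, { A1, A2, B1, C1 }, the pair written to the ELSP is (A2, B1).
 * A1 and A2 share a context, so only the later tail (A2) needs to be
 * submitted; B1 supplies the second, distinct context; and C1 must wait for
 * a subsequent context-switch interrupt before it can be submitted.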
 *
 */

#include <linux/interrupt.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "gen8_engine_cs.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_stats.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
#include "shmem_utils.h"

#define RING_EXECLIST_QFULL		(1 << 0x2)
#define RING_EXECLIST1_VALID		(1 << 0x3)
#define RING_EXECLIST0_VALID		(1 << 0x4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)

#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
	(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)

#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
#define GEN12_CSB_SW_CTX_ID_MASK	GENMASK(25, 15)
#define GEN12_IDLE_CTX_ID		0x7FF
#define GEN12_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)

#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	BIT(1) /* upper csb dword */
#define XEHP_CSB_SW_CTX_ID_MASK		GENMASK(31, 10)
#define XEHP_IDLE_CTX_ID		0xFFFF
#define XEHP_CSB_CTX_VALID(csb_dw) \
	(FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID)

/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */

struct virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
	struct rcu_work rcu;

	/*
	 * We allow only a single request through the virtual engine at a time
	 * (each request in the timeline waits for the completion fence of
	 * the previous before being submitted). By restricting ourselves to
	 * only submitting a single request, each request is placed on to a
	 * physical engine to maximise load spreading (by virtue of the late
	 * greedy scheduling -- each real engine takes the next available
	 * request upon idling).
	 */
	struct i915_request *request;

	/*
	 * We keep an rbtree of available virtual engines inside each physical
	 * engine, sorted by priority. Here we preallocate the nodes we need
	 * for the virtual engine, indexed by physical_engine->id.
	 */
	struct ve_node {
		struct rb_node rb;
		int prio;
	} nodes[I915_NUM_ENGINES];

	/* And finally, which physical engines this virtual engine maps onto.
*/ 193 unsigned int num_siblings; 194 struct intel_engine_cs *siblings[]; 195 }; 196 197 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) 198 { 199 GEM_BUG_ON(!intel_engine_is_virtual(engine)); 200 return container_of(engine, struct virtual_engine, base); 201 } 202 203 static struct intel_context * 204 execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 205 unsigned long flags); 206 207 static struct i915_request * 208 __active_request(const struct intel_timeline * const tl, 209 struct i915_request *rq, 210 int error) 211 { 212 struct i915_request *active = rq; 213 214 list_for_each_entry_from_reverse(rq, &tl->requests, link) { 215 if (__i915_request_is_complete(rq)) 216 break; 217 218 if (error) { 219 i915_request_set_error_once(rq, error); 220 __i915_request_skip(rq); 221 } 222 active = rq; 223 } 224 225 return active; 226 } 227 228 static struct i915_request * 229 active_request(const struct intel_timeline * const tl, struct i915_request *rq) 230 { 231 return __active_request(tl, rq, 0); 232 } 233 234 static void ring_set_paused(const struct intel_engine_cs *engine, int state) 235 { 236 /* 237 * We inspect HWS_PREEMPT with a semaphore inside 238 * engine->emit_fini_breadcrumb. If the dword is true, 239 * the ring is paused as the semaphore will busywait 240 * until the dword is false. 241 */ 242 engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; 243 if (state) 244 wmb(); 245 } 246 247 static struct i915_priolist *to_priolist(struct rb_node *rb) 248 { 249 return rb_entry(rb, struct i915_priolist, node); 250 } 251 252 static int rq_prio(const struct i915_request *rq) 253 { 254 return READ_ONCE(rq->sched.attr.priority); 255 } 256 257 static int effective_prio(const struct i915_request *rq) 258 { 259 int prio = rq_prio(rq); 260 261 /* 262 * If this request is special and must not be interrupted at any 263 * cost, so be it. Note we are only checking the most recent request 264 * in the context and so may be masking an earlier vip request. It 265 * is hoped that under the conditions where nopreempt is used, this 266 * will not matter (i.e. all requests to that context will be 267 * nopreempt for as long as desired). 268 */ 269 if (i915_request_has_nopreempt(rq)) 270 prio = I915_PRIORITY_UNPREEMPTABLE; 271 272 return prio; 273 } 274 275 static int queue_prio(const struct i915_sched_engine *sched_engine) 276 { 277 struct rb_node *rb; 278 279 rb = rb_first_cached(&sched_engine->queue); 280 if (!rb) 281 return INT_MIN; 282 283 return to_priolist(rb)->priority; 284 } 285 286 static int virtual_prio(const struct intel_engine_execlists *el) 287 { 288 struct rb_node *rb = rb_first_cached(&el->virtual); 289 290 return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; 291 } 292 293 static bool need_preempt(const struct intel_engine_cs *engine, 294 const struct i915_request *rq) 295 { 296 int last_prio; 297 298 if (!intel_engine_has_semaphores(engine)) 299 return false; 300 301 /* 302 * Check if the current priority hint merits a preemption attempt. 303 * 304 * We record the highest value priority we saw during rescheduling 305 * prior to this dequeue, therefore we know that if it is strictly 306 * less than the current tail of ESLP[0], we do not need to force 307 * a preempt-to-idle cycle. 308 * 309 * However, the priority hint is a mere hint that we may need to 310 * preempt. If that hint is stale or we may be trying to preempt 311 * ourselves, ignore the request. 
312 * 313 * More naturally we would write 314 * prio >= max(0, last); 315 * except that we wish to prevent triggering preemption at the same 316 * priority level: the task that is running should remain running 317 * to preserve FIFO ordering of dependencies. 318 */ 319 last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); 320 if (engine->sched_engine->queue_priority_hint <= last_prio) 321 return false; 322 323 /* 324 * Check against the first request in ELSP[1], it will, thanks to the 325 * power of PI, be the highest priority of that context. 326 */ 327 if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) && 328 rq_prio(list_next_entry(rq, sched.link)) > last_prio) 329 return true; 330 331 /* 332 * If the inflight context did not trigger the preemption, then maybe 333 * it was the set of queued requests? Pick the highest priority in 334 * the queue (the first active priolist) and see if it deserves to be 335 * running instead of ELSP[0]. 336 * 337 * The highest priority request in the queue can not be either 338 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same 339 * context, it's priority would not exceed ELSP[0] aka last_prio. 340 */ 341 return max(virtual_prio(&engine->execlists), 342 queue_prio(engine->sched_engine)) > last_prio; 343 } 344 345 __maybe_unused static bool 346 assert_priority_queue(const struct i915_request *prev, 347 const struct i915_request *next) 348 { 349 /* 350 * Without preemption, the prev may refer to the still active element 351 * which we refuse to let go. 352 * 353 * Even with preemption, there are times when we think it is better not 354 * to preempt and leave an ostensibly lower priority request in flight. 355 */ 356 if (i915_request_is_active(prev)) 357 return true; 358 359 return rq_prio(prev) >= rq_prio(next); 360 } 361 362 static struct i915_request * 363 __unwind_incomplete_requests(struct intel_engine_cs *engine) 364 { 365 struct i915_request *rq, *rn, *active = NULL; 366 struct list_head *pl; 367 int prio = I915_PRIORITY_INVALID; 368 369 lockdep_assert_held(&engine->sched_engine->lock); 370 371 list_for_each_entry_safe_reverse(rq, rn, 372 &engine->sched_engine->requests, 373 sched.link) { 374 if (__i915_request_is_complete(rq)) { 375 list_del_init(&rq->sched.link); 376 continue; 377 } 378 379 __i915_request_unsubmit(rq); 380 381 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 382 if (rq_prio(rq) != prio) { 383 prio = rq_prio(rq); 384 pl = i915_sched_lookup_priolist(engine->sched_engine, 385 prio); 386 } 387 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); 388 389 list_move(&rq->sched.link, pl); 390 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 391 392 /* Check in case we rollback so far we wrap [size/2] */ 393 if (intel_ring_direction(rq->ring, 394 rq->tail, 395 rq->ring->tail + 8) > 0) 396 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; 397 398 active = rq; 399 } 400 401 return active; 402 } 403 404 struct i915_request * 405 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) 406 { 407 struct intel_engine_cs *engine = 408 container_of(execlists, typeof(*engine), execlists); 409 410 return __unwind_incomplete_requests(engine); 411 } 412 413 static void 414 execlists_context_status_change(struct i915_request *rq, unsigned long status) 415 { 416 /* 417 * Only used when GVT-g is enabled now. When GVT-g is disabled, 418 * The compiler should eliminate this function as dead-code. 
419 */ 420 if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) 421 return; 422 423 atomic_notifier_call_chain(&rq->engine->context_status_notifier, 424 status, rq); 425 } 426 427 static void reset_active(struct i915_request *rq, 428 struct intel_engine_cs *engine) 429 { 430 struct intel_context * const ce = rq->context; 431 u32 head; 432 433 /* 434 * The executing context has been cancelled. We want to prevent 435 * further execution along this context and propagate the error on 436 * to anything depending on its results. 437 * 438 * In __i915_request_submit(), we apply the -EIO and remove the 439 * requests' payloads for any banned requests. But first, we must 440 * rewind the context back to the start of the incomplete request so 441 * that we do not jump back into the middle of the batch. 442 * 443 * We preserve the breadcrumbs and semaphores of the incomplete 444 * requests so that inter-timeline dependencies (i.e other timelines) 445 * remain correctly ordered. And we defer to __i915_request_submit() 446 * so that all asynchronous waits are correctly handled. 447 */ 448 ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n", 449 rq->fence.context, rq->fence.seqno); 450 451 /* On resubmission of the active request, payload will be scrubbed */ 452 if (__i915_request_is_complete(rq)) 453 head = rq->tail; 454 else 455 head = __active_request(ce->timeline, rq, -EIO)->head; 456 head = intel_ring_wrap(ce->ring, head); 457 458 /* Scrub the context image to prevent replaying the previous batch */ 459 lrc_init_regs(ce, engine, true); 460 461 /* We've switched away, so this should be a no-op, but intent matters */ 462 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 463 } 464 465 static bool bad_request(const struct i915_request *rq) 466 { 467 return rq->fence.error && i915_request_started(rq); 468 } 469 470 static struct intel_engine_cs * 471 __execlists_schedule_in(struct i915_request *rq) 472 { 473 struct intel_engine_cs * const engine = rq->engine; 474 struct intel_context * const ce = rq->context; 475 476 intel_context_get(ce); 477 478 if (unlikely(intel_context_is_closed(ce) && 479 !intel_engine_has_heartbeat(engine))) 480 intel_context_set_banned(ce); 481 482 if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) 483 reset_active(rq, engine); 484 485 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 486 lrc_check_regs(ce, engine, "before"); 487 488 if (ce->tag) { 489 /* Use a fixed tag for OA and friends */ 490 GEM_BUG_ON(ce->tag <= BITS_PER_LONG); 491 ce->lrc.ccid = ce->tag; 492 } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { 493 /* We don't need a strict matching tag, just different values */ 494 unsigned int tag = ffs(READ_ONCE(engine->context_tag)); 495 496 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); 497 clear_bit(tag - 1, &engine->context_tag); 498 ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32); 499 500 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 501 502 } else { 503 /* We don't need a strict matching tag, just different values */ 504 unsigned int tag = __ffs(engine->context_tag); 505 506 GEM_BUG_ON(tag >= BITS_PER_LONG); 507 __clear_bit(tag, &engine->context_tag); 508 ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32); 509 510 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 511 } 512 513 ce->lrc.ccid |= engine->execlists.ccid; 514 515 __intel_gt_pm_get(engine->gt); 516 if (engine->fw_domain && !engine->fw_active++) 517 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); 518 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); 519 
intel_engine_context_in(engine); 520 521 CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid); 522 523 return engine; 524 } 525 526 static void execlists_schedule_in(struct i915_request *rq, int idx) 527 { 528 struct intel_context * const ce = rq->context; 529 struct intel_engine_cs *old; 530 531 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); 532 trace_i915_request_in(rq, idx); 533 534 old = ce->inflight; 535 if (!old) 536 old = __execlists_schedule_in(rq); 537 WRITE_ONCE(ce->inflight, ptr_inc(old)); 538 539 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); 540 } 541 542 static void 543 resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) 544 { 545 struct intel_engine_cs *engine = rq->engine; 546 547 spin_lock_irq(&engine->sched_engine->lock); 548 549 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 550 WRITE_ONCE(rq->engine, &ve->base); 551 ve->base.submit_request(rq); 552 553 spin_unlock_irq(&engine->sched_engine->lock); 554 } 555 556 static void kick_siblings(struct i915_request *rq, struct intel_context *ce) 557 { 558 struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 559 struct intel_engine_cs *engine = rq->engine; 560 561 /* 562 * After this point, the rq may be transferred to a new sibling, so 563 * before we clear ce->inflight make sure that the context has been 564 * removed from the b->signalers and furthermore we need to make sure 565 * that the concurrent iterator in signal_irq_work is no longer 566 * following ce->signal_link. 567 */ 568 if (!list_empty(&ce->signals)) 569 intel_context_remove_breadcrumbs(ce, engine->breadcrumbs); 570 571 /* 572 * This engine is now too busy to run this virtual request, so 573 * see if we can find an alternative engine for it to execute on. 574 * Once a request has become bonded to this engine, we treat it the 575 * same as other native request. 576 */ 577 if (i915_request_in_priority_queue(rq) && 578 rq->execution_mask != engine->mask) 579 resubmit_virtual_request(rq, ve); 580 581 if (READ_ONCE(ve->request)) 582 tasklet_hi_schedule(&ve->base.sched_engine->tasklet); 583 } 584 585 static void __execlists_schedule_out(struct i915_request * const rq, 586 struct intel_context * const ce) 587 { 588 struct intel_engine_cs * const engine = rq->engine; 589 unsigned int ccid; 590 591 /* 592 * NB process_csb() is not under the engine->sched_engine->lock and hence 593 * schedule_out can race with schedule_in meaning that we should 594 * refrain from doing non-trivial work here. 595 */ 596 597 CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid); 598 GEM_BUG_ON(ce->inflight != engine); 599 600 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 601 lrc_check_regs(ce, engine, "after"); 602 603 /* 604 * If we have just completed this context, the engine may now be 605 * idle and we want to re-enter powersaving. 
606 */ 607 if (intel_timeline_is_last(ce->timeline, rq) && 608 __i915_request_is_complete(rq)) 609 intel_engine_add_retire(engine, ce->timeline); 610 611 ccid = ce->lrc.ccid; 612 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { 613 ccid >>= XEHP_SW_CTX_ID_SHIFT - 32; 614 ccid &= XEHP_MAX_CONTEXT_HW_ID; 615 } else { 616 ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; 617 ccid &= GEN12_MAX_CONTEXT_HW_ID; 618 } 619 620 if (ccid < BITS_PER_LONG) { 621 GEM_BUG_ON(ccid == 0); 622 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); 623 __set_bit(ccid - 1, &engine->context_tag); 624 } 625 626 lrc_update_runtime(ce); 627 intel_engine_context_out(engine); 628 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 629 if (engine->fw_domain && !--engine->fw_active) 630 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); 631 intel_gt_pm_put_async(engine->gt); 632 633 /* 634 * If this is part of a virtual engine, its next request may 635 * have been blocked waiting for access to the active context. 636 * We have to kick all the siblings again in case we need to 637 * switch (e.g. the next request is not runnable on this 638 * engine). Hopefully, we will already have submitted the next 639 * request before the tasklet runs and do not need to rebuild 640 * each virtual tree and kick everyone again. 641 */ 642 if (ce->engine != engine) 643 kick_siblings(rq, ce); 644 645 WRITE_ONCE(ce->inflight, NULL); 646 intel_context_put(ce); 647 } 648 649 static inline void execlists_schedule_out(struct i915_request *rq) 650 { 651 struct intel_context * const ce = rq->context; 652 653 trace_i915_request_out(rq); 654 655 GEM_BUG_ON(!ce->inflight); 656 ce->inflight = ptr_dec(ce->inflight); 657 if (!__intel_context_inflight_count(ce->inflight)) 658 __execlists_schedule_out(rq, ce); 659 660 i915_request_put(rq); 661 } 662 663 static u64 execlists_update_context(struct i915_request *rq) 664 { 665 struct intel_context *ce = rq->context; 666 u64 desc = ce->lrc.desc; 667 u32 tail, prev; 668 669 /* 670 * WaIdleLiteRestore:bdw,skl 671 * 672 * We should never submit the context with the same RING_TAIL twice 673 * just in case we submit an empty ring, which confuses the HW. 674 * 675 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of 676 * the normal request to be able to always advance the RING_TAIL on 677 * subsequent resubmissions (for lite restore). Should that fail us, 678 * and we try and submit the same tail again, force the context 679 * reload. 680 * 681 * If we need to return to a preempted context, we need to skip the 682 * lite-restore and force it to reload the RING_TAIL. Otherwise, the 683 * HW has a tendency to ignore us rewinding the TAIL to the end of 684 * an earlier request. 685 */ 686 GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); 687 prev = rq->ring->tail; 688 tail = intel_ring_set_tail(rq->ring, rq->tail); 689 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) 690 desc |= CTX_DESC_FORCE_RESTORE; 691 ce->lrc_reg_state[CTX_RING_TAIL] = tail; 692 rq->tail = rq->wa_tail; 693 694 /* 695 * Make sure the context image is complete before we submit it to HW. 696 * 697 * Ostensibly, writes (including the WCB) should be flushed prior to 698 * an uncached write such as our mmio register access, the empirical 699 * evidence (esp. 
on Braswell) suggests that the WC write into memory 700 * may not be visible to the HW prior to the completion of the UC 701 * register write and that we may begin execution from the context 702 * before its image is complete leading to invalid PD chasing. 703 */ 704 wmb(); 705 706 ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; 707 return desc; 708 } 709 710 static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) 711 { 712 if (execlists->ctrl_reg) { 713 writel(lower_32_bits(desc), execlists->submit_reg + port * 2); 714 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); 715 } else { 716 writel(upper_32_bits(desc), execlists->submit_reg); 717 writel(lower_32_bits(desc), execlists->submit_reg); 718 } 719 } 720 721 static __maybe_unused char * 722 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) 723 { 724 if (!rq) 725 return ""; 726 727 snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", 728 prefix, 729 rq->context->lrc.ccid, 730 rq->fence.context, rq->fence.seqno, 731 __i915_request_is_complete(rq) ? "!" : 732 __i915_request_has_started(rq) ? "*" : 733 "", 734 rq_prio(rq)); 735 736 return buf; 737 } 738 739 static __maybe_unused noinline void 740 trace_ports(const struct intel_engine_execlists *execlists, 741 const char *msg, 742 struct i915_request * const *ports) 743 { 744 const struct intel_engine_cs *engine = 745 container_of(execlists, typeof(*engine), execlists); 746 char __maybe_unused p0[40], p1[40]; 747 748 if (!ports[0]) 749 return; 750 751 ENGINE_TRACE(engine, "%s { %s%s }\n", msg, 752 dump_port(p0, sizeof(p0), "", ports[0]), 753 dump_port(p1, sizeof(p1), ", ", ports[1])); 754 } 755 756 static bool 757 reset_in_progress(const struct intel_engine_cs *engine) 758 { 759 return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet)); 760 } 761 762 static __maybe_unused noinline bool 763 assert_pending_valid(const struct intel_engine_execlists *execlists, 764 const char *msg) 765 { 766 struct intel_engine_cs *engine = 767 container_of(execlists, typeof(*engine), execlists); 768 struct i915_request * const *port, *rq, *prev = NULL; 769 struct intel_context *ce = NULL; 770 u32 ccid = -1; 771 772 trace_ports(execlists, msg, execlists->pending); 773 774 /* We may be messing around with the lists during reset, lalala */ 775 if (reset_in_progress(engine)) 776 return true; 777 778 if (!execlists->pending[0]) { 779 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", 780 engine->name); 781 return false; 782 } 783 784 if (execlists->pending[execlists_num_ports(execlists)]) { 785 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", 786 engine->name, execlists_num_ports(execlists)); 787 return false; 788 } 789 790 for (port = execlists->pending; (rq = *port); port++) { 791 unsigned long flags; 792 bool ok = true; 793 794 GEM_BUG_ON(!kref_read(&rq->fence.refcount)); 795 GEM_BUG_ON(!i915_request_is_active(rq)); 796 797 if (ce == rq->context) { 798 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", 799 engine->name, 800 ce->timeline->fence_context, 801 port - execlists->pending); 802 return false; 803 } 804 ce = rq->context; 805 806 if (ccid == ce->lrc.ccid) { 807 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", 808 engine->name, 809 ccid, ce->timeline->fence_context, 810 port - execlists->pending); 811 return false; 812 } 813 ccid = ce->lrc.ccid; 814 815 /* 816 * Sentinels are supposed to be the last request so they flush 817 * the current execution off the HW. 
Check that they are the only 818 * request in the pending submission. 819 * 820 * NB: Due to the async nature of preempt-to-busy and request 821 * cancellation we need to handle the case where request 822 * becomes a sentinel in parallel to CSB processing. 823 */ 824 if (prev && i915_request_has_sentinel(prev) && 825 !READ_ONCE(prev->fence.error)) { 826 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", 827 engine->name, 828 ce->timeline->fence_context, 829 port - execlists->pending); 830 return false; 831 } 832 prev = rq; 833 834 /* 835 * We want virtual requests to only be in the first slot so 836 * that they are never stuck behind a hog and can be immediately 837 * transferred onto the next idle engine. 838 */ 839 if (rq->execution_mask != engine->mask && 840 port != execlists->pending) { 841 GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n", 842 engine->name, 843 ce->timeline->fence_context, 844 port - execlists->pending); 845 return false; 846 } 847 848 /* Hold tightly onto the lock to prevent concurrent retires! */ 849 if (!spin_trylock_irqsave(&rq->lock, flags)) 850 continue; 851 852 if (__i915_request_is_complete(rq)) 853 goto unlock; 854 855 if (i915_active_is_idle(&ce->active) && 856 !intel_context_is_barrier(ce)) { 857 GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", 858 engine->name, 859 ce->timeline->fence_context, 860 port - execlists->pending); 861 ok = false; 862 goto unlock; 863 } 864 865 if (!i915_vma_is_pinned(ce->state)) { 866 GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", 867 engine->name, 868 ce->timeline->fence_context, 869 port - execlists->pending); 870 ok = false; 871 goto unlock; 872 } 873 874 if (!i915_vma_is_pinned(ce->ring->vma)) { 875 GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", 876 engine->name, 877 ce->timeline->fence_context, 878 port - execlists->pending); 879 ok = false; 880 goto unlock; 881 } 882 883 unlock: 884 spin_unlock_irqrestore(&rq->lock, flags); 885 if (!ok) 886 return false; 887 } 888 889 return ce; 890 } 891 892 static void execlists_submit_ports(struct intel_engine_cs *engine) 893 { 894 struct intel_engine_execlists *execlists = &engine->execlists; 895 unsigned int n; 896 897 GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); 898 899 /* 900 * We can skip acquiring intel_runtime_pm_get() here as it was taken 901 * on our behalf by the request (see i915_gem_mark_busy()) and it will 902 * not be relinquished until the device is idle (see 903 * i915_gem_idle_work_handler()). As a precaution, we make sure 904 * that all ELSP are drained i.e. we have processed the CSB, 905 * before allowing ourselves to idle and calling intel_runtime_pm_put(). 906 */ 907 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 908 909 /* 910 * ELSQ note: the submit queue is not cleared after being submitted 911 * to the HW so we need to make sure we always clean it up. This is 912 * currently ensured by the fact that we always write the same number 913 * of elsq entries, keep this in mind before changing the loop below. 914 */ 915 for (n = execlists_num_ports(execlists); n--; ) { 916 struct i915_request *rq = execlists->pending[n]; 917 918 write_desc(execlists, 919 rq ? 
execlists_update_context(rq) : 0, 920 n); 921 } 922 923 /* we need to manually load the submit queue */ 924 if (execlists->ctrl_reg) 925 writel(EL_CTRL_LOAD, execlists->ctrl_reg); 926 } 927 928 static bool ctx_single_port_submission(const struct intel_context *ce) 929 { 930 return (IS_ENABLED(CONFIG_DRM_I915_GVT) && 931 intel_context_force_single_submission(ce)); 932 } 933 934 static bool can_merge_ctx(const struct intel_context *prev, 935 const struct intel_context *next) 936 { 937 if (prev != next) 938 return false; 939 940 if (ctx_single_port_submission(prev)) 941 return false; 942 943 return true; 944 } 945 946 static unsigned long i915_request_flags(const struct i915_request *rq) 947 { 948 return READ_ONCE(rq->fence.flags); 949 } 950 951 static bool can_merge_rq(const struct i915_request *prev, 952 const struct i915_request *next) 953 { 954 GEM_BUG_ON(prev == next); 955 GEM_BUG_ON(!assert_priority_queue(prev, next)); 956 957 /* 958 * We do not submit known completed requests. Therefore if the next 959 * request is already completed, we can pretend to merge it in 960 * with the previous context (and we will skip updating the ELSP 961 * and tracking). Thus hopefully keeping the ELSP full with active 962 * contexts, despite the best efforts of preempt-to-busy to confuse 963 * us. 964 */ 965 if (__i915_request_is_complete(next)) 966 return true; 967 968 if (unlikely((i915_request_flags(prev) | i915_request_flags(next)) & 969 (BIT(I915_FENCE_FLAG_NOPREEMPT) | 970 BIT(I915_FENCE_FLAG_SENTINEL)))) 971 return false; 972 973 if (!can_merge_ctx(prev->context, next->context)) 974 return false; 975 976 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); 977 return true; 978 } 979 980 static bool virtual_matches(const struct virtual_engine *ve, 981 const struct i915_request *rq, 982 const struct intel_engine_cs *engine) 983 { 984 const struct intel_engine_cs *inflight; 985 986 if (!rq) 987 return false; 988 989 if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ 990 return false; 991 992 /* 993 * We track when the HW has completed saving the context image 994 * (i.e. when we have seen the final CS event switching out of 995 * the context) and must not overwrite the context image before 996 * then. This restricts us to only using the active engine 997 * while the previous virtualized request is inflight (so 998 * we reuse the register offsets). This is a very small 999 * hystersis on the greedy seelction algorithm. 
1000 */ 1001 inflight = intel_context_inflight(&ve->context); 1002 if (inflight && inflight != engine) 1003 return false; 1004 1005 return true; 1006 } 1007 1008 static struct virtual_engine * 1009 first_virtual_engine(struct intel_engine_cs *engine) 1010 { 1011 struct intel_engine_execlists *el = &engine->execlists; 1012 struct rb_node *rb = rb_first_cached(&el->virtual); 1013 1014 while (rb) { 1015 struct virtual_engine *ve = 1016 rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 1017 struct i915_request *rq = READ_ONCE(ve->request); 1018 1019 /* lazily cleanup after another engine handled rq */ 1020 if (!rq || !virtual_matches(ve, rq, engine)) { 1021 rb_erase_cached(rb, &el->virtual); 1022 RB_CLEAR_NODE(rb); 1023 rb = rb_first_cached(&el->virtual); 1024 continue; 1025 } 1026 1027 return ve; 1028 } 1029 1030 return NULL; 1031 } 1032 1033 static void virtual_xfer_context(struct virtual_engine *ve, 1034 struct intel_engine_cs *engine) 1035 { 1036 unsigned int n; 1037 1038 if (likely(engine == ve->siblings[0])) 1039 return; 1040 1041 GEM_BUG_ON(READ_ONCE(ve->context.inflight)); 1042 if (!intel_engine_has_relative_mmio(engine)) 1043 lrc_update_offsets(&ve->context, engine); 1044 1045 /* 1046 * Move the bound engine to the top of the list for 1047 * future execution. We then kick this tasklet first 1048 * before checking others, so that we preferentially 1049 * reuse this set of bound registers. 1050 */ 1051 for (n = 1; n < ve->num_siblings; n++) { 1052 if (ve->siblings[n] == engine) { 1053 swap(ve->siblings[n], ve->siblings[0]); 1054 break; 1055 } 1056 } 1057 } 1058 1059 static void defer_request(struct i915_request *rq, struct list_head * const pl) 1060 { 1061 LIST_HEAD(list); 1062 1063 /* 1064 * We want to move the interrupted request to the back of 1065 * the round-robin list (i.e. its priority level), but 1066 * in doing so, we must then move all requests that were in 1067 * flight and were waiting for the interrupted request to 1068 * be run after it again. 1069 */ 1070 do { 1071 struct i915_dependency *p; 1072 1073 GEM_BUG_ON(i915_request_is_active(rq)); 1074 list_move_tail(&rq->sched.link, pl); 1075 1076 for_each_waiter(p, rq) { 1077 struct i915_request *w = 1078 container_of(p->waiter, typeof(*w), sched); 1079 1080 if (p->flags & I915_DEPENDENCY_WEAK) 1081 continue; 1082 1083 /* Leave semaphores spinning on the other engines */ 1084 if (w->engine != rq->engine) 1085 continue; 1086 1087 /* No waiter should start before its signaler */ 1088 GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && 1089 __i915_request_has_started(w) && 1090 !__i915_request_is_complete(rq)); 1091 1092 if (!i915_request_is_ready(w)) 1093 continue; 1094 1095 if (rq_prio(w) < rq_prio(rq)) 1096 continue; 1097 1098 GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); 1099 GEM_BUG_ON(i915_request_is_active(w)); 1100 list_move_tail(&w->sched.link, &list); 1101 } 1102 1103 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 1104 } while (rq); 1105 } 1106 1107 static void defer_active(struct intel_engine_cs *engine) 1108 { 1109 struct i915_request *rq; 1110 1111 rq = __unwind_incomplete_requests(engine); 1112 if (!rq) 1113 return; 1114 1115 defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine, 1116 rq_prio(rq))); 1117 } 1118 1119 static bool 1120 timeslice_yield(const struct intel_engine_execlists *el, 1121 const struct i915_request *rq) 1122 { 1123 /* 1124 * Once bitten, forever smitten! 
	 *
	 * If the active context ever busy-waited on a semaphore,
	 * it will be treated as a hog until the end of its timeslice (i.e.
	 * until it is scheduled out and replaced by a new submission,
	 * possibly even its own lite-restore). The HW only sends an interrupt
	 * on the first miss, and we do not know whether that semaphore has
	 * been signaled, or even if it is now stuck on another semaphore. Play
	 * safe, yield if it might be stuck -- it will be given a fresh
	 * timeslice in the near future.
	 */
	return rq->context->lrc.ccid == READ_ONCE(el->yield);
}

static bool needs_timeslice(const struct intel_engine_cs *engine,
			    const struct i915_request *rq)
{
	if (!intel_engine_has_timeslices(engine))
		return false;

	/* If not currently active, or about to switch, wait for next event */
	if (!rq || __i915_request_is_complete(rq))
		return false;

	/* We do not need to start the timeslice until after the ACK */
	if (READ_ONCE(engine->execlists.pending[0]))
		return false;

	/* If ELSP[1] is occupied, always check to see if worth slicing */
	if (!list_is_last_rcu(&rq->sched.link,
			      &engine->sched_engine->requests)) {
		ENGINE_TRACE(engine, "timeslice required for second inflight context\n");
		return true;
	}

	/* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */
	if (!i915_sched_engine_is_empty(engine->sched_engine)) {
		ENGINE_TRACE(engine, "timeslice required for queue\n");
		return true;
	}

	if (!RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root)) {
		ENGINE_TRACE(engine, "timeslice required for virtual\n");
		return true;
	}

	return false;
}

static bool
timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq)
{
	const struct intel_engine_execlists *el = &engine->execlists;

	if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq))
		return false;

	if (!needs_timeslice(engine, rq))
		return false;

	return timer_expired(&el->timer) || timeslice_yield(el, rq);
}

static unsigned long timeslice(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->props.timeslice_duration_ms);
}

static void start_timeslice(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *el = &engine->execlists;
	unsigned long duration;

	/* Disable the timer if there is nothing to switch to */
	duration = 0;
	if (needs_timeslice(engine, *el->active)) {
		/* Avoid continually prolonging an active timeslice */
		if (timer_active(&el->timer)) {
			/*
			 * If we just submitted a new ELSP after an old
			 * context, that context may have already consumed
			 * its timeslice, so recheck.
			 */
			if (!timer_pending(&el->timer))
				tasklet_hi_schedule(&engine->sched_engine->tasklet);
			return;
		}

		duration = timeslice(engine);
	}

	set_timer_ms(&el->timer, duration);
}

static void record_preemption(struct intel_engine_execlists *execlists)
{
	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}

static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
					    const struct i915_request *rq)
{
	if (!rq)
		return 0;

	/* Force a fast reset for terminated contexts (ignoring sysfs!)
*/ 1230 if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) 1231 return 1; 1232 1233 return READ_ONCE(engine->props.preempt_timeout_ms); 1234 } 1235 1236 static void set_preempt_timeout(struct intel_engine_cs *engine, 1237 const struct i915_request *rq) 1238 { 1239 if (!intel_engine_has_preempt_reset(engine)) 1240 return; 1241 1242 set_timer_ms(&engine->execlists.preempt, 1243 active_preempt_timeout(engine, rq)); 1244 } 1245 1246 static bool completed(const struct i915_request *rq) 1247 { 1248 if (i915_request_has_sentinel(rq)) 1249 return false; 1250 1251 return __i915_request_is_complete(rq); 1252 } 1253 1254 static void execlists_dequeue(struct intel_engine_cs *engine) 1255 { 1256 struct intel_engine_execlists * const execlists = &engine->execlists; 1257 struct i915_sched_engine * const sched_engine = engine->sched_engine; 1258 struct i915_request **port = execlists->pending; 1259 struct i915_request ** const last_port = port + execlists->port_mask; 1260 struct i915_request *last, * const *active; 1261 struct virtual_engine *ve; 1262 struct rb_node *rb; 1263 bool submit = false; 1264 1265 /* 1266 * Hardware submission is through 2 ports. Conceptually each port 1267 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is 1268 * static for a context, and unique to each, so we only execute 1269 * requests belonging to a single context from each ring. RING_HEAD 1270 * is maintained by the CS in the context image, it marks the place 1271 * where it got up to last time, and through RING_TAIL we tell the CS 1272 * where we want to execute up to this time. 1273 * 1274 * In this list the requests are in order of execution. Consecutive 1275 * requests from the same context are adjacent in the ringbuffer. We 1276 * can combine these requests into a single RING_TAIL update: 1277 * 1278 * RING_HEAD...req1...req2 1279 * ^- RING_TAIL 1280 * since to execute req2 the CS must first execute req1. 1281 * 1282 * Our goal then is to point each port to the end of a consecutive 1283 * sequence of requests as being the most optimal (fewest wake ups 1284 * and context switches) submission. 1285 */ 1286 1287 spin_lock(&sched_engine->lock); 1288 1289 /* 1290 * If the queue is higher priority than the last 1291 * request in the currently active context, submit afresh. 1292 * We will resubmit again afterwards in case we need to split 1293 * the active context to interject the preemption request, 1294 * i.e. we will retrigger preemption following the ack in case 1295 * of trouble. 1296 * 1297 */ 1298 active = execlists->active; 1299 while ((last = *active) && completed(last)) 1300 active++; 1301 1302 if (last) { 1303 if (need_preempt(engine, last)) { 1304 ENGINE_TRACE(engine, 1305 "preempting last=%llx:%lld, prio=%d, hint=%d\n", 1306 last->fence.context, 1307 last->fence.seqno, 1308 last->sched.attr.priority, 1309 sched_engine->queue_priority_hint); 1310 record_preemption(execlists); 1311 1312 /* 1313 * Don't let the RING_HEAD advance past the breadcrumb 1314 * as we unwind (and until we resubmit) so that we do 1315 * not accidentally tell it to go backwards. 1316 */ 1317 ring_set_paused(engine, 1); 1318 1319 /* 1320 * Note that we have not stopped the GPU at this point, 1321 * so we are unwinding the incomplete requests as they 1322 * remain inflight and so by the time we do complete 1323 * the preemption, some of the unwound requests may 1324 * complete! 
1325 */ 1326 __unwind_incomplete_requests(engine); 1327 1328 last = NULL; 1329 } else if (timeslice_expired(engine, last)) { 1330 ENGINE_TRACE(engine, 1331 "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", 1332 yesno(timer_expired(&execlists->timer)), 1333 last->fence.context, last->fence.seqno, 1334 rq_prio(last), 1335 sched_engine->queue_priority_hint, 1336 yesno(timeslice_yield(execlists, last))); 1337 1338 /* 1339 * Consume this timeslice; ensure we start a new one. 1340 * 1341 * The timeslice expired, and we will unwind the 1342 * running contexts and recompute the next ELSP. 1343 * If that submit will be the same pair of contexts 1344 * (due to dependency ordering), we will skip the 1345 * submission. If we don't cancel the timer now, 1346 * we will see that the timer has expired and 1347 * reschedule the tasklet; continually until the 1348 * next context switch or other preeemption event. 1349 * 1350 * Since we have decided to reschedule based on 1351 * consumption of this timeslice, if we submit the 1352 * same context again, grant it a full timeslice. 1353 */ 1354 cancel_timer(&execlists->timer); 1355 ring_set_paused(engine, 1); 1356 defer_active(engine); 1357 1358 /* 1359 * Unlike for preemption, if we rewind and continue 1360 * executing the same context as previously active, 1361 * the order of execution will remain the same and 1362 * the tail will only advance. We do not need to 1363 * force a full context restore, as a lite-restore 1364 * is sufficient to resample the monotonic TAIL. 1365 * 1366 * If we switch to any other context, similarly we 1367 * will not rewind TAIL of current context, and 1368 * normal save/restore will preserve state and allow 1369 * us to later continue executing the same request. 1370 */ 1371 last = NULL; 1372 } else { 1373 /* 1374 * Otherwise if we already have a request pending 1375 * for execution after the current one, we can 1376 * just wait until the next CS event before 1377 * queuing more. In either case we will force a 1378 * lite-restore preemption event, but if we wait 1379 * we hopefully coalesce several updates into a single 1380 * submission. 1381 */ 1382 if (active[1]) { 1383 /* 1384 * Even if ELSP[1] is occupied and not worthy 1385 * of timeslices, our queue might be. 1386 */ 1387 spin_unlock(&sched_engine->lock); 1388 return; 1389 } 1390 } 1391 } 1392 1393 /* XXX virtual is always taking precedence */ 1394 while ((ve = first_virtual_engine(engine))) { 1395 struct i915_request *rq; 1396 1397 spin_lock(&ve->base.sched_engine->lock); 1398 1399 rq = ve->request; 1400 if (unlikely(!virtual_matches(ve, rq, engine))) 1401 goto unlock; /* lost the race to a sibling */ 1402 1403 GEM_BUG_ON(rq->engine != &ve->base); 1404 GEM_BUG_ON(rq->context != &ve->context); 1405 1406 if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { 1407 spin_unlock(&ve->base.sched_engine->lock); 1408 break; 1409 } 1410 1411 if (last && !can_merge_rq(last, rq)) { 1412 spin_unlock(&ve->base.sched_engine->lock); 1413 spin_unlock(&engine->sched_engine->lock); 1414 return; /* leave this for another sibling */ 1415 } 1416 1417 ENGINE_TRACE(engine, 1418 "virtual rq=%llx:%lld%s, new engine? %s\n", 1419 rq->fence.context, 1420 rq->fence.seqno, 1421 __i915_request_is_complete(rq) ? "!" : 1422 __i915_request_has_started(rq) ? 
"*" : 1423 "", 1424 yesno(engine != ve->siblings[0])); 1425 1426 WRITE_ONCE(ve->request, NULL); 1427 WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN); 1428 1429 rb = &ve->nodes[engine->id].rb; 1430 rb_erase_cached(rb, &execlists->virtual); 1431 RB_CLEAR_NODE(rb); 1432 1433 GEM_BUG_ON(!(rq->execution_mask & engine->mask)); 1434 WRITE_ONCE(rq->engine, engine); 1435 1436 if (__i915_request_submit(rq)) { 1437 /* 1438 * Only after we confirm that we will submit 1439 * this request (i.e. it has not already 1440 * completed), do we want to update the context. 1441 * 1442 * This serves two purposes. It avoids 1443 * unnecessary work if we are resubmitting an 1444 * already completed request after timeslicing. 1445 * But more importantly, it prevents us altering 1446 * ve->siblings[] on an idle context, where 1447 * we may be using ve->siblings[] in 1448 * virtual_context_enter / virtual_context_exit. 1449 */ 1450 virtual_xfer_context(ve, engine); 1451 GEM_BUG_ON(ve->siblings[0] != engine); 1452 1453 submit = true; 1454 last = rq; 1455 } 1456 1457 i915_request_put(rq); 1458 unlock: 1459 spin_unlock(&ve->base.sched_engine->lock); 1460 1461 /* 1462 * Hmm, we have a bunch of virtual engine requests, 1463 * but the first one was already completed (thanks 1464 * preempt-to-busy!). Keep looking at the veng queue 1465 * until we have no more relevant requests (i.e. 1466 * the normal submit queue has higher priority). 1467 */ 1468 if (submit) 1469 break; 1470 } 1471 1472 while ((rb = rb_first_cached(&sched_engine->queue))) { 1473 struct i915_priolist *p = to_priolist(rb); 1474 struct i915_request *rq, *rn; 1475 1476 priolist_for_each_request_consume(rq, rn, p) { 1477 bool merge = true; 1478 1479 /* 1480 * Can we combine this request with the current port? 1481 * It has to be the same context/ringbuffer and not 1482 * have any exceptions (e.g. GVT saying never to 1483 * combine contexts). 1484 * 1485 * If we can combine the requests, we can execute both 1486 * by updating the RING_TAIL to point to the end of the 1487 * second request, and so we never need to tell the 1488 * hardware about the first. 1489 */ 1490 if (last && !can_merge_rq(last, rq)) { 1491 /* 1492 * If we are on the second port and cannot 1493 * combine this request with the last, then we 1494 * are done. 1495 */ 1496 if (port == last_port) 1497 goto done; 1498 1499 /* 1500 * We must not populate both ELSP[] with the 1501 * same LRCA, i.e. we must submit 2 different 1502 * contexts if we submit 2 ELSP. 1503 */ 1504 if (last->context == rq->context) 1505 goto done; 1506 1507 if (i915_request_has_sentinel(last)) 1508 goto done; 1509 1510 /* 1511 * We avoid submitting virtual requests into 1512 * the secondary ports so that we can migrate 1513 * the request immediately to another engine 1514 * rather than wait for the primary request. 1515 */ 1516 if (rq->execution_mask != engine->mask) 1517 goto done; 1518 1519 /* 1520 * If GVT overrides us we only ever submit 1521 * port[0], leaving port[1] empty. Note that we 1522 * also have to be careful that we don't queue 1523 * the same context (even though a different 1524 * request) to the second port. 
1525 */ 1526 if (ctx_single_port_submission(last->context) || 1527 ctx_single_port_submission(rq->context)) 1528 goto done; 1529 1530 merge = false; 1531 } 1532 1533 if (__i915_request_submit(rq)) { 1534 if (!merge) { 1535 *port++ = i915_request_get(last); 1536 last = NULL; 1537 } 1538 1539 GEM_BUG_ON(last && 1540 !can_merge_ctx(last->context, 1541 rq->context)); 1542 GEM_BUG_ON(last && 1543 i915_seqno_passed(last->fence.seqno, 1544 rq->fence.seqno)); 1545 1546 submit = true; 1547 last = rq; 1548 } 1549 } 1550 1551 rb_erase_cached(&p->node, &sched_engine->queue); 1552 i915_priolist_free(p); 1553 } 1554 done: 1555 *port++ = i915_request_get(last); 1556 1557 /* 1558 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. 1559 * 1560 * We choose the priority hint such that if we add a request of greater 1561 * priority than this, we kick the submission tasklet to decide on 1562 * the right order of submitting the requests to hardware. We must 1563 * also be prepared to reorder requests as they are in-flight on the 1564 * HW. We derive the priority hint then as the first "hole" in 1565 * the HW submission ports and if there are no available slots, 1566 * the priority of the lowest executing request, i.e. last. 1567 * 1568 * When we do receive a higher priority request ready to run from the 1569 * user, see queue_request(), the priority hint is bumped to that 1570 * request triggering preemption on the next dequeue (or subsequent 1571 * interrupt for secondary ports). 1572 */ 1573 sched_engine->queue_priority_hint = queue_prio(sched_engine); 1574 i915_sched_engine_reset_on_empty(sched_engine); 1575 spin_unlock(&sched_engine->lock); 1576 1577 /* 1578 * We can skip poking the HW if we ended up with exactly the same set 1579 * of requests as currently running, e.g. trying to timeslice a pair 1580 * of ordered contexts. 1581 */ 1582 if (submit && 1583 memcmp(active, 1584 execlists->pending, 1585 (port - execlists->pending) * sizeof(*port))) { 1586 *port = NULL; 1587 while (port-- != execlists->pending) 1588 execlists_schedule_in(*port, port - execlists->pending); 1589 1590 WRITE_ONCE(execlists->yield, -1); 1591 set_preempt_timeout(engine, *active); 1592 execlists_submit_ports(engine); 1593 } else { 1594 ring_set_paused(engine, 0); 1595 while (port-- != execlists->pending) 1596 i915_request_put(*port); 1597 *execlists->pending = NULL; 1598 } 1599 } 1600 1601 static void execlists_dequeue_irq(struct intel_engine_cs *engine) 1602 { 1603 local_irq_disable(); /* Suspend interrupts across request submission */ 1604 execlists_dequeue(engine); 1605 local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ 1606 } 1607 1608 static void clear_ports(struct i915_request **ports, int count) 1609 { 1610 memset_p((void **)ports, NULL, count); 1611 } 1612 1613 static void 1614 copy_ports(struct i915_request **dst, struct i915_request **src, int count) 1615 { 1616 /* A memcpy_p() would be very useful here! 
*/ 1617 while (count--) 1618 WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ 1619 } 1620 1621 static struct i915_request ** 1622 cancel_port_requests(struct intel_engine_execlists * const execlists, 1623 struct i915_request **inactive) 1624 { 1625 struct i915_request * const *port; 1626 1627 for (port = execlists->pending; *port; port++) 1628 *inactive++ = *port; 1629 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); 1630 1631 /* Mark the end of active before we overwrite *active */ 1632 for (port = xchg(&execlists->active, execlists->pending); *port; port++) 1633 *inactive++ = *port; 1634 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); 1635 1636 smp_wmb(); /* complete the seqlock for execlists_active() */ 1637 WRITE_ONCE(execlists->active, execlists->inflight); 1638 1639 /* Having cancelled all outstanding process_csb(), stop their timers */ 1640 GEM_BUG_ON(execlists->pending[0]); 1641 cancel_timer(&execlists->timer); 1642 cancel_timer(&execlists->preempt); 1643 1644 return inactive; 1645 } 1646 1647 static void invalidate_csb_entries(const u64 *first, const u64 *last) 1648 { 1649 clflush((void *)first); 1650 clflush((void *)last); 1651 } 1652 1653 /* 1654 * Starting with Gen12, the status has a new format: 1655 * 1656 * bit 0: switched to new queue 1657 * bit 1: reserved 1658 * bit 2: semaphore wait mode (poll or signal), only valid when 1659 * switch detail is set to "wait on semaphore" 1660 * bits 3-5: engine class 1661 * bits 6-11: engine instance 1662 * bits 12-14: reserved 1663 * bits 15-25: sw context id of the lrc the GT switched to 1664 * bits 26-31: sw counter of the lrc the GT switched to 1665 * bits 32-35: context switch detail 1666 * - 0: ctx complete 1667 * - 1: wait on sync flip 1668 * - 2: wait on vblank 1669 * - 3: wait on scanline 1670 * - 4: wait on semaphore 1671 * - 5: context preempted (not on SEMAPHORE_WAIT or 1672 * WAIT_FOR_EVENT) 1673 * bit 36: reserved 1674 * bits 37-43: wait detail (for switch detail 1 to 4) 1675 * bits 44-46: reserved 1676 * bits 47-57: sw context id of the lrc the GT switched away from 1677 * bits 58-63: sw counter of the lrc the GT switched away from 1678 * 1679 * Xe_HP csb shuffles things around compared to TGL: 1680 * 1681 * bits 0-3: context switch detail (same possible values as TGL) 1682 * bits 4-9: engine instance 1683 * bits 10-25: sw context id of the lrc the GT switched to 1684 * bits 26-31: sw counter of the lrc the GT switched to 1685 * bit 32: semaphore wait mode (poll or signal), Only valid when 1686 * switch detail is set to "wait on semaphore" 1687 * bit 33: switched to new queue 1688 * bits 34-41: wait detail (for switch detail 1 to 4) 1689 * bits 42-57: sw context id of the lrc the GT switched away from 1690 * bits 58-63: sw counter of the lrc the GT switched away from 1691 */ 1692 static inline bool 1693 __gen12_csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue, 1694 u8 switch_detail) 1695 { 1696 /* 1697 * The context switch detail is not guaranteed to be 5 when a preemption 1698 * occurs, so we can't just check for that. The check below works for 1699 * all the cases we care about, including preemptions of WAIT 1700 * instructions and lite-restore. Preempt-to-idle via the CTRL register 1701 * would require some extra handling, but we don't support that. 
1702 */ 1703 if (!ctx_away_valid || new_queue) { 1704 GEM_BUG_ON(!ctx_to_valid); 1705 return true; 1706 } 1707 1708 /* 1709 * switch detail = 5 is covered by the case above and we do not expect a 1710 * context switch on an unsuccessful wait instruction since we always 1711 * use polling mode. 1712 */ 1713 GEM_BUG_ON(switch_detail); 1714 return false; 1715 } 1716 1717 static bool xehp_csb_parse(const u64 csb) 1718 { 1719 return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ 1720 XEHP_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ 1721 upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, 1722 GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb))); 1723 } 1724 1725 static bool gen12_csb_parse(const u64 csb) 1726 { 1727 return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ 1728 GEN12_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ 1729 lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, 1730 GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); 1731 } 1732 1733 static bool gen8_csb_parse(const u64 csb) 1734 { 1735 return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); 1736 } 1737 1738 static noinline u64 1739 wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1740 { 1741 u64 entry; 1742 1743 /* 1744 * Reading from the HWSP has one particular advantage: we can detect 1745 * a stale entry. Since the write into HWSP is broken, we have no reason 1746 * to trust the HW at all, the mmio entry may equally be unordered, so 1747 * we prefer the path that is self-checking and as a last resort, 1748 * return the mmio value. 1749 * 1750 * tgl,dg1:HSDES#22011327657 1751 */ 1752 preempt_disable(); 1753 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) { 1754 int idx = csb - engine->execlists.csb_status; 1755 int status; 1756 1757 status = GEN8_EXECLISTS_STATUS_BUF; 1758 if (idx >= 6) { 1759 status = GEN11_EXECLISTS_STATUS_BUF2; 1760 idx -= 6; 1761 } 1762 status += sizeof(u64) * idx; 1763 1764 entry = intel_uncore_read64(engine->uncore, 1765 _MMIO(engine->mmio_base + status)); 1766 } 1767 preempt_enable(); 1768 1769 return entry; 1770 } 1771 1772 static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb) 1773 { 1774 u64 entry = READ_ONCE(*csb); 1775 1776 /* 1777 * Unfortunately, the GPU does not always serialise its write 1778 * of the CSB entries before its write of the CSB pointer, at least 1779 * from the perspective of the CPU, using what is known as a Global 1780 * Observation Point. We may read a new CSB tail pointer, but then 1781 * read the stale CSB entries, causing us to misinterpret the 1782 * context-switch events, and eventually declare the GPU hung. 1783 * 1784 * icl:HSDES#1806554093 1785 * tgl:HSDES#22011248461 1786 */ 1787 if (unlikely(entry == -1)) 1788 entry = wa_csb_read(engine, csb); 1789 1790 /* Consume this entry so that we can spot its future reuse. 
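 * The value -1 acts as a poison that the hardware is not expected to
 * write, so a slot still reading as -1 later is recognisably stale (see
 * wa_csb_read() above, which treats an all-ones entry as not yet valid).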
 */
	WRITE_ONCE(*csb, -1);

	/* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
	return entry;
}

static void new_timeslice(struct intel_engine_execlists *el)
{
	/* By cancelling, we will start afresh in start_timeslice() */
	cancel_timer(&el->timer);
}

static struct i915_request **
process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	u64 * const buf = execlists->csb_status;
	const u8 num_entries = execlists->csb_size;
	struct i915_request **prev;
	u8 head, tail;

	/*
	 * As we modify our execlists state tracking we require exclusive
	 * access. Either we are inside the tasklet, or the tasklet is disabled
	 * and we assume that is only inside the reset paths and so serialised.
	 */
	GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) &&
		   !reset_in_progress(engine));

	/*
	 * Note that csb_write, csb_status may be either in HWSP or mmio.
	 * When reading from the csb_write mmio register, we have to be
	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
	 * the low 4 bits. As it happens we know the next 4 bits are always
	 * zero and so we can simply mask off the low u8 of the register
	 * and treat it identically to reading from the HWSP (without having
	 * to use explicit shifting and masking, and probably bifurcating
	 * the code to handle the legacy mmio read).
	 */
	head = execlists->csb_head;
	tail = READ_ONCE(*execlists->csb_write);
	if (unlikely(head == tail))
		return inactive;

	/*
	 * We will consume all events from HW, or at least pretend to.
	 *
	 * The sequence of events from the HW is deterministic, and derived
	 * from our writes to the ELSP, with a smidgen of variability for
	 * the arrival of the asynchronous requests wrt the inflight
	 * execution. If the HW sends an event that does not correspond with
	 * the one we are expecting, we have to abandon all hope as we lose
	 * all tracking of what the engine is actually executing. We will
	 * only detect we are out of sequence with the HW when we get an
	 * 'impossible' event because we have already drained our own
	 * preemption/promotion queue. If this occurs, we know that we have
	 * likely lost track of execution earlier and must unwind and restart;
	 * the simplest way is to stop processing the event queue and force an
	 * engine reset.
	 */
	execlists->csb_head = tail;
	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);

	/*
	 * Hopefully paired with a wmb() in HW!
	 *
	 * We must complete the read of the write pointer before any reads
	 * from the CSB, so that we do not see stale values. Without an rmb
	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
	 * we perform the READ_ONCE(*csb_write).
	 */
	rmb();

	/* Remember who was last running under the timer */
	prev = inactive;
	*prev = NULL;

	do {
		bool promote;
		u64 csb;

		if (++head == num_entries)
			head = 0;

		/*
		 * We are flying near dragons again.
		 *
		 * We hold a reference to the request in execlist_port[]
		 * but no more than that. We are operating in softirq
		 * context and so cannot hold any mutex or sleep.
That 1881 * prevents us stopping the requests we are processing 1882 * in port[] from being retired simultaneously (the 1883 * breadcrumb will be complete before we see the 1884 * context-switch). As we only hold the reference to the 1885 * request, any pointer chasing underneath the request 1886 * is subject to a potential use-after-free. Thus we 1887 * store all of the bookkeeping within port[] as 1888 * required, and avoid using unguarded pointers beneath 1889 * request itself. The same applies to the atomic 1890 * status notifier. 1891 */ 1892 1893 csb = csb_read(engine, buf + head); 1894 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", 1895 head, upper_32_bits(csb), lower_32_bits(csb)); 1896 1897 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 1898 promote = xehp_csb_parse(csb); 1899 else if (GRAPHICS_VER(engine->i915) >= 12) 1900 promote = gen12_csb_parse(csb); 1901 else 1902 promote = gen8_csb_parse(csb); 1903 if (promote) { 1904 struct i915_request * const *old = execlists->active; 1905 1906 if (GEM_WARN_ON(!*execlists->pending)) { 1907 execlists->error_interrupt |= ERROR_CSB; 1908 break; 1909 } 1910 1911 ring_set_paused(engine, 0); 1912 1913 /* Point active to the new ELSP; prevent overwriting */ 1914 WRITE_ONCE(execlists->active, execlists->pending); 1915 smp_wmb(); /* notify execlists_active() */ 1916 1917 /* cancel old inflight, prepare for switch */ 1918 trace_ports(execlists, "preempted", old); 1919 while (*old) 1920 *inactive++ = *old++; 1921 1922 /* switch pending to inflight */ 1923 GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); 1924 copy_ports(execlists->inflight, 1925 execlists->pending, 1926 execlists_num_ports(execlists)); 1927 smp_wmb(); /* complete the seqlock */ 1928 WRITE_ONCE(execlists->active, execlists->inflight); 1929 1930 /* XXX Magic delay for tgl */ 1931 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 1932 1933 WRITE_ONCE(execlists->pending[0], NULL); 1934 } else { 1935 if (GEM_WARN_ON(!*execlists->active)) { 1936 execlists->error_interrupt |= ERROR_CSB; 1937 break; 1938 } 1939 1940 /* port0 completed, advanced to port1 */ 1941 trace_ports(execlists, "completed", execlists->active); 1942 1943 /* 1944 * We rely on the hardware being strongly 1945 * ordered, that the breadcrumb write is 1946 * coherent (visible from the CPU) before the 1947 * user interrupt is processed. One might assume 1948 * that the breadcrumb write being before the 1949 * user interrupt and the CS event for the context 1950 * switch would therefore be before the CS event 1951 * itself... 
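 * If that assumption is ever violated, the debug check below catches
 * the context-switch event arriving before the breadcrumb is visible
 * and dumps the ring, request and context state to help diagnose it.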
 */

			if (GEM_SHOW_DEBUG() &&
			    !__i915_request_is_complete(*execlists->active)) {
				struct i915_request *rq = *execlists->active;
				const u32 *regs __maybe_unused =
					rq->context->lrc_reg_state;

				ENGINE_TRACE(engine,
					     "context completed before request!\n");
				ENGINE_TRACE(engine,
					     "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
					     ENGINE_READ(engine, RING_START),
					     ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
					     ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
					     ENGINE_READ(engine, RING_CTL),
					     ENGINE_READ(engine, RING_MI_MODE));
				ENGINE_TRACE(engine,
					     "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
					     i915_ggtt_offset(rq->ring->vma),
					     rq->head, rq->tail,
					     rq->fence.context,
					     lower_32_bits(rq->fence.seqno),
					     hwsp_seqno(rq));
				ENGINE_TRACE(engine,
					     "ctx:{start:%08x, head:%04x, tail:%04x}, ",
					     regs[CTX_RING_START],
					     regs[CTX_RING_HEAD],
					     regs[CTX_RING_TAIL]);
			}

			*inactive++ = *execlists->active++;

			GEM_BUG_ON(execlists->active - execlists->inflight >
				   execlists_num_ports(execlists));
		}
	} while (head != tail);

	/*
	 * Gen11 has proven to fail, wrt the global observation point, on the
	 * ordering between the CSB entry write and the tail-pointer update,
	 * and thus we can see a stale entry in the context status buffer.
	 *
	 * Forcibly evict the entries before the next GPU CSB update, to
	 * increase the odds that we get fresh entries even with non-working
	 * hardware. The cost of doing so mostly comes out in the wash, as the
	 * hardware, working or not, will need to do the invalidation anyway.
	 */
	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);

	/*
	 * We assume that any event reflects a change in context flow
	 * and merits a fresh timeslice. We reinstall the timer after
	 * inspecting the queue to see if we need to resubmit.
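	 * If the request at the head of execlists->active is unchanged, the
	 * event was only a lite-restore of the same context and the timeslice
	 * already running is left untouched.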
2006 */ 2007 if (*prev != *execlists->active) /* elide lite-restores */ 2008 new_timeslice(execlists); 2009 2010 return inactive; 2011 } 2012 2013 static void post_process_csb(struct i915_request **port, 2014 struct i915_request **last) 2015 { 2016 while (port != last) 2017 execlists_schedule_out(*port++); 2018 } 2019 2020 static void __execlists_hold(struct i915_request *rq) 2021 { 2022 LIST_HEAD(list); 2023 2024 do { 2025 struct i915_dependency *p; 2026 2027 if (i915_request_is_active(rq)) 2028 __i915_request_unsubmit(rq); 2029 2030 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2031 list_move_tail(&rq->sched.link, 2032 &rq->engine->sched_engine->hold); 2033 i915_request_set_hold(rq); 2034 RQ_TRACE(rq, "on hold\n"); 2035 2036 for_each_waiter(p, rq) { 2037 struct i915_request *w = 2038 container_of(p->waiter, typeof(*w), sched); 2039 2040 if (p->flags & I915_DEPENDENCY_WEAK) 2041 continue; 2042 2043 /* Leave semaphores spinning on the other engines */ 2044 if (w->engine != rq->engine) 2045 continue; 2046 2047 if (!i915_request_is_ready(w)) 2048 continue; 2049 2050 if (__i915_request_is_complete(w)) 2051 continue; 2052 2053 if (i915_request_on_hold(w)) 2054 continue; 2055 2056 list_move_tail(&w->sched.link, &list); 2057 } 2058 2059 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 2060 } while (rq); 2061 } 2062 2063 static bool execlists_hold(struct intel_engine_cs *engine, 2064 struct i915_request *rq) 2065 { 2066 if (i915_request_on_hold(rq)) 2067 return false; 2068 2069 spin_lock_irq(&engine->sched_engine->lock); 2070 2071 if (__i915_request_is_complete(rq)) { /* too late! */ 2072 rq = NULL; 2073 goto unlock; 2074 } 2075 2076 /* 2077 * Transfer this request onto the hold queue to prevent it 2078 * being resumbitted to HW (and potentially completed) before we have 2079 * released it. Since we may have already submitted following 2080 * requests, we need to remove those as well. 2081 */ 2082 GEM_BUG_ON(i915_request_on_hold(rq)); 2083 GEM_BUG_ON(rq->engine != engine); 2084 __execlists_hold(rq); 2085 GEM_BUG_ON(list_empty(&engine->sched_engine->hold)); 2086 2087 unlock: 2088 spin_unlock_irq(&engine->sched_engine->lock); 2089 return rq; 2090 } 2091 2092 static bool hold_request(const struct i915_request *rq) 2093 { 2094 struct i915_dependency *p; 2095 bool result = false; 2096 2097 /* 2098 * If one of our ancestors is on hold, we must also be on hold, 2099 * otherwise we will bypass it and execute before it. 
2100 */ 2101 rcu_read_lock(); 2102 for_each_signaler(p, rq) { 2103 const struct i915_request *s = 2104 container_of(p->signaler, typeof(*s), sched); 2105 2106 if (s->engine != rq->engine) 2107 continue; 2108 2109 result = i915_request_on_hold(s); 2110 if (result) 2111 break; 2112 } 2113 rcu_read_unlock(); 2114 2115 return result; 2116 } 2117 2118 static void __execlists_unhold(struct i915_request *rq) 2119 { 2120 LIST_HEAD(list); 2121 2122 do { 2123 struct i915_dependency *p; 2124 2125 RQ_TRACE(rq, "hold release\n"); 2126 2127 GEM_BUG_ON(!i915_request_on_hold(rq)); 2128 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); 2129 2130 i915_request_clear_hold(rq); 2131 list_move_tail(&rq->sched.link, 2132 i915_sched_lookup_priolist(rq->engine->sched_engine, 2133 rq_prio(rq))); 2134 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2135 2136 /* Also release any children on this engine that are ready */ 2137 for_each_waiter(p, rq) { 2138 struct i915_request *w = 2139 container_of(p->waiter, typeof(*w), sched); 2140 2141 if (p->flags & I915_DEPENDENCY_WEAK) 2142 continue; 2143 2144 if (w->engine != rq->engine) 2145 continue; 2146 2147 if (!i915_request_on_hold(w)) 2148 continue; 2149 2150 /* Check that no other parents are also on hold */ 2151 if (hold_request(w)) 2152 continue; 2153 2154 list_move_tail(&w->sched.link, &list); 2155 } 2156 2157 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 2158 } while (rq); 2159 } 2160 2161 static void execlists_unhold(struct intel_engine_cs *engine, 2162 struct i915_request *rq) 2163 { 2164 spin_lock_irq(&engine->sched_engine->lock); 2165 2166 /* 2167 * Move this request back to the priority queue, and all of its 2168 * children and grandchildren that were suspended along with it. 2169 */ 2170 __execlists_unhold(rq); 2171 2172 if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { 2173 engine->sched_engine->queue_priority_hint = rq_prio(rq); 2174 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2175 } 2176 2177 spin_unlock_irq(&engine->sched_engine->lock); 2178 } 2179 2180 struct execlists_capture { 2181 struct work_struct work; 2182 struct i915_request *rq; 2183 struct i915_gpu_coredump *error; 2184 }; 2185 2186 static void execlists_capture_work(struct work_struct *work) 2187 { 2188 struct execlists_capture *cap = container_of(work, typeof(*cap), work); 2189 const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | 2190 __GFP_NOWARN; 2191 struct intel_engine_cs *engine = cap->rq->engine; 2192 struct intel_gt_coredump *gt = cap->error->gt; 2193 struct intel_engine_capture_vma *vma; 2194 2195 /* Compress all the objects attached to the request, slow! 
*/ 2196 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); 2197 if (vma) { 2198 struct i915_vma_compress *compress = 2199 i915_vma_capture_prepare(gt); 2200 2201 intel_engine_coredump_add_vma(gt->engine, vma, compress); 2202 i915_vma_capture_finish(gt, compress); 2203 } 2204 2205 gt->simulated = gt->engine->simulated; 2206 cap->error->simulated = gt->simulated; 2207 2208 /* Publish the error state, and announce it to the world */ 2209 i915_error_state_store(cap->error); 2210 i915_gpu_coredump_put(cap->error); 2211 2212 /* Return this request and all that depend upon it for signaling */ 2213 execlists_unhold(engine, cap->rq); 2214 i915_request_put(cap->rq); 2215 2216 kfree(cap); 2217 } 2218 2219 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) 2220 { 2221 const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 2222 struct execlists_capture *cap; 2223 2224 cap = kmalloc(sizeof(*cap), gfp); 2225 if (!cap) 2226 return NULL; 2227 2228 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); 2229 if (!cap->error) 2230 goto err_cap; 2231 2232 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); 2233 if (!cap->error->gt) 2234 goto err_gpu; 2235 2236 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); 2237 if (!cap->error->gt->engine) 2238 goto err_gt; 2239 2240 cap->error->gt->engine->hung = true; 2241 2242 return cap; 2243 2244 err_gt: 2245 kfree(cap->error->gt); 2246 err_gpu: 2247 kfree(cap->error); 2248 err_cap: 2249 kfree(cap); 2250 return NULL; 2251 } 2252 2253 static struct i915_request * 2254 active_context(struct intel_engine_cs *engine, u32 ccid) 2255 { 2256 const struct intel_engine_execlists * const el = &engine->execlists; 2257 struct i915_request * const *port, *rq; 2258 2259 /* 2260 * Use the most recent result from process_csb(), but just in case 2261 * we trigger an error (via interrupt) before the first CS event has 2262 * been written, peek at the next submission. 2263 */ 2264 2265 for (port = el->active; (rq = *port); port++) { 2266 if (rq->context->lrc.ccid == ccid) { 2267 ENGINE_TRACE(engine, 2268 "ccid:%x found at active:%zd\n", 2269 ccid, port - el->active); 2270 return rq; 2271 } 2272 } 2273 2274 for (port = el->pending; (rq = *port); port++) { 2275 if (rq->context->lrc.ccid == ccid) { 2276 ENGINE_TRACE(engine, 2277 "ccid:%x found at pending:%zd\n", 2278 ccid, port - el->pending); 2279 return rq; 2280 } 2281 } 2282 2283 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); 2284 return NULL; 2285 } 2286 2287 static u32 active_ccid(struct intel_engine_cs *engine) 2288 { 2289 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); 2290 } 2291 2292 static void execlists_capture(struct intel_engine_cs *engine) 2293 { 2294 struct execlists_capture *cap; 2295 2296 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) 2297 return; 2298 2299 /* 2300 * We need to _quickly_ capture the engine state before we reset. 2301 * We are inside an atomic section (softirq) here and we are delaying 2302 * the forced preemption event. 2303 */ 2304 cap = capture_regs(engine); 2305 if (!cap) 2306 return; 2307 2308 spin_lock_irq(&engine->sched_engine->lock); 2309 cap->rq = active_context(engine, active_ccid(engine)); 2310 if (cap->rq) { 2311 cap->rq = active_request(cap->rq->context->timeline, cap->rq); 2312 cap->rq = i915_request_get_rcu(cap->rq); 2313 } 2314 spin_unlock_irq(&engine->sched_engine->lock); 2315 if (!cap->rq) 2316 goto err_free; 2317 2318 /* 2319 * Remove the request from the execlists queue, and take ownership 2320 * of the request. 
We pass it to our worker who will _slowly_ compress 2321 * all the pages the _user_ requested for debugging their batch, after 2322 * which we return it to the queue for signaling. 2323 * 2324 * By removing them from the execlists queue, we also remove the 2325 * requests from being processed by __unwind_incomplete_requests() 2326 * during the intel_engine_reset(), and so they will *not* be replayed 2327 * afterwards. 2328 * 2329 * Note that because we have not yet reset the engine at this point, 2330 * it is possible for the request that we have identified as being 2331 * guilty, did in fact complete and we will then hit an arbitration 2332 * point allowing the outstanding preemption to succeed. The likelihood 2333 * of that is very low (as capturing of the engine registers should be 2334 * fast enough to run inside an irq-off atomic section!), so we will 2335 * simply hold that request accountable for being non-preemptible 2336 * long enough to force the reset. 2337 */ 2338 if (!execlists_hold(engine, cap->rq)) 2339 goto err_rq; 2340 2341 INIT_WORK(&cap->work, execlists_capture_work); 2342 schedule_work(&cap->work); 2343 return; 2344 2345 err_rq: 2346 i915_request_put(cap->rq); 2347 err_free: 2348 i915_gpu_coredump_put(cap->error); 2349 kfree(cap); 2350 } 2351 2352 static void execlists_reset(struct intel_engine_cs *engine, const char *msg) 2353 { 2354 const unsigned int bit = I915_RESET_ENGINE + engine->id; 2355 unsigned long *lock = &engine->gt->reset.flags; 2356 2357 if (!intel_has_reset_engine(engine->gt)) 2358 return; 2359 2360 if (test_and_set_bit(bit, lock)) 2361 return; 2362 2363 ENGINE_TRACE(engine, "reset for %s\n", msg); 2364 2365 /* Mark this tasklet as disabled to avoid waiting for it to complete */ 2366 tasklet_disable_nosync(&engine->sched_engine->tasklet); 2367 2368 ring_set_paused(engine, 1); /* Freeze the current request in place */ 2369 execlists_capture(engine); 2370 intel_engine_reset(engine, msg); 2371 2372 tasklet_enable(&engine->sched_engine->tasklet); 2373 clear_and_wake_up_bit(bit, lock); 2374 } 2375 2376 static bool preempt_timeout(const struct intel_engine_cs *const engine) 2377 { 2378 const struct timer_list *t = &engine->execlists.preempt; 2379 2380 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) 2381 return false; 2382 2383 if (!timer_expired(t)) 2384 return false; 2385 2386 return engine->execlists.pending[0]; 2387 } 2388 2389 /* 2390 * Check the unread Context Status Buffers and manage the submission of new 2391 * contexts to the ELSP accordingly. 2392 */ 2393 static void execlists_submission_tasklet(struct tasklet_struct *t) 2394 { 2395 struct i915_sched_engine *sched_engine = 2396 from_tasklet(sched_engine, t, tasklet); 2397 struct intel_engine_cs * const engine = sched_engine->private_data; 2398 struct i915_request *post[2 * EXECLIST_MAX_PORTS]; 2399 struct i915_request **inactive; 2400 2401 rcu_read_lock(); 2402 inactive = process_csb(engine, post); 2403 GEM_BUG_ON(inactive - post > ARRAY_SIZE(post)); 2404 2405 if (unlikely(preempt_timeout(engine))) { 2406 cancel_timer(&engine->execlists.preempt); 2407 engine->execlists.error_interrupt |= ERROR_PREEMPT; 2408 } 2409 2410 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { 2411 const char *msg; 2412 2413 /* Generate the error message in priority wrt to the user! 
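 * That is, a CS error raised by the user's payload is reported first,
 * then driver-detected CSB or preemption-timeout problems, before the
 * recorded cause is cleared and the engine reset.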
*/ 2414 if (engine->execlists.error_interrupt & GENMASK(15, 0)) 2415 msg = "CS error"; /* thrown by a user payload */ 2416 else if (engine->execlists.error_interrupt & ERROR_CSB) 2417 msg = "invalid CSB event"; 2418 else if (engine->execlists.error_interrupt & ERROR_PREEMPT) 2419 msg = "preemption time out"; 2420 else 2421 msg = "internal error"; 2422 2423 engine->execlists.error_interrupt = 0; 2424 execlists_reset(engine, msg); 2425 } 2426 2427 if (!engine->execlists.pending[0]) { 2428 execlists_dequeue_irq(engine); 2429 start_timeslice(engine); 2430 } 2431 2432 post_process_csb(post, inactive); 2433 rcu_read_unlock(); 2434 } 2435 2436 static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) 2437 { 2438 bool tasklet = false; 2439 2440 if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { 2441 u32 eir; 2442 2443 /* Upper 16b are the enabling mask, rsvd for internal errors */ 2444 eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0); 2445 ENGINE_TRACE(engine, "CS error: %x\n", eir); 2446 2447 /* Disable the error interrupt until after the reset */ 2448 if (likely(eir)) { 2449 ENGINE_WRITE(engine, RING_EMR, ~0u); 2450 ENGINE_WRITE(engine, RING_EIR, eir); 2451 WRITE_ONCE(engine->execlists.error_interrupt, eir); 2452 tasklet = true; 2453 } 2454 } 2455 2456 if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { 2457 WRITE_ONCE(engine->execlists.yield, 2458 ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); 2459 ENGINE_TRACE(engine, "semaphore yield: %08x\n", 2460 engine->execlists.yield); 2461 if (del_timer(&engine->execlists.timer)) 2462 tasklet = true; 2463 } 2464 2465 if (iir & GT_CONTEXT_SWITCH_INTERRUPT) 2466 tasklet = true; 2467 2468 if (iir & GT_RENDER_USER_INTERRUPT) 2469 intel_engine_signal_breadcrumbs(engine); 2470 2471 if (tasklet) 2472 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2473 } 2474 2475 static void __execlists_kick(struct intel_engine_execlists *execlists) 2476 { 2477 struct intel_engine_cs *engine = 2478 container_of(execlists, typeof(*engine), execlists); 2479 2480 /* Kick the tasklet for some interrupt coalescing and reset handling */ 2481 tasklet_hi_schedule(&engine->sched_engine->tasklet); 2482 } 2483 2484 #define execlists_kick(t, member) \ 2485 __execlists_kick(container_of(t, struct intel_engine_execlists, member)) 2486 2487 static void execlists_timeslice(struct timer_list *timer) 2488 { 2489 execlists_kick(timer, timer); 2490 } 2491 2492 static void execlists_preempt(struct timer_list *timer) 2493 { 2494 execlists_kick(timer, preempt); 2495 } 2496 2497 static void queue_request(struct intel_engine_cs *engine, 2498 struct i915_request *rq) 2499 { 2500 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2501 list_add_tail(&rq->sched.link, 2502 i915_sched_lookup_priolist(engine->sched_engine, 2503 rq_prio(rq))); 2504 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2505 } 2506 2507 static bool submit_queue(struct intel_engine_cs *engine, 2508 const struct i915_request *rq) 2509 { 2510 struct i915_sched_engine *sched_engine = engine->sched_engine; 2511 2512 if (rq_prio(rq) <= sched_engine->queue_priority_hint) 2513 return false; 2514 2515 sched_engine->queue_priority_hint = rq_prio(rq); 2516 return true; 2517 } 2518 2519 static bool ancestor_on_hold(const struct intel_engine_cs *engine, 2520 const struct i915_request *rq) 2521 { 2522 GEM_BUG_ON(i915_request_on_hold(rq)); 2523 return !list_empty(&engine->sched_engine->hold) && hold_request(rq); 2524 } 2525 2526 static void execlists_submit_request(struct i915_request *request) 2527 { 2528 struct intel_engine_cs 
*engine = request->engine; 2529 unsigned long flags; 2530 2531 /* Will be called from irq-context when using foreign fences. */ 2532 spin_lock_irqsave(&engine->sched_engine->lock, flags); 2533 2534 if (unlikely(ancestor_on_hold(engine, request))) { 2535 RQ_TRACE(request, "ancestor on hold\n"); 2536 list_add_tail(&request->sched.link, 2537 &engine->sched_engine->hold); 2538 i915_request_set_hold(request); 2539 } else { 2540 queue_request(engine, request); 2541 2542 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); 2543 GEM_BUG_ON(list_empty(&request->sched.link)); 2544 2545 if (submit_queue(engine, request)) 2546 __execlists_kick(&engine->execlists); 2547 } 2548 2549 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 2550 } 2551 2552 static int 2553 __execlists_context_pre_pin(struct intel_context *ce, 2554 struct intel_engine_cs *engine, 2555 struct i915_gem_ww_ctx *ww, void **vaddr) 2556 { 2557 int err; 2558 2559 err = lrc_pre_pin(ce, engine, ww, vaddr); 2560 if (err) 2561 return err; 2562 2563 if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { 2564 lrc_init_state(ce, engine, *vaddr); 2565 2566 __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); 2567 } 2568 2569 return 0; 2570 } 2571 2572 static int execlists_context_pre_pin(struct intel_context *ce, 2573 struct i915_gem_ww_ctx *ww, 2574 void **vaddr) 2575 { 2576 return __execlists_context_pre_pin(ce, ce->engine, ww, vaddr); 2577 } 2578 2579 static int execlists_context_pin(struct intel_context *ce, void *vaddr) 2580 { 2581 return lrc_pin(ce, ce->engine, vaddr); 2582 } 2583 2584 static int execlists_context_alloc(struct intel_context *ce) 2585 { 2586 return lrc_alloc(ce, ce->engine); 2587 } 2588 2589 static void execlists_context_cancel_request(struct intel_context *ce, 2590 struct i915_request *rq) 2591 { 2592 struct intel_engine_cs *engine = NULL; 2593 2594 i915_request_active_engine(rq, &engine); 2595 2596 if (engine && intel_engine_pulse(engine)) 2597 intel_gt_handle_error(engine->gt, engine->mask, 0, 2598 "request cancellation by %s", 2599 current->comm); 2600 } 2601 2602 static const struct intel_context_ops execlists_context_ops = { 2603 .flags = COPS_HAS_INFLIGHT, 2604 2605 .alloc = execlists_context_alloc, 2606 2607 .cancel_request = execlists_context_cancel_request, 2608 2609 .pre_pin = execlists_context_pre_pin, 2610 .pin = execlists_context_pin, 2611 .unpin = lrc_unpin, 2612 .post_unpin = lrc_post_unpin, 2613 2614 .enter = intel_context_enter_engine, 2615 .exit = intel_context_exit_engine, 2616 2617 .reset = lrc_reset, 2618 .destroy = lrc_destroy, 2619 2620 .create_virtual = execlists_create_virtual, 2621 }; 2622 2623 static int emit_pdps(struct i915_request *rq) 2624 { 2625 const struct intel_engine_cs * const engine = rq->engine; 2626 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); 2627 int err, i; 2628 u32 *cs; 2629 2630 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); 2631 2632 /* 2633 * Beware ye of the dragons, this sequence is magic! 2634 * 2635 * Small changes to this sequence can cause anything from 2636 * GPU hangs to forcewake errors and machine lockups! 2637 */ 2638 2639 cs = intel_ring_begin(rq, 2); 2640 if (IS_ERR(cs)) 2641 return PTR_ERR(cs); 2642 2643 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 2644 *cs++ = MI_NOOP; 2645 intel_ring_advance(rq, cs); 2646 2647 /* Flush any residual operations from the context load */ 2648 err = engine->emit_flush(rq, EMIT_FLUSH); 2649 if (err) 2650 return err; 2651 2652 /* Magic required to prevent forcewake errors! 
 */
	err = engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		return err;

	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Ensure the LRI have landed before we invalidate & continue */
	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
	for (i = GEN8_3LVL_PDPES; i--; ) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
		u32 base = engine->mmio_base;

		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
		*cs++ = upper_32_bits(pd_daddr);
		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
		*cs++ = lower_32_bits(pd_daddr);
	}
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	intel_ring_advance(rq, cs);

	return 0;
}

static int execlists_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!intel_context_is_pinned(request->context));

	/*
	 * Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += EXECLISTS_REQUEST_SIZE;

	/*
	 * Note that after this point, we have committed to using
	 * this request as it is being used to both track the
	 * state of engine initialisation and liveness of the
	 * golden renderstate above. Think twice before you try
	 * to cancel/unwind this request now.
	 */

	if (!i915_vm_is_4lvl(request->context->vm)) {
		ret = emit_pdps(request);
		if (ret)
			return ret;
	}

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	if (ret)
		return ret;

	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
	return 0;
}

static void reset_csb_pointers(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	const unsigned int reset_value = execlists->csb_size - 1;

	ring_set_paused(engine, 0);

	/*
	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
	 * Bludgeon them with an mmio update to be sure.
	 */
	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
		     0xffff << 16 | reset_value << 8 | reset_value);
	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);

	/*
	 * After a reset, the HW starts writing into CSB entry [0]. We
	 * therefore have to set our HEAD pointer back one entry so that
	 * the *first* entry we check is entry 0. To complicate this further,
	 * as we don't wait for the first interrupt after reset, we have to
	 * fake the HW write to point back to the last entry so that our
	 * inline comparison of our cached head position against the last HW
	 * write works even before the first interrupt.
	 */
	execlists->csb_head = reset_value;
	WRITE_ONCE(*execlists->csb_write, reset_value);
	wmb(); /* Make sure this is visible to HW (paranoia?) */

	/* Check that the GPU does indeed update the CSB entries!
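	 * Poison every entry with -1, a value the GPU is expected to
	 * overwrite, so that if the CSB writes are not landing csb_read()
	 * spots the stale slots rather than us consuming garbage.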
*/ 2745 memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); 2746 invalidate_csb_entries(&execlists->csb_status[0], 2747 &execlists->csb_status[reset_value]); 2748 2749 /* Once more for luck and our trusty paranoia */ 2750 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, 2751 0xffff << 16 | reset_value << 8 | reset_value); 2752 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 2753 2754 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); 2755 } 2756 2757 static void sanitize_hwsp(struct intel_engine_cs *engine) 2758 { 2759 struct intel_timeline *tl; 2760 2761 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 2762 intel_timeline_reset_seqno(tl); 2763 } 2764 2765 static void execlists_sanitize(struct intel_engine_cs *engine) 2766 { 2767 GEM_BUG_ON(execlists_active(&engine->execlists)); 2768 2769 /* 2770 * Poison residual state on resume, in case the suspend didn't! 2771 * 2772 * We have to assume that across suspend/resume (or other loss 2773 * of control) that the contents of our pinned buffers has been 2774 * lost, replaced by garbage. Since this doesn't always happen, 2775 * let's poison such state so that we more quickly spot when 2776 * we falsely assume it has been preserved. 2777 */ 2778 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 2779 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 2780 2781 reset_csb_pointers(engine); 2782 2783 /* 2784 * The kernel_context HWSP is stored in the status_page. As above, 2785 * that may be lost on resume/initialisation, and so we need to 2786 * reset the value in the HWSP. 2787 */ 2788 sanitize_hwsp(engine); 2789 2790 /* And scrub the dirty cachelines for the HWSP */ 2791 clflush_cache_range(engine->status_page.addr, PAGE_SIZE); 2792 2793 intel_engine_reset_pinned_contexts(engine); 2794 } 2795 2796 static void enable_error_interrupt(struct intel_engine_cs *engine) 2797 { 2798 u32 status; 2799 2800 engine->execlists.error_interrupt = 0; 2801 ENGINE_WRITE(engine, RING_EMR, ~0u); 2802 ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ 2803 2804 status = ENGINE_READ(engine, RING_ESR); 2805 if (unlikely(status)) { 2806 drm_err(&engine->i915->drm, 2807 "engine '%s' resumed still in error: %08x\n", 2808 engine->name, status); 2809 __intel_gt_reset(engine->gt, engine->mask); 2810 } 2811 2812 /* 2813 * On current gen8+, we have 2 signals to play with 2814 * 2815 * - I915_ERROR_INSTUCTION (bit 0) 2816 * 2817 * Generate an error if the command parser encounters an invalid 2818 * instruction 2819 * 2820 * This is a fatal error. 2821 * 2822 * - CP_PRIV (bit 2) 2823 * 2824 * Generate an error on privilege violation (where the CP replaces 2825 * the instruction with a no-op). This also fires for writes into 2826 * read-only scratch pages. 2827 * 2828 * This is a non-fatal error, parsing continues. 2829 * 2830 * * there are a few others defined for odd HW that we do not use 2831 * 2832 * Since CP_PRIV fires for cases where we have chosen to ignore the 2833 * error (as the HW is validating and suppressing the mistakes), we 2834 * only unmask the instruction error bit. 
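 * (Bits set in RING_EMR mask the corresponding error source, so writing
 * ~I915_ERROR_INSTRUCTION below leaves only the fatal instruction error
 * able to raise a CS master error interrupt.)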
2835 */ 2836 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); 2837 } 2838 2839 static void enable_execlists(struct intel_engine_cs *engine) 2840 { 2841 u32 mode; 2842 2843 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 2844 2845 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 2846 2847 if (GRAPHICS_VER(engine->i915) >= 11) 2848 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); 2849 else 2850 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); 2851 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); 2852 2853 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 2854 2855 ENGINE_WRITE_FW(engine, 2856 RING_HWS_PGA, 2857 i915_ggtt_offset(engine->status_page.vma)); 2858 ENGINE_POSTING_READ(engine, RING_HWS_PGA); 2859 2860 enable_error_interrupt(engine); 2861 } 2862 2863 static int execlists_resume(struct intel_engine_cs *engine) 2864 { 2865 intel_mocs_init_engine(engine); 2866 intel_breadcrumbs_reset(engine->breadcrumbs); 2867 2868 enable_execlists(engine); 2869 2870 return 0; 2871 } 2872 2873 static void execlists_reset_prepare(struct intel_engine_cs *engine) 2874 { 2875 ENGINE_TRACE(engine, "depth<-%d\n", 2876 atomic_read(&engine->sched_engine->tasklet.count)); 2877 2878 /* 2879 * Prevent request submission to the hardware until we have 2880 * completed the reset in i915_gem_reset_finish(). If a request 2881 * is completed by one engine, it may then queue a request 2882 * to a second via its execlists->tasklet *just* as we are 2883 * calling engine->resume() and also writing the ELSP. 2884 * Turning off the execlists->tasklet until the reset is over 2885 * prevents the race. 2886 */ 2887 __tasklet_disable_sync_once(&engine->sched_engine->tasklet); 2888 GEM_BUG_ON(!reset_in_progress(engine)); 2889 2890 /* 2891 * We stop engines, otherwise we might get failed reset and a 2892 * dead gpu (on elk). Also as modern gpu as kbl can suffer 2893 * from system hang if batchbuffer is progressing when 2894 * the reset is issued, regardless of READY_TO_RESET ack. 2895 * Thus assume it is best to stop engines on all gens 2896 * where we have a gpu reset. 2897 * 2898 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) 2899 * 2900 * FIXME: Wa for more modern gens needs to be validated 2901 */ 2902 ring_set_paused(engine, 1); 2903 intel_engine_stop_cs(engine); 2904 2905 engine->execlists.reset_ccid = active_ccid(engine); 2906 } 2907 2908 static struct i915_request ** 2909 reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive) 2910 { 2911 struct intel_engine_execlists * const execlists = &engine->execlists; 2912 2913 mb(); /* paranoia: read the CSB pointers from after the reset */ 2914 clflush(execlists->csb_write); 2915 mb(); 2916 2917 inactive = process_csb(engine, inactive); /* drain preemption events */ 2918 2919 /* Following the reset, we need to reload the CSB read/write pointers */ 2920 reset_csb_pointers(engine); 2921 2922 return inactive; 2923 } 2924 2925 static void 2926 execlists_reset_active(struct intel_engine_cs *engine, bool stalled) 2927 { 2928 struct intel_context *ce; 2929 struct i915_request *rq; 2930 u32 head; 2931 2932 /* 2933 * Save the currently executing context, even if we completed 2934 * its request, it was still running at the time of the 2935 * reset and will have been clobbered. 
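 * The handling below then splits three ways: a completed request only
 * needs the ring rewound; a request that has not yet started is replayed
 * as-is; otherwise __i915_request_reset() decides, based on 'stalled',
 * whether to treat the request as guilty so it can be skipped over,
 * before the RING registers are rebuilt for the replay.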
2936 */ 2937 rq = active_context(engine, engine->execlists.reset_ccid); 2938 if (!rq) 2939 return; 2940 2941 ce = rq->context; 2942 GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); 2943 2944 if (__i915_request_is_complete(rq)) { 2945 /* Idle context; tidy up the ring so we can restart afresh */ 2946 head = intel_ring_wrap(ce->ring, rq->tail); 2947 goto out_replay; 2948 } 2949 2950 /* We still have requests in-flight; the engine should be active */ 2951 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 2952 2953 /* Context has requests still in-flight; it should not be idle! */ 2954 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 2955 2956 rq = active_request(ce->timeline, rq); 2957 head = intel_ring_wrap(ce->ring, rq->head); 2958 GEM_BUG_ON(head == ce->ring->tail); 2959 2960 /* 2961 * If this request hasn't started yet, e.g. it is waiting on a 2962 * semaphore, we need to avoid skipping the request or else we 2963 * break the signaling chain. However, if the context is corrupt 2964 * the request will not restart and we will be stuck with a wedged 2965 * device. It is quite often the case that if we issue a reset 2966 * while the GPU is loading the context image, that the context 2967 * image becomes corrupt. 2968 * 2969 * Otherwise, if we have not started yet, the request should replay 2970 * perfectly and we do not need to flag the result as being erroneous. 2971 */ 2972 if (!__i915_request_has_started(rq)) 2973 goto out_replay; 2974 2975 /* 2976 * If the request was innocent, we leave the request in the ELSP 2977 * and will try to replay it on restarting. The context image may 2978 * have been corrupted by the reset, in which case we may have 2979 * to service a new GPU hang, but more likely we can continue on 2980 * without impact. 2981 * 2982 * If the request was guilty, we presume the context is corrupt 2983 * and have to at least restore the RING register in the context 2984 * image back to the expected values to skip over the guilty request. 2985 */ 2986 __i915_request_reset(rq, stalled); 2987 2988 /* 2989 * We want a simple context + ring to execute the breadcrumb update. 2990 * We cannot rely on the context being intact across the GPU hang, 2991 * so clear it and rebuild just what we need for the breadcrumb. 2992 * All pending requests for this context will be zapped, and any 2993 * future request will be after userspace has had the opportunity 2994 * to recreate its own state. 2995 */ 2996 out_replay: 2997 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", 2998 head, ce->ring->tail); 2999 lrc_reset_regs(ce, engine); 3000 ce->lrc.lrca = lrc_update_regs(ce, engine, head); 3001 } 3002 3003 static void execlists_reset_csb(struct intel_engine_cs *engine, bool stalled) 3004 { 3005 struct intel_engine_execlists * const execlists = &engine->execlists; 3006 struct i915_request *post[2 * EXECLIST_MAX_PORTS]; 3007 struct i915_request **inactive; 3008 3009 rcu_read_lock(); 3010 inactive = reset_csb(engine, post); 3011 3012 execlists_reset_active(engine, true); 3013 3014 inactive = cancel_port_requests(execlists, inactive); 3015 post_process_csb(post, inactive); 3016 rcu_read_unlock(); 3017 } 3018 3019 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) 3020 { 3021 unsigned long flags; 3022 3023 ENGINE_TRACE(engine, "\n"); 3024 3025 /* Process the csb, find the guilty context and throw away */ 3026 execlists_reset_csb(engine, stalled); 3027 3028 /* Push back any incomplete requests for replay after the reset. 
*/ 3029 rcu_read_lock(); 3030 spin_lock_irqsave(&engine->sched_engine->lock, flags); 3031 __unwind_incomplete_requests(engine); 3032 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 3033 rcu_read_unlock(); 3034 } 3035 3036 static void nop_submission_tasklet(struct tasklet_struct *t) 3037 { 3038 struct i915_sched_engine *sched_engine = 3039 from_tasklet(sched_engine, t, tasklet); 3040 struct intel_engine_cs * const engine = sched_engine->private_data; 3041 3042 /* The driver is wedged; don't process any more events. */ 3043 WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); 3044 } 3045 3046 static void execlists_reset_cancel(struct intel_engine_cs *engine) 3047 { 3048 struct intel_engine_execlists * const execlists = &engine->execlists; 3049 struct i915_sched_engine * const sched_engine = engine->sched_engine; 3050 struct i915_request *rq, *rn; 3051 struct rb_node *rb; 3052 unsigned long flags; 3053 3054 ENGINE_TRACE(engine, "\n"); 3055 3056 /* 3057 * Before we call engine->cancel_requests(), we should have exclusive 3058 * access to the submission state. This is arranged for us by the 3059 * caller disabling the interrupt generation, the tasklet and other 3060 * threads that may then access the same state, giving us a free hand 3061 * to reset state. However, we still need to let lockdep be aware that 3062 * we know this state may be accessed in hardirq context, so we 3063 * disable the irq around this manipulation and we want to keep 3064 * the spinlock focused on its duties and not accidentally conflate 3065 * coverage to the submission's irq state. (Similarly, although we 3066 * shouldn't need to disable irq around the manipulation of the 3067 * submission's irq state, we also wish to remind ourselves that 3068 * it is irq state.) 3069 */ 3070 execlists_reset_csb(engine, true); 3071 3072 rcu_read_lock(); 3073 spin_lock_irqsave(&engine->sched_engine->lock, flags); 3074 3075 /* Mark all executing requests as skipped. */ 3076 list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) 3077 i915_request_put(i915_request_mark_eio(rq)); 3078 intel_engine_signal_breadcrumbs(engine); 3079 3080 /* Flush the queued requests to the timeline list (for retiring). 
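 * Each is marked with -EIO and passed through __i915_request_submit()
 * so that it reaches the timeline and can be retired, rather than
 * lingering on the priority queue forever.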
*/ 3081 while ((rb = rb_first_cached(&sched_engine->queue))) { 3082 struct i915_priolist *p = to_priolist(rb); 3083 3084 priolist_for_each_request_consume(rq, rn, p) { 3085 if (i915_request_mark_eio(rq)) { 3086 __i915_request_submit(rq); 3087 i915_request_put(rq); 3088 } 3089 } 3090 3091 rb_erase_cached(&p->node, &sched_engine->queue); 3092 i915_priolist_free(p); 3093 } 3094 3095 /* On-hold requests will be flushed to timeline upon their release */ 3096 list_for_each_entry(rq, &sched_engine->hold, sched.link) 3097 i915_request_put(i915_request_mark_eio(rq)); 3098 3099 /* Cancel all attached virtual engines */ 3100 while ((rb = rb_first_cached(&execlists->virtual))) { 3101 struct virtual_engine *ve = 3102 rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 3103 3104 rb_erase_cached(rb, &execlists->virtual); 3105 RB_CLEAR_NODE(rb); 3106 3107 spin_lock(&ve->base.sched_engine->lock); 3108 rq = fetch_and_zero(&ve->request); 3109 if (rq) { 3110 if (i915_request_mark_eio(rq)) { 3111 rq->engine = engine; 3112 __i915_request_submit(rq); 3113 i915_request_put(rq); 3114 } 3115 i915_request_put(rq); 3116 3117 ve->base.sched_engine->queue_priority_hint = INT_MIN; 3118 } 3119 spin_unlock(&ve->base.sched_engine->lock); 3120 } 3121 3122 /* Remaining _unready_ requests will be nop'ed when submitted */ 3123 3124 sched_engine->queue_priority_hint = INT_MIN; 3125 sched_engine->queue = RB_ROOT_CACHED; 3126 3127 GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet)); 3128 engine->sched_engine->tasklet.callback = nop_submission_tasklet; 3129 3130 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 3131 rcu_read_unlock(); 3132 } 3133 3134 static void execlists_reset_finish(struct intel_engine_cs *engine) 3135 { 3136 struct intel_engine_execlists * const execlists = &engine->execlists; 3137 3138 /* 3139 * After a GPU reset, we may have requests to replay. Do so now while 3140 * we still have the forcewake to be sure that the GPU is not allowed 3141 * to sleep before we restart and reload a context. 3142 * 3143 * If the GPU reset fails, the engine may still be alive with requests 3144 * inflight. We expect those to complete, or for the device to be 3145 * reset as the next level of recovery, and as a final resort we 3146 * will declare the device wedged. 3147 */ 3148 GEM_BUG_ON(!reset_in_progress(engine)); 3149 3150 /* And kick in case we missed a new request submission. 
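 * __tasklet_enable() reports true only for the final enable, so the kick
 * below happens once the reset is fully complete, covering any submission
 * that arrived while the tasklet was disabled.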
*/ 3151 if (__tasklet_enable(&engine->sched_engine->tasklet)) 3152 __execlists_kick(execlists); 3153 3154 ENGINE_TRACE(engine, "depth->%d\n", 3155 atomic_read(&engine->sched_engine->tasklet.count)); 3156 } 3157 3158 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) 3159 { 3160 ENGINE_WRITE(engine, RING_IMR, 3161 ~(engine->irq_enable_mask | engine->irq_keep_mask)); 3162 ENGINE_POSTING_READ(engine, RING_IMR); 3163 } 3164 3165 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) 3166 { 3167 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); 3168 } 3169 3170 static void execlists_park(struct intel_engine_cs *engine) 3171 { 3172 cancel_timer(&engine->execlists.timer); 3173 cancel_timer(&engine->execlists.preempt); 3174 } 3175 3176 static void add_to_engine(struct i915_request *rq) 3177 { 3178 lockdep_assert_held(&rq->engine->sched_engine->lock); 3179 list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); 3180 } 3181 3182 static void remove_from_engine(struct i915_request *rq) 3183 { 3184 struct intel_engine_cs *engine, *locked; 3185 3186 /* 3187 * Virtual engines complicate acquiring the engine timeline lock, 3188 * as their rq->engine pointer is not stable until under that 3189 * engine lock. The simple ploy we use is to take the lock then 3190 * check that the rq still belongs to the newly locked engine. 3191 */ 3192 locked = READ_ONCE(rq->engine); 3193 spin_lock_irq(&locked->sched_engine->lock); 3194 while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { 3195 spin_unlock(&locked->sched_engine->lock); 3196 spin_lock(&engine->sched_engine->lock); 3197 locked = engine; 3198 } 3199 list_del_init(&rq->sched.link); 3200 3201 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3202 clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); 3203 3204 /* Prevent further __await_execution() registering a cb, then flush */ 3205 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3206 3207 spin_unlock_irq(&locked->sched_engine->lock); 3208 3209 i915_request_notify_execute_cb_imm(rq); 3210 } 3211 3212 static bool can_preempt(struct intel_engine_cs *engine) 3213 { 3214 if (GRAPHICS_VER(engine->i915) > 8) 3215 return true; 3216 3217 /* GPGPU on bdw requires extra w/a; not implemented */ 3218 return engine->class != RENDER_CLASS; 3219 } 3220 3221 static void kick_execlists(const struct i915_request *rq, int prio) 3222 { 3223 struct intel_engine_cs *engine = rq->engine; 3224 struct i915_sched_engine *sched_engine = engine->sched_engine; 3225 const struct i915_request *inflight; 3226 3227 /* 3228 * We only need to kick the tasklet once for the high priority 3229 * new context we add into the queue. 3230 */ 3231 if (prio <= sched_engine->queue_priority_hint) 3232 return; 3233 3234 rcu_read_lock(); 3235 3236 /* Nothing currently active? We're overdue for a submission! */ 3237 inflight = execlists_active(&engine->execlists); 3238 if (!inflight) 3239 goto unlock; 3240 3241 /* 3242 * If we are already the currently executing context, don't 3243 * bother evaluating if we should preempt ourselves. 
3244 */ 3245 if (inflight->context == rq->context) 3246 goto unlock; 3247 3248 ENGINE_TRACE(engine, 3249 "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", 3250 prio, 3251 rq->fence.context, rq->fence.seqno, 3252 inflight->fence.context, inflight->fence.seqno, 3253 inflight->sched.attr.priority); 3254 3255 sched_engine->queue_priority_hint = prio; 3256 3257 /* 3258 * Allow preemption of low -> normal -> high, but we do 3259 * not allow low priority tasks to preempt other low priority 3260 * tasks under the impression that latency for low priority 3261 * tasks does not matter (as much as background throughput), 3262 * so kiss. 3263 */ 3264 if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))) 3265 tasklet_hi_schedule(&sched_engine->tasklet); 3266 3267 unlock: 3268 rcu_read_unlock(); 3269 } 3270 3271 static void execlists_set_default_submission(struct intel_engine_cs *engine) 3272 { 3273 engine->submit_request = execlists_submit_request; 3274 engine->sched_engine->schedule = i915_schedule; 3275 engine->sched_engine->kick_backend = kick_execlists; 3276 engine->sched_engine->tasklet.callback = execlists_submission_tasklet; 3277 } 3278 3279 static void execlists_shutdown(struct intel_engine_cs *engine) 3280 { 3281 /* Synchronise with residual timers and any softirq they raise */ 3282 del_timer_sync(&engine->execlists.timer); 3283 del_timer_sync(&engine->execlists.preempt); 3284 tasklet_kill(&engine->sched_engine->tasklet); 3285 } 3286 3287 static void execlists_release(struct intel_engine_cs *engine) 3288 { 3289 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 3290 3291 execlists_shutdown(engine); 3292 3293 intel_engine_cleanup_common(engine); 3294 lrc_fini_wa_ctx(engine); 3295 } 3296 3297 static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine, 3298 ktime_t *now) 3299 { 3300 struct intel_engine_execlists_stats *stats = &engine->stats.execlists; 3301 ktime_t total = stats->total; 3302 3303 /* 3304 * If the engine is executing something at the moment 3305 * add it to the total. 3306 */ 3307 *now = ktime_get(); 3308 if (READ_ONCE(stats->active)) 3309 total = ktime_add(total, ktime_sub(*now, stats->start)); 3310 3311 return total; 3312 } 3313 3314 static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine, 3315 ktime_t *now) 3316 { 3317 struct intel_engine_execlists_stats *stats = &engine->stats.execlists; 3318 unsigned int seq; 3319 ktime_t total; 3320 3321 do { 3322 seq = read_seqcount_begin(&stats->lock); 3323 total = __execlists_engine_busyness(engine, now); 3324 } while (read_seqcount_retry(&stats->lock, seq)); 3325 3326 return total; 3327 } 3328 3329 static void 3330 logical_ring_default_vfuncs(struct intel_engine_cs *engine) 3331 { 3332 /* Default vfuncs which can be overridden by each engine. 
*/ 3333 3334 engine->resume = execlists_resume; 3335 3336 engine->cops = &execlists_context_ops; 3337 engine->request_alloc = execlists_request_alloc; 3338 engine->add_active_request = add_to_engine; 3339 engine->remove_active_request = remove_from_engine; 3340 3341 engine->reset.prepare = execlists_reset_prepare; 3342 engine->reset.rewind = execlists_reset_rewind; 3343 engine->reset.cancel = execlists_reset_cancel; 3344 engine->reset.finish = execlists_reset_finish; 3345 3346 engine->park = execlists_park; 3347 engine->unpark = NULL; 3348 3349 engine->emit_flush = gen8_emit_flush_xcs; 3350 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 3351 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 3352 if (GRAPHICS_VER(engine->i915) >= 12) { 3353 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 3354 engine->emit_flush = gen12_emit_flush_xcs; 3355 } 3356 engine->set_default_submission = execlists_set_default_submission; 3357 3358 if (GRAPHICS_VER(engine->i915) < 11) { 3359 engine->irq_enable = gen8_logical_ring_enable_irq; 3360 engine->irq_disable = gen8_logical_ring_disable_irq; 3361 } else { 3362 /* 3363 * TODO: On Gen11 interrupt masks need to be clear 3364 * to allow C6 entry. Keep interrupts enabled at 3365 * and take the hit of generating extra interrupts 3366 * until a more refined solution exists. 3367 */ 3368 } 3369 intel_engine_set_irq_handler(engine, execlists_irq_handler); 3370 3371 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 3372 if (!intel_vgpu_active(engine->i915)) { 3373 engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 3374 if (can_preempt(engine)) { 3375 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 3376 if (CONFIG_DRM_I915_TIMESLICE_DURATION) 3377 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 3378 } 3379 } 3380 3381 if (intel_engine_has_preemption(engine)) 3382 engine->emit_bb_start = gen8_emit_bb_start; 3383 else 3384 engine->emit_bb_start = gen8_emit_bb_start_noarb; 3385 3386 engine->busyness = execlists_engine_busyness; 3387 } 3388 3389 static void logical_ring_default_irqs(struct intel_engine_cs *engine) 3390 { 3391 unsigned int shift = 0; 3392 3393 if (GRAPHICS_VER(engine->i915) < 11) { 3394 const u8 irq_shifts[] = { 3395 [RCS0] = GEN8_RCS_IRQ_SHIFT, 3396 [BCS0] = GEN8_BCS_IRQ_SHIFT, 3397 [VCS0] = GEN8_VCS0_IRQ_SHIFT, 3398 [VCS1] = GEN8_VCS1_IRQ_SHIFT, 3399 [VECS0] = GEN8_VECS_IRQ_SHIFT, 3400 }; 3401 3402 shift = irq_shifts[engine->id]; 3403 } 3404 3405 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; 3406 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; 3407 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; 3408 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; 3409 } 3410 3411 static void rcs_submission_override(struct intel_engine_cs *engine) 3412 { 3413 switch (GRAPHICS_VER(engine->i915)) { 3414 case 12: 3415 engine->emit_flush = gen12_emit_flush_rcs; 3416 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 3417 break; 3418 case 11: 3419 engine->emit_flush = gen11_emit_flush_rcs; 3420 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 3421 break; 3422 default: 3423 engine->emit_flush = gen8_emit_flush_rcs; 3424 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 3425 break; 3426 } 3427 } 3428 3429 int intel_execlists_submission_setup(struct intel_engine_cs *engine) 3430 { 3431 struct intel_engine_execlists * const execlists = &engine->execlists; 3432 struct drm_i915_private *i915 = engine->i915; 3433 struct intel_uncore *uncore = engine->uncore; 3434 u32 

int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 base = engine->mmio_base;

	tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet);
	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);

	logical_ring_default_vfuncs(engine);
	logical_ring_default_irqs(engine);

	if (engine->class == RENDER_CLASS)
		rcs_submission_override(engine);

	lrc_init_wa_ctx(engine);

	if (HAS_LOGICAL_RING_ELSQ(i915)) {
		execlists->submit_reg = uncore->regs +
			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
		execlists->ctrl_reg = uncore->regs +
			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));

		engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore,
				    RING_EXECLIST_CONTROL(engine->mmio_base),
				    FW_REG_WRITE);
	} else {
		execlists->submit_reg = uncore->regs +
			i915_mmio_reg_offset(RING_ELSP(base));
	}

	execlists->csb_status =
		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];

	execlists->csb_write =
		&engine->status_page.addr[intel_hws_csb_write_index(i915)];

	if (GRAPHICS_VER(i915) < 11)
		execlists->csb_size = GEN8_CSB_ENTRIES;
	else
		execlists->csb_size = GEN11_CSB_ENTRIES;

	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
	if (GRAPHICS_VER(engine->i915) >= 11 &&
	    GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
		execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
		execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
	}

	/* Finally, take ownership and responsibility for cleanup! */
	engine->sanitize = execlists_sanitize;
	engine->release = execlists_release;

	return 0;
}
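
/*
 * Example: how submit_reg/ctrl_reg chosen above are used at submission
 * time. This is a simplified sketch of the two hardware flavours, not a
 * verbatim copy of the submit path:
 *
 *	// Pre-ELSQ hardware: write both context descriptors straight to
 *	// ELSP, element 1 first; the final dword write triggers the load.
 *	writel(upper_32_bits(desc[1]), execlists->submit_reg);
 *	writel(lower_32_bits(desc[1]), execlists->submit_reg);
 *	writel(upper_32_bits(desc[0]), execlists->submit_reg);
 *	writel(lower_32_bits(desc[0]), execlists->submit_reg);
 *
 *	// ELSQ hardware: fill the submit queue, then kick via the
 *	// control register.
 *	for (n = 0; n < ARRAY_SIZE(desc); n++) {
 *		writel(lower_32_bits(desc[n]), execlists->submit_reg + n * 2);
 *		writel(upper_32_bits(desc[n]), execlists->submit_reg + n * 2 + 1);
 *	}
 *	writel(EL_CTRL_LOAD, execlists->ctrl_reg);
 */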

static struct list_head *virtual_queue(struct virtual_engine *ve)
{
	return &ve->base.sched_engine->default_priolist.requests;
}

static void rcu_virtual_context_destroy(struct work_struct *wrk)
{
	struct virtual_engine *ve =
		container_of(wrk, typeof(*ve), rcu.work);
	unsigned int n;

	GEM_BUG_ON(ve->context.inflight);

	/* Preempt-to-busy may leave a stale request behind. */
	if (unlikely(ve->request)) {
		struct i915_request *old;

		spin_lock_irq(&ve->base.sched_engine->lock);

		old = fetch_and_zero(&ve->request);
		if (old) {
			GEM_BUG_ON(!__i915_request_is_complete(old));
			__i915_request_submit(old);
			i915_request_put(old);
		}

		spin_unlock_irq(&ve->base.sched_engine->lock);
	}

	/*
	 * Flush the tasklet in case it is still running on another core.
	 *
	 * This needs to be done before we remove ourselves from the
	 * siblings' rbtrees, because if it is running in parallel it may
	 * reinsert the rb_node into a sibling's tree.
	 */
	tasklet_kill(&ve->base.sched_engine->tasklet);

	/* Decouple ourselves from the siblings, no more access allowed. */
	for (n = 0; n < ve->num_siblings; n++) {
		struct intel_engine_cs *sibling = ve->siblings[n];
		struct rb_node *node = &ve->nodes[sibling->id].rb;

		if (RB_EMPTY_NODE(node))
			continue;

		spin_lock_irq(&sibling->sched_engine->lock);

		/* Detachment is lazily performed in the sched_engine->tasklet */
		if (!RB_EMPTY_NODE(node))
			rb_erase_cached(node, &sibling->execlists.virtual);

		spin_unlock_irq(&sibling->sched_engine->lock);
	}
	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet));
	GEM_BUG_ON(!list_empty(virtual_queue(ve)));

	lrc_fini(&ve->context);
	intel_context_fini(&ve->context);

	if (ve->base.breadcrumbs)
		intel_breadcrumbs_put(ve->base.breadcrumbs);
	if (ve->base.sched_engine)
		i915_sched_engine_put(ve->base.sched_engine);
	intel_engine_free_request_pool(&ve->base);

	kfree(ve);
}

static void virtual_context_destroy(struct kref *kref)
{
	struct virtual_engine *ve =
		container_of(kref, typeof(*ve), context.ref);

	GEM_BUG_ON(!list_empty(&ve->context.signals));

	/*
	 * When destroying the virtual engine, we have to be aware that
	 * it may still be in use from a hardirq/softirq context causing
	 * the resubmission of a completed request (background completion
	 * due to preempt-to-busy). Before we can free the engine, we need
	 * to flush the submission code and tasklets that are still
	 * potentially accessing the engine. Flushing the tasklets requires
	 * process context, and since we can guard the resubmit onto the
	 * engine with an RCU read lock, we can delegate the free of the
	 * engine to an RCU worker.
	 */
	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
	queue_rcu_work(system_wq, &ve->rcu);
}
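
/*
 * The teardown above relies on the classic RCU pattern: readers that may
 * resubmit onto the virtual engine from interrupt context do so under
 * rcu_read_lock(), and queue_rcu_work() delays the (sleeping) destructor
 * until a grace period has elapsed. A hedged sketch of the pairing, with
 * resubmit_virtual_request() standing in for the submission-side caller:
 *
 *	// reader (softirq/hardirq): may touch ve, must not block
 *	rcu_read_lock();
 *	resubmit_virtual_request(rq, ve);
 *	rcu_read_unlock();
 *
 *	// writer (last context reference dropped):
 *	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
 *	queue_rcu_work(system_wq, &ve->rcu); // grace period, then worker
 *
 * Only once the worker runs is it safe to tasklet_kill() and kfree() the
 * engine, which is why the free cannot happen directly in the kref
 * release callback.
 */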

static void virtual_engine_initial_hint(struct virtual_engine *ve)
{
	int swp;

	/*
	 * Pick a random sibling when starting to help spread the load around.
	 *
	 * New contexts are typically created with exactly the same order
	 * of siblings, and often started in batches. Due to the way we
	 * iterate the array of siblings when submitting requests,
	 * sibling[0] is prioritised for dequeuing. If we make sure that
	 * sibling[0] is fairly randomised across the system, we also help
	 * spread the load by making the first engine we inspect different
	 * each time.
	 *
	 * NB This does not force us to execute on this engine, it will just
	 * typically be the first we inspect for submission.
	 */
	swp = prandom_u32_max(ve->num_siblings);
	if (swp)
		swap(ve->siblings[swp], ve->siblings[0]);
}

static int virtual_context_alloc(struct intel_context *ce)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

	return lrc_alloc(ce, ve->siblings[0]);
}

static int virtual_context_pre_pin(struct intel_context *ce,
				   struct i915_gem_ww_ctx *ww,
				   void **vaddr)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

	/* Note: we must use a real engine class for setting up reg state */
	return __execlists_context_pre_pin(ce, ve->siblings[0], ww, vaddr);
}

static int virtual_context_pin(struct intel_context *ce, void *vaddr)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

	return lrc_pin(ce, ve->siblings[0], vaddr);
}

static void virtual_context_enter(struct intel_context *ce)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
	unsigned int n;

	for (n = 0; n < ve->num_siblings; n++)
		intel_engine_pm_get(ve->siblings[n]);

	intel_timeline_enter(ce->timeline);
}

static void virtual_context_exit(struct intel_context *ce)
{
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
	unsigned int n;

	intel_timeline_exit(ce->timeline);

	for (n = 0; n < ve->num_siblings; n++)
		intel_engine_pm_put(ve->siblings[n]);
}

static struct intel_engine_cs *
virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
{
	struct virtual_engine *ve = to_virtual_engine(engine);

	if (sibling >= ve->num_siblings)
		return NULL;

	return ve->siblings[sibling];
}

static const struct intel_context_ops virtual_context_ops = {
	.flags = COPS_HAS_INFLIGHT,

	.alloc = virtual_context_alloc,

	.cancel_request = execlists_context_cancel_request,

	.pre_pin = virtual_context_pre_pin,
	.pin = virtual_context_pin,
	.unpin = lrc_unpin,
	.post_unpin = lrc_post_unpin,

	.enter = virtual_context_enter,
	.exit = virtual_context_exit,

	.destroy = virtual_context_destroy,

	.get_sibling = virtual_get_sibling,
};
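
/*
 * The submission path below narrows the set of candidate siblings using
 * rq->execution_mask. Each physical engine contributes a single bit
 * (engine->mask), so eligibility per sibling is a plain bit test. A small
 * illustrative walk of the logic that follows; the sibling names and mask
 * values here are made up for the example:
 *
 *	// ve->siblings = { vcs0 (mask 0x04), vcs1 (mask 0x08) }
 *	// rq->execution_mask = 0x08	-> pinned/bonded to vcs1
 *	mask = virtual_submission_mask(ve);	// 0x08
 *	for each sibling:
 *		if (!(mask & sibling->mask))
 *			skip;		// vcs0's rbtree node is dropped
 *		else
 *			queue ve into sibling->execlists.virtual;  // vcs1 only
 *
 * An empty mask means the request can run nowhere; it is flagged with
 * -ENODEV and steered to siblings[0] purely so it can be retired.
 */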

static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
{
	struct i915_request *rq;
	intel_engine_mask_t mask;

	rq = READ_ONCE(ve->request);
	if (!rq)
		return 0;

	/* The rq is ready for submission; rq->execution_mask is now stable. */
	mask = rq->execution_mask;
	if (unlikely(!mask)) {
		/* Invalid selection, submit to a random engine in error */
		i915_request_set_error_once(rq, -ENODEV);
		mask = ve->siblings[0]->mask;
	}

	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
		     rq->fence.context, rq->fence.seqno,
		     mask, ve->base.sched_engine->queue_priority_hint);

	return mask;
}

static void virtual_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	struct virtual_engine * const ve =
		(struct virtual_engine *)sched_engine->private_data;
	const int prio = READ_ONCE(sched_engine->queue_priority_hint);
	intel_engine_mask_t mask;
	unsigned int n;

	rcu_read_lock();
	mask = virtual_submission_mask(ve);
	rcu_read_unlock();
	if (unlikely(!mask))
		return;

	for (n = 0; n < ve->num_siblings; n++) {
		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
		struct ve_node * const node = &ve->nodes[sibling->id];
		struct rb_node **parent, *rb;
		bool first;

		if (!READ_ONCE(ve->request))
			break; /* already handled by a sibling's tasklet */

		spin_lock_irq(&sibling->sched_engine->lock);

		if (unlikely(!(mask & sibling->mask))) {
			if (!RB_EMPTY_NODE(&node->rb)) {
				rb_erase_cached(&node->rb,
						&sibling->execlists.virtual);
				RB_CLEAR_NODE(&node->rb);
			}

			goto unlock_engine;
		}

		if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
			/*
			 * Cheat and avoid rebalancing the tree if we can
			 * reuse this node in situ.
			 */
			first = rb_first_cached(&sibling->execlists.virtual) ==
				&node->rb;
			if (prio == node->prio || (prio > node->prio && first))
				goto submit_engine;

			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
		}

		rb = NULL;
		first = true;
		parent = &sibling->execlists.virtual.rb_root.rb_node;
		while (*parent) {
			struct ve_node *other;

			rb = *parent;
			other = rb_entry(rb, typeof(*other), rb);
			if (prio > other->prio) {
				parent = &rb->rb_left;
			} else {
				parent = &rb->rb_right;
				first = false;
			}
		}

		rb_link_node(&node->rb, rb, parent);
		rb_insert_color_cached(&node->rb,
				       &sibling->execlists.virtual,
				       first);

submit_engine:
		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
		node->prio = prio;
		if (first && prio > sibling->sched_engine->queue_priority_hint)
			tasklet_hi_schedule(&sibling->sched_engine->tasklet);

unlock_engine:
		spin_unlock_irq(&sibling->sched_engine->lock);

		if (intel_context_inflight(&ve->context))
			break;
	}
}
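
/*
 * Note on the in-situ reuse above: each sibling keeps pending virtual
 * engines in a cached rbtree ordered by descending priority (higher prio
 * descends to the left, so the leftmost/cached node is the next candidate
 * for dequeue). Re-linking is skipped whenever it cannot change the
 * ordering; the values below are purely illustrative:
 *
 *	node->prio == 2, prio == 2            -> position unchanged, reuse
 *	node is leftmost, prio raised 2 -> 4  -> still leftmost, reuse
 *	node not leftmost, prio raised 2 -> 4 -> must re-insert
 *
 * Anything else erases the node and walks the tree again to find the new
 * insertion point.
 */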

static void virtual_submit_request(struct i915_request *rq)
{
	struct virtual_engine *ve = to_virtual_engine(rq->engine);
	unsigned long flags;

	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
		     rq->fence.context,
		     rq->fence.seqno);

	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);

	spin_lock_irqsave(&ve->base.sched_engine->lock, flags);

	/* By the time we resubmit a request, it may be completed */
	if (__i915_request_is_complete(rq)) {
		__i915_request_submit(rq);
		goto unlock;
	}

	if (ve->request) { /* background completion from preempt-to-busy */
		GEM_BUG_ON(!__i915_request_is_complete(ve->request));
		__i915_request_submit(ve->request);
		i915_request_put(ve->request);
	}

	ve->base.sched_engine->queue_priority_hint = rq_prio(rq);
	ve->request = i915_request_get(rq);

	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
	list_move_tail(&rq->sched.link, virtual_queue(ve));

	tasklet_hi_schedule(&ve->base.sched_engine->tasklet);

unlock:
	spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
}
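
/*
 * virtual_submit_request() above is deliberately a single-slot queue:
 * only one request (ve->request) is ever pending on the virtual engine
 * itself, and queue_priority_hint mirrors its priority. A rough timeline
 * of the normal flow (steps only, not literal code):
 *
 *	1. submit_request(rq)  -> ve->request = rq, kick the ve tasklet
 *	2. ve tasklet          -> insert ve into each eligible sibling's
 *	                          execlists.virtual rbtree
 *	3. sibling's tasklet   -> the first sibling to dequeue takes the
 *	                          request; the others later see
 *	                          ve->request == NULL and lazily drop
 *	                          their rbtree node
 *
 * The preempt-to-busy window is why a request already marked complete may
 * still arrive here: it is simply flushed with __i915_request_submit()
 * rather than queued again.
 */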

static struct intel_context *
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
			 unsigned long flags)
{
	struct virtual_engine *ve;
	unsigned int n;
	int err;

	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.class = OTHER_CLASS;
	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;

	/*
	 * The decision on whether to submit a request using semaphores
	 * depends on the saturated state of the engine. We only compute
	 * this during HW submission of the request, and we need this
	 * state to be globally applied to all requests being submitted
	 * to this engine. Virtual engines encompass more than one physical
	 * engine and so we cannot accurately tell in advance if one of
	 * those engines is already saturated and so cannot afford to use a
	 * semaphore and be pessimized in priority for doing so -- if we
	 * are the only context using semaphores after all other clients
	 * have stopped, we will be starved on the saturated system. Such a
	 * global switch for semaphores is less than ideal, but alas is the
	 * current compromise.
	 */
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	intel_engine_init_execlists(&ve->base);

	ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
	if (!ve->base.sched_engine) {
		err = -ENOMEM;
		goto err_put;
	}
	ve->base.sched_engine->private_data = &ve->base;

	ve->base.cops = &virtual_context_ops;
	ve->base.request_alloc = execlists_request_alloc;

	ve->base.sched_engine->schedule = i915_schedule;
	ve->base.sched_engine->kick_backend = kick_execlists;
	ve->base.submit_request = virtual_submit_request;

	INIT_LIST_HEAD(virtual_queue(ve));
	tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet);

	intel_context_init(&ve->context, &ve->base);

	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
	if (!ve->base.breadcrumbs) {
		err = -ENOMEM;
		goto err_put;
	}

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			DRM_DEBUG("duplicate %s entry in load balancer\n",
				  sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		/*
		 * The virtual engine implementation is tightly coupled to
		 * the execlists backend -- we push requests directly into
		 * a tree inside each physical engine. We could support
		 * layering if we handle cloning of the requests and
		 * submitting a copy into each backend.
		 */
		if (sibling->sched_engine->tasklet.callback !=
		    execlists_submission_tasklet) {
			err = -ENODEV;
			goto err_put;
		}

		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);

		ve->siblings[ve->num_siblings++] = sibling;
		ve->base.mask |= sibling->mask;
		ve->base.logical_mask |= sibling->logical_mask;

		/*
		 * All physical engines must be compatible for their emission
		 * functions (as we build the instructions during request
		 * construction and do not alter them before submission
		 * on the physical engine). We use the engine class as a
		 * guide here, although that could be refined.
		 */
		if (ve->base.class != OTHER_CLASS) {
			if (ve->base.class != sibling->class) {
				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
					  sibling->class, ve->base.class);
				err = -EINVAL;
				goto err_put;
			}
			continue;
		}

		ve->base.class = sibling->class;
		ve->base.uabi_class = sibling->uabi_class;
		snprintf(ve->base.name, sizeof(ve->base.name),
			 "v%dx%d", ve->base.class, count);
		ve->base.context_size = sibling->context_size;

		ve->base.add_active_request = sibling->add_active_request;
		ve->base.remove_active_request = sibling->remove_active_request;
		ve->base.emit_bb_start = sibling->emit_bb_start;
		ve->base.emit_flush = sibling->emit_flush;
		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
		ve->base.emit_fini_breadcrumb_dw =
			sibling->emit_fini_breadcrumb_dw;

		ve->base.flags = sibling->flags;
	}

	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;

	virtual_engine_initial_hint(ve);
	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}
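
/*
 * Example: how a caller ends up in execlists_create_virtual(). The uAPI
 * path is I915_CONTEXT_PARAM_ENGINES with a load-balancing extension,
 * which boils down to something like the following in-kernel sketch (the
 * surrounding setup is illustrative, only the core call is taken from
 * this file):
 *
 *	struct intel_engine_cs *siblings[] = {
 *		gt->engine_class[VIDEO_DECODE_CLASS][0],
 *		gt->engine_class[VIDEO_DECODE_CLASS][1],
 *	};
 *	struct intel_context *ce;
 *
 *	ce = execlists_create_virtual(siblings, ARRAY_SIZE(siblings), 0);
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 *
 *	// requests created against ce are load-balanced across the
 *	// siblings by virtual_submit_request()/virtual_submission_tasklet()
 *
 * All siblings must share an engine class and use the execlists
 * submission tasklet, otherwise the loop above rejects the combination
 * with -EINVAL/-ENODEV.
 */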

void intel_execlists_show_requests(struct intel_engine_cs *engine,
				   struct drm_printer *m,
				   void (*show_request)(struct drm_printer *m,
							const struct i915_request *rq,
							const char *prefix,
							int indent),
				   unsigned int max)
{
	const struct intel_engine_execlists *execlists = &engine->execlists;
	struct i915_sched_engine *sched_engine = engine->sched_engine;
	struct i915_request *rq, *last;
	unsigned long flags;
	unsigned int count;
	struct rb_node *rb;

	spin_lock_irqsave(&sched_engine->lock, flags);

	last = NULL;
	count = 0;
	list_for_each_entry(rq, &sched_engine->requests, sched.link) {
		if (count++ < max - 1)
			show_request(m, rq, "\t\t", 0);
		else
			last = rq;
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d executing requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	if (sched_engine->queue_priority_hint != INT_MIN)
		drm_printf(m, "\t\tQueue priority hint: %d\n",
			   READ_ONCE(sched_engine->queue_priority_hint));

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);

		priolist_for_each_request(rq, p) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\t", 0);
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d queued requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	last = NULL;
	count = 0;
	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq = READ_ONCE(ve->request);

		if (rq) {
			if (count++ < max - 1)
				show_request(m, rq, "\t\t", 0);
			else
				last = rq;
		}
	}
	if (last) {
		if (count > max) {
			drm_printf(m,
				   "\t\t...skipping %d virtual requests...\n",
				   count - max);
		}
		show_request(m, last, "\t\t", 0);
	}

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_execlists.c"
#endif