xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_lrc.c (revision d997e240)
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30 
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences from the legacy HW contexts is that logical
40  * ring contexts incorporate many more things into the context's state, such
41  * as PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there means you don't need to do a ppgtt->switch_mm yourself;
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But what about the ringbuffer control registers (head, tail, etc.)?
49  * Shouldn't we just need one set of those per engine command streamer? This
50  * is where the name "Logical Rings" starts to make sense: by virtualizing
51  * the rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a context is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before), we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use it. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time, but instead, kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bit submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one request is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue, the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second is still executing, the request for that second
126  * context will be at the head of the queue when we remove the first one. It is
127  * then resubmitted along with a new request for a different context, which
128  * causes the hardware to continue executing the second request and to queue the
129  * new one (the GPU detects a context being preempted with the same context and
130  * optimizes the context switch flow by not doing a full preemption, but just
131  * sampling the new tail pointer). An illustrative sketch of the ELSP pairing
132  * rule described above follows this comment block.
133  */
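
/*
 * An illustrative sketch of the ELSP pairing rule described above. This is
 * not the real dequeue code used later in this file; the helper name and the
 * flat queue[] parameter are invented purely for illustration. It shows how
 * consecutive requests that share a context are coalesced so that at most two
 * distinct contexts are offered to the ports (with a NULL second context if
 * nothing else is queued)::
 *
 *	static void pick_elsp_pair(struct i915_request *queue[], int count,
 *				   struct i915_request *out[2])
 *	{
 *		int i;
 *
 *		out[0] = count ? queue[0] : NULL;
 *		out[1] = NULL;	// NULL second context by default
 *
 *		if (!out[0])
 *			return;
 *
 *		// Requests from the same context as out[0] are folded into
 *		// the first port; the next distinct context fills the second.
 *		for (i = 1; i < count; i++) {
 *			if (queue[i]->context != out[0]->context) {
 *				out[1] = queue[i];
 *				break;
 *			}
 *		}
 *	}
 */
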
134 #include <linux/interrupt.h>
135 
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_breadcrumbs.h"
141 #include "intel_context.h"
142 #include "intel_engine_pm.h"
143 #include "intel_gt.h"
144 #include "intel_gt_pm.h"
145 #include "intel_gt_requests.h"
146 #include "intel_lrc_reg.h"
147 #include "intel_mocs.h"
148 #include "intel_reset.h"
149 #include "intel_ring.h"
150 #include "intel_workarounds.h"
151 #include "shmem_utils.h"
152 
153 #define RING_EXECLIST_QFULL		(1 << 0x2)
154 #define RING_EXECLIST1_VALID		(1 << 0x3)
155 #define RING_EXECLIST0_VALID		(1 << 0x4)
156 #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
157 #define RING_EXECLIST1_ACTIVE		(1 << 0x11)
158 #define RING_EXECLIST0_ACTIVE		(1 << 0x12)
159 
160 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
161 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
162 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
163 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
164 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
165 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
166 
167 #define GEN8_CTX_STATUS_COMPLETED_MASK \
168 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
169 
170 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
171 
172 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
173 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
174 #define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
175 #define GEN12_IDLE_CTX_ID		0x7FF
176 #define GEN12_CSB_CTX_VALID(csb_dw) \
177 	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
178 
179 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
180 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
181 
182 struct virtual_engine {
183 	struct intel_engine_cs base;
184 	struct intel_context context;
185 	struct rcu_work rcu;
186 
187 	/*
188 	 * We allow only a single request through the virtual engine at a time
189 	 * (each request in the timeline waits for the completion fence of
190 	 * the previous before being submitted). By restricting ourselves to
191 	 * only submitting a single request, each request is placed on to a
192 	 * physical engine to maximise load spreading (by virtue of the late greedy
193 	 * scheduling -- each real engine takes the next available request
194 	 * upon idling).
195 	 */
196 	struct i915_request *request;
197 
198 	/*
199 	 * We keep a rbtree of available virtual engines inside each physical
200 	 * engine, sorted by priority. Here we preallocate the nodes we need
201 	 * for the virtual engine, indexed by physical_engine->id.
202 	 */
203 	struct ve_node {
204 		struct rb_node rb;
205 		int prio;
206 	} nodes[I915_NUM_ENGINES];
207 
208 	/*
209 	 * Keep track of bonded pairs -- restrictions upon our selection of
210 	 * physical engines any particular request may be submitted to. If we
211 	 * receive a submit-fence from a master engine, we will only use one of
212 	 * the sibling_mask physical engines (see the sketch after this struct).
213 	 */
214 	struct ve_bond {
215 		const struct intel_engine_cs *master;
216 		intel_engine_mask_t sibling_mask;
217 	} *bonds;
218 	unsigned int num_bonds;
219 
220 	/* And finally, which physical engines this virtual engine maps onto. */
221 	unsigned int num_siblings;
222 	struct intel_engine_cs *siblings[];
223 };
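
/*
 * Illustrative only: a sketch of how a bond restricts sibling selection. The
 * helper below is hypothetical (the real lookup lives further down this
 * file); it simply shows that, given the master engine that signalled the
 * submit-fence, the allowed physical engines are the intersection of the
 * request's execution_mask with the bond's sibling_mask::
 *
 *	static intel_engine_mask_t
 *	bonded_mask_sketch(const struct virtual_engine *ve,
 *			   const struct intel_engine_cs *master,
 *			   intel_engine_mask_t execution_mask)
 *	{
 *		unsigned int i;
 *
 *		for (i = 0; i < ve->num_bonds; i++) {
 *			if (ve->bonds[i].master == master)
 *				return execution_mask &
 *				       ve->bonds[i].sibling_mask;
 *		}
 *
 *		// No bond registered for this master: no extra restriction.
 *		return execution_mask;
 *	}
 */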
224 
225 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
226 {
227 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
228 	return container_of(engine, struct virtual_engine, base);
229 }
230 
231 static int __execlists_context_alloc(struct intel_context *ce,
232 				     struct intel_engine_cs *engine);
233 
234 static void execlists_init_reg_state(u32 *reg_state,
235 				     const struct intel_context *ce,
236 				     const struct intel_engine_cs *engine,
237 				     const struct intel_ring *ring,
238 				     bool close);
239 static void
240 __execlists_update_reg_state(const struct intel_context *ce,
241 			     const struct intel_engine_cs *engine,
242 			     u32 head);
243 
244 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
245 {
246 	if (INTEL_GEN(engine->i915) >= 12)
247 		return 0x60;
248 	else if (INTEL_GEN(engine->i915) >= 9)
249 		return 0x54;
250 	else if (engine->class == RENDER_CLASS)
251 		return 0x58;
252 	else
253 		return -1;
254 }
255 
256 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
257 {
258 	if (INTEL_GEN(engine->i915) >= 12)
259 		return 0x74;
260 	else if (INTEL_GEN(engine->i915) >= 9)
261 		return 0x68;
262 	else if (engine->class == RENDER_CLASS)
263 		return 0xd8;
264 	else
265 		return -1;
266 }
267 
268 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
269 {
270 	if (INTEL_GEN(engine->i915) >= 12)
271 		return 0x12;
272 	else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
273 		return 0x18;
274 	else
275 		return -1;
276 }
277 
278 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
279 {
280 	int x;
281 
282 	x = lrc_ring_wa_bb_per_ctx(engine);
283 	if (x < 0)
284 		return x;
285 
286 	return x + 2;
287 }
288 
289 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
290 {
291 	int x;
292 
293 	x = lrc_ring_indirect_ptr(engine);
294 	if (x < 0)
295 		return x;
296 
297 	return x + 2;
298 }
299 
300 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
301 {
302 	if (engine->class != RENDER_CLASS)
303 		return -1;
304 
305 	if (INTEL_GEN(engine->i915) >= 12)
306 		return 0xb6;
307 	else if (INTEL_GEN(engine->i915) >= 11)
308 		return 0xaa;
309 	else
310 		return -1;
311 }
312 
313 static u32
314 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
315 {
316 	switch (INTEL_GEN(engine->i915)) {
317 	default:
318 		MISSING_CASE(INTEL_GEN(engine->i915));
319 		fallthrough;
320 	case 12:
321 		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
322 	case 11:
323 		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
324 	case 10:
325 		return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
326 	case 9:
327 		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
328 	case 8:
329 		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
330 	}
331 }
332 
333 static void
334 lrc_ring_setup_indirect_ctx(u32 *regs,
335 			    const struct intel_engine_cs *engine,
336 			    u32 ctx_bb_ggtt_addr,
337 			    u32 size)
338 {
339 	GEM_BUG_ON(!size);
340 	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
341 	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
342 	regs[lrc_ring_indirect_ptr(engine) + 1] =
343 		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
344 
345 	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
346 	regs[lrc_ring_indirect_offset(engine) + 1] =
347 		lrc_ring_indirect_offset_default(engine) << 6;
348 }
349 
350 static u32 intel_context_get_runtime(const struct intel_context *ce)
351 {
352 	/*
353 	 * We can use either ppHWSP[16] which is recorded before the context
354 	 * switch (and so excludes the cost of context switches) or use the
355 	 * switch (and so excludes the cost of context switches) or the
356 	 * and so includes the cost of the save.
357 	 */
358 	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
359 }
360 
361 static void mark_eio(struct i915_request *rq)
362 {
363 	if (i915_request_completed(rq))
364 		return;
365 
366 	GEM_BUG_ON(i915_request_signaled(rq));
367 
368 	i915_request_set_error_once(rq, -EIO);
369 	i915_request_mark_complete(rq);
370 }
371 
372 static struct i915_request *
373 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
374 {
375 	struct i915_request *active = rq;
376 
377 	rcu_read_lock();
378 	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
379 		if (i915_request_completed(rq))
380 			break;
381 
382 		active = rq;
383 	}
384 	rcu_read_unlock();
385 
386 	return active;
387 }
388 
389 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
390 {
391 	return (i915_ggtt_offset(engine->status_page.vma) +
392 		I915_GEM_HWS_PREEMPT_ADDR);
393 }
394 
395 static inline void
396 ring_set_paused(const struct intel_engine_cs *engine, int state)
397 {
398 	/*
399 	 * We inspect HWS_PREEMPT with a semaphore inside
400 	 * engine->emit_fini_breadcrumb. If the dword is true,
401 	 * the ring is paused as the semaphore will busywait
402 	 * until the dword is false.
403 	 */
404 	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
405 	if (state)
406 		wmb();
407 }
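
/*
 * For illustration only -- a sketch of the kind of busywait referred to in
 * the comment above, not necessarily the exact emission code further down
 * this file. The fini breadcrumb ends with an MI_SEMAPHORE_WAIT that polls
 * the HWS_PREEMPT dword and stalls the ring while it reads non-zero::
 *
 *	// Assumes 4 dwords of ring space have already been reserved in cs.
 *	static u32 *emit_pause_semaphore_sketch(struct i915_request *rq,
 *						u32 *cs)
 *	{
 *		*cs++ = MI_SEMAPHORE_WAIT |
 *			MI_SEMAPHORE_GLOBAL_GTT |
 *			MI_SEMAPHORE_POLL |
 *			MI_SEMAPHORE_SAD_EQ_SDD;
 *		*cs++ = 0;	// resume once the dword reads zero again
 *		*cs++ = intel_hws_preempt_address(rq->engine);
 *		*cs++ = 0;
 *
 *		return cs;
 *	}
 */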
408 
409 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
410 {
411 	return rb_entry(rb, struct i915_priolist, node);
412 }
413 
414 static inline int rq_prio(const struct i915_request *rq)
415 {
416 	return READ_ONCE(rq->sched.attr.priority);
417 }
418 
419 static int effective_prio(const struct i915_request *rq)
420 {
421 	int prio = rq_prio(rq);
422 
423 	/*
424 	 * If this request is special and must not be interrupted at any
425 	 * cost, so be it. Note we are only checking the most recent request
426 	 * in the context and so may be masking an earlier vip request. It
427 	 * is hoped that under the conditions where nopreempt is used, this
428 	 * will not matter (i.e. all requests to that context will be
429 	 * nopreempt for as long as desired).
430 	 */
431 	if (i915_request_has_nopreempt(rq))
432 		prio = I915_PRIORITY_UNPREEMPTABLE;
433 
434 	return prio;
435 }
436 
437 static int queue_prio(const struct intel_engine_execlists *execlists)
438 {
439 	struct i915_priolist *p;
440 	struct rb_node *rb;
441 
442 	rb = rb_first_cached(&execlists->queue);
443 	if (!rb)
444 		return INT_MIN;
445 
446 	/*
447 	 * As the priolist[] is inverted, with the highest priority in [0],
448 	 * we have to flip the index value to recover the priority.
449 	 */
450 	p = to_priolist(rb);
451 	if (!I915_USER_PRIORITY_SHIFT)
452 		return p->priority;
453 
454 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
455 }
456 
457 static inline bool need_preempt(const struct intel_engine_cs *engine,
458 				const struct i915_request *rq,
459 				struct rb_node *rb)
460 {
461 	int last_prio;
462 
463 	if (!intel_engine_has_semaphores(engine))
464 		return false;
465 
466 	/*
467 	 * Check if the current priority hint merits a preemption attempt.
468 	 *
469 	 * We record the highest priority value we saw during rescheduling
470 	 * prior to this dequeue, therefore we know that if it is strictly
471 	 * less than the current tail of ELSP[0], we do not need to force
472 	 * a preempt-to-idle cycle.
473 	 *
474 	 * However, the priority hint is a mere hint that we may need to
475 	 * preempt. If that hint is stale or we may be trying to preempt
476 	 * ourselves, ignore the request.
477 	 *
478 	 * More naturally we would write
479 	 *      prio >= max(0, last);
480 	 * except that we wish to prevent triggering preemption at the same
481 	 * priority level: the task that is running should remain running
482 	 * to preserve FIFO ordering of dependencies.
483 	 */
484 	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
485 	if (engine->execlists.queue_priority_hint <= last_prio)
486 		return false;
487 
488 	/*
489 	 * Check against the first request in ELSP[1], it will, thanks to the
490 	 * power of PI, be the highest priority of that context.
491 	 */
492 	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
493 	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
494 		return true;
495 
496 	if (rb) {
497 		struct virtual_engine *ve =
498 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
499 		bool preempt = false;
500 
501 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
502 			struct i915_request *next;
503 
504 			rcu_read_lock();
505 			next = READ_ONCE(ve->request);
506 			if (next)
507 				preempt = rq_prio(next) > last_prio;
508 			rcu_read_unlock();
509 		}
510 
511 		if (preempt)
512 			return preempt;
513 	}
514 
515 	/*
516 	 * If the inflight context did not trigger the preemption, then maybe
517 	 * it was the set of queued requests? Pick the highest priority in
518 	 * the queue (the first active priolist) and see if it deserves to be
519 	 * running instead of ELSP[0].
520 	 *
521 	 * The highest priority request in the queue cannot be either
522 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
523 	 * context, its priority would not exceed ELSP[0] aka last_prio.
524 	 */
525 	return queue_prio(&engine->execlists) > last_prio;
526 }
527 
528 __maybe_unused static inline bool
529 assert_priority_queue(const struct i915_request *prev,
530 		      const struct i915_request *next)
531 {
532 	/*
533 	 * Without preemption, the prev may refer to the still active element
534 	 * which we refuse to let go.
535 	 *
536 	 * Even with preemption, there are times when we think it is better not
537 	 * to preempt and leave an ostensibly lower priority request in flight.
538 	 */
539 	if (i915_request_is_active(prev))
540 		return true;
541 
542 	return rq_prio(prev) >= rq_prio(next);
543 }
544 
545 /*
546  * The context descriptor encodes various attributes of a context,
547  * including its GTT address and some flags. Because it's fairly
548  * expensive to calculate, we'll just do it once and cache the result,
549  * which remains valid until the context is unpinned.
550  *
551  * This is what a descriptor looks like, from LSB to MSB::
552  *
553  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
554  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
555  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
556  *      bits 53-54:    mbz, reserved for use by hardware
557  *      bits 55-63:    group ID, currently unused and set to 0
558  *
559  * Starting from Gen11, the upper dword of the descriptor has a new format:
560  *
561  *      bits 32-36:    reserved
562  *      bits 37-47:    SW context ID
563  *      bits 48-53:    engine instance
564  *      bit 54:        mbz, reserved for use by hardware
565  *      bits 55-60:    SW counter
566  *      bits 61-63:    engine class
567  *
568  * engine info, SW context ID and SW counter need to form a unique number
569  * (Context ID) per lrc.
570  */
571 static u32
572 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
573 {
574 	u32 desc;
575 
576 	desc = INTEL_LEGACY_32B_CONTEXT;
577 	if (i915_vm_is_4lvl(ce->vm))
578 		desc = INTEL_LEGACY_64B_CONTEXT;
579 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
580 
581 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
582 	if (IS_GEN(engine->i915, 8))
583 		desc |= GEN8_CTX_L3LLC_COHERENT;
584 
585 	return i915_ggtt_offset(ce->state) | desc;
586 }
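
/*
 * Illustrative only: lrc_descriptor() above fills in the lower dword; the
 * Gen11+ upper dword described in the layout comment is composed separately
 * (see the ccid handling in __execlists_schedule_in()). Using the bit
 * positions from that comment, packing the fields would look roughly like::
 *
 *	static u64 gen11_upper_dword_sketch(u64 sw_ctx_id,   // bits 37-47
 *					    u64 instance,    // bits 48-53
 *					    u64 sw_counter,  // bits 55-60
 *					    u64 class)       // bits 61-63
 *	{
 *		return sw_ctx_id << 37 |
 *		       instance << 48 |
 *		       sw_counter << 55 |
 *		       class << 61;
 *	}
 */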
587 
588 static inline unsigned int dword_in_page(void *addr)
589 {
590 	return offset_in_page(addr) / sizeof(u32);
591 }
592 
593 static void set_offsets(u32 *regs,
594 			const u8 *data,
595 			const struct intel_engine_cs *engine,
596 			bool clear)
597 #define NOP(x) (BIT(7) | (x))
598 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
599 #define POSTED BIT(0)
600 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
601 #define REG16(x) \
602 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
603 	(((x) >> 2) & 0x7f)
604 #define END(total_state_size) 0, (total_state_size)
605 {
606 	const u32 base = engine->mmio_base;
607 
608 	while (*data) {
609 		u8 count, flags;
610 
611 		if (*data & BIT(7)) { /* skip */
612 			count = *data++ & ~BIT(7);
613 			if (clear)
614 				memset32(regs, MI_NOOP, count);
615 			regs += count;
616 			continue;
617 		}
618 
619 		count = *data & 0x3f;
620 		flags = *data >> 6;
621 		data++;
622 
623 		*regs = MI_LOAD_REGISTER_IMM(count);
624 		if (flags & POSTED)
625 			*regs |= MI_LRI_FORCE_POSTED;
626 		if (INTEL_GEN(engine->i915) >= 11)
627 			*regs |= MI_LRI_LRM_CS_MMIO;
628 		regs++;
629 
630 		GEM_BUG_ON(!count);
631 		do {
632 			u32 offset = 0;
633 			u8 v;
634 
635 			do {
636 				v = *data++;
637 				offset <<= 7;
638 				offset |= v & ~BIT(7);
639 			} while (v & BIT(7));
640 
641 			regs[0] = base + (offset << 2);
642 			if (clear)
643 				regs[1] = 0;
644 			regs += 2;
645 		} while (--count);
646 	}
647 
648 	if (clear) {
649 		u8 count = *++data;
650 
651 		/* Clear past the tail for HW access */
652 		GEM_BUG_ON(dword_in_page(regs) > count);
653 		memset32(regs, MI_NOOP, count - dword_in_page(regs));
654 
655 		/* Close the batch; used mainly by live_lrc_layout() */
656 		*regs = MI_BATCH_BUFFER_END;
657 		if (INTEL_GEN(engine->i915) >= 10)
658 			*regs |= BIT(0);
659 	}
660 }
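
/*
 * Worked example of the encoding consumed by set_offsets() above, taken from
 * the start of gen8_xcs_offsets[] below and assuming clear == true. NOP(1)
 * emits one MI_NOOP placeholder, LRI(11, 0) becomes MI_LOAD_REGISTER_IMM(11)
 * (no MI_LRI_FORCE_POSTED, and no MI_LRI_LRM_CS_MMIO before Gen11), and every
 * REG()/REG16() entry expands to an (mmio_base + offset, value) pair::
 *
 *	regs[0] = MI_NOOP;			// NOP(1)
 *	regs[1] = MI_LOAD_REGISTER_IMM(11);	// LRI(11, 0)
 *	regs[2] = base + 0x244;			// REG16(0x244), RING_CONTEXT_CONTROL
 *	regs[3] = 0;				// value cleared for the default state
 *	regs[4] = base + 0x034;			// REG(0x034), RING_HEAD
 *	regs[5] = 0;
 *	// ... and so on for the remaining nine registers of this LRI block.
 */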
661 
662 static const u8 gen8_xcs_offsets[] = {
663 	NOP(1),
664 	LRI(11, 0),
665 	REG16(0x244),
666 	REG(0x034),
667 	REG(0x030),
668 	REG(0x038),
669 	REG(0x03c),
670 	REG(0x168),
671 	REG(0x140),
672 	REG(0x110),
673 	REG(0x11c),
674 	REG(0x114),
675 	REG(0x118),
676 
677 	NOP(9),
678 	LRI(9, 0),
679 	REG16(0x3a8),
680 	REG16(0x28c),
681 	REG16(0x288),
682 	REG16(0x284),
683 	REG16(0x280),
684 	REG16(0x27c),
685 	REG16(0x278),
686 	REG16(0x274),
687 	REG16(0x270),
688 
689 	NOP(13),
690 	LRI(2, 0),
691 	REG16(0x200),
692 	REG(0x028),
693 
694 	END(80)
695 };
696 
697 static const u8 gen9_xcs_offsets[] = {
698 	NOP(1),
699 	LRI(14, POSTED),
700 	REG16(0x244),
701 	REG(0x034),
702 	REG(0x030),
703 	REG(0x038),
704 	REG(0x03c),
705 	REG(0x168),
706 	REG(0x140),
707 	REG(0x110),
708 	REG(0x11c),
709 	REG(0x114),
710 	REG(0x118),
711 	REG(0x1c0),
712 	REG(0x1c4),
713 	REG(0x1c8),
714 
715 	NOP(3),
716 	LRI(9, POSTED),
717 	REG16(0x3a8),
718 	REG16(0x28c),
719 	REG16(0x288),
720 	REG16(0x284),
721 	REG16(0x280),
722 	REG16(0x27c),
723 	REG16(0x278),
724 	REG16(0x274),
725 	REG16(0x270),
726 
727 	NOP(13),
728 	LRI(1, POSTED),
729 	REG16(0x200),
730 
731 	NOP(13),
732 	LRI(44, POSTED),
733 	REG(0x028),
734 	REG(0x09c),
735 	REG(0x0c0),
736 	REG(0x178),
737 	REG(0x17c),
738 	REG16(0x358),
739 	REG(0x170),
740 	REG(0x150),
741 	REG(0x154),
742 	REG(0x158),
743 	REG16(0x41c),
744 	REG16(0x600),
745 	REG16(0x604),
746 	REG16(0x608),
747 	REG16(0x60c),
748 	REG16(0x610),
749 	REG16(0x614),
750 	REG16(0x618),
751 	REG16(0x61c),
752 	REG16(0x620),
753 	REG16(0x624),
754 	REG16(0x628),
755 	REG16(0x62c),
756 	REG16(0x630),
757 	REG16(0x634),
758 	REG16(0x638),
759 	REG16(0x63c),
760 	REG16(0x640),
761 	REG16(0x644),
762 	REG16(0x648),
763 	REG16(0x64c),
764 	REG16(0x650),
765 	REG16(0x654),
766 	REG16(0x658),
767 	REG16(0x65c),
768 	REG16(0x660),
769 	REG16(0x664),
770 	REG16(0x668),
771 	REG16(0x66c),
772 	REG16(0x670),
773 	REG16(0x674),
774 	REG16(0x678),
775 	REG16(0x67c),
776 	REG(0x068),
777 
778 	END(176)
779 };
780 
781 static const u8 gen12_xcs_offsets[] = {
782 	NOP(1),
783 	LRI(13, POSTED),
784 	REG16(0x244),
785 	REG(0x034),
786 	REG(0x030),
787 	REG(0x038),
788 	REG(0x03c),
789 	REG(0x168),
790 	REG(0x140),
791 	REG(0x110),
792 	REG(0x1c0),
793 	REG(0x1c4),
794 	REG(0x1c8),
795 	REG(0x180),
796 	REG16(0x2b4),
797 
798 	NOP(5),
799 	LRI(9, POSTED),
800 	REG16(0x3a8),
801 	REG16(0x28c),
802 	REG16(0x288),
803 	REG16(0x284),
804 	REG16(0x280),
805 	REG16(0x27c),
806 	REG16(0x278),
807 	REG16(0x274),
808 	REG16(0x270),
809 
810 	END(80)
811 };
812 
813 static const u8 gen8_rcs_offsets[] = {
814 	NOP(1),
815 	LRI(14, POSTED),
816 	REG16(0x244),
817 	REG(0x034),
818 	REG(0x030),
819 	REG(0x038),
820 	REG(0x03c),
821 	REG(0x168),
822 	REG(0x140),
823 	REG(0x110),
824 	REG(0x11c),
825 	REG(0x114),
826 	REG(0x118),
827 	REG(0x1c0),
828 	REG(0x1c4),
829 	REG(0x1c8),
830 
831 	NOP(3),
832 	LRI(9, POSTED),
833 	REG16(0x3a8),
834 	REG16(0x28c),
835 	REG16(0x288),
836 	REG16(0x284),
837 	REG16(0x280),
838 	REG16(0x27c),
839 	REG16(0x278),
840 	REG16(0x274),
841 	REG16(0x270),
842 
843 	NOP(13),
844 	LRI(1, 0),
845 	REG(0x0c8),
846 
847 	END(80)
848 };
849 
850 static const u8 gen9_rcs_offsets[] = {
851 	NOP(1),
852 	LRI(14, POSTED),
853 	REG16(0x244),
854 	REG(0x34),
855 	REG(0x30),
856 	REG(0x38),
857 	REG(0x3c),
858 	REG(0x168),
859 	REG(0x140),
860 	REG(0x110),
861 	REG(0x11c),
862 	REG(0x114),
863 	REG(0x118),
864 	REG(0x1c0),
865 	REG(0x1c4),
866 	REG(0x1c8),
867 
868 	NOP(3),
869 	LRI(9, POSTED),
870 	REG16(0x3a8),
871 	REG16(0x28c),
872 	REG16(0x288),
873 	REG16(0x284),
874 	REG16(0x280),
875 	REG16(0x27c),
876 	REG16(0x278),
877 	REG16(0x274),
878 	REG16(0x270),
879 
880 	NOP(13),
881 	LRI(1, 0),
882 	REG(0xc8),
883 
884 	NOP(13),
885 	LRI(44, POSTED),
886 	REG(0x28),
887 	REG(0x9c),
888 	REG(0xc0),
889 	REG(0x178),
890 	REG(0x17c),
891 	REG16(0x358),
892 	REG(0x170),
893 	REG(0x150),
894 	REG(0x154),
895 	REG(0x158),
896 	REG16(0x41c),
897 	REG16(0x600),
898 	REG16(0x604),
899 	REG16(0x608),
900 	REG16(0x60c),
901 	REG16(0x610),
902 	REG16(0x614),
903 	REG16(0x618),
904 	REG16(0x61c),
905 	REG16(0x620),
906 	REG16(0x624),
907 	REG16(0x628),
908 	REG16(0x62c),
909 	REG16(0x630),
910 	REG16(0x634),
911 	REG16(0x638),
912 	REG16(0x63c),
913 	REG16(0x640),
914 	REG16(0x644),
915 	REG16(0x648),
916 	REG16(0x64c),
917 	REG16(0x650),
918 	REG16(0x654),
919 	REG16(0x658),
920 	REG16(0x65c),
921 	REG16(0x660),
922 	REG16(0x664),
923 	REG16(0x668),
924 	REG16(0x66c),
925 	REG16(0x670),
926 	REG16(0x674),
927 	REG16(0x678),
928 	REG16(0x67c),
929 	REG(0x68),
930 
931 	END(176)
932 };
933 
934 static const u8 gen11_rcs_offsets[] = {
935 	NOP(1),
936 	LRI(15, POSTED),
937 	REG16(0x244),
938 	REG(0x034),
939 	REG(0x030),
940 	REG(0x038),
941 	REG(0x03c),
942 	REG(0x168),
943 	REG(0x140),
944 	REG(0x110),
945 	REG(0x11c),
946 	REG(0x114),
947 	REG(0x118),
948 	REG(0x1c0),
949 	REG(0x1c4),
950 	REG(0x1c8),
951 	REG(0x180),
952 
953 	NOP(1),
954 	LRI(9, POSTED),
955 	REG16(0x3a8),
956 	REG16(0x28c),
957 	REG16(0x288),
958 	REG16(0x284),
959 	REG16(0x280),
960 	REG16(0x27c),
961 	REG16(0x278),
962 	REG16(0x274),
963 	REG16(0x270),
964 
965 	LRI(1, POSTED),
966 	REG(0x1b0),
967 
968 	NOP(10),
969 	LRI(1, 0),
970 	REG(0x0c8),
971 
972 	END(80)
973 };
974 
975 static const u8 gen12_rcs_offsets[] = {
976 	NOP(1),
977 	LRI(13, POSTED),
978 	REG16(0x244),
979 	REG(0x034),
980 	REG(0x030),
981 	REG(0x038),
982 	REG(0x03c),
983 	REG(0x168),
984 	REG(0x140),
985 	REG(0x110),
986 	REG(0x1c0),
987 	REG(0x1c4),
988 	REG(0x1c8),
989 	REG(0x180),
990 	REG16(0x2b4),
991 
992 	NOP(5),
993 	LRI(9, POSTED),
994 	REG16(0x3a8),
995 	REG16(0x28c),
996 	REG16(0x288),
997 	REG16(0x284),
998 	REG16(0x280),
999 	REG16(0x27c),
1000 	REG16(0x278),
1001 	REG16(0x274),
1002 	REG16(0x270),
1003 
1004 	LRI(3, POSTED),
1005 	REG(0x1b0),
1006 	REG16(0x5a8),
1007 	REG16(0x5ac),
1008 
1009 	NOP(6),
1010 	LRI(1, 0),
1011 	REG(0x0c8),
1012 	NOP(3 + 9 + 1),
1013 
1014 	LRI(51, POSTED),
1015 	REG16(0x588),
1016 	REG16(0x588),
1017 	REG16(0x588),
1018 	REG16(0x588),
1019 	REG16(0x588),
1020 	REG16(0x588),
1021 	REG(0x028),
1022 	REG(0x09c),
1023 	REG(0x0c0),
1024 	REG(0x178),
1025 	REG(0x17c),
1026 	REG16(0x358),
1027 	REG(0x170),
1028 	REG(0x150),
1029 	REG(0x154),
1030 	REG(0x158),
1031 	REG16(0x41c),
1032 	REG16(0x600),
1033 	REG16(0x604),
1034 	REG16(0x608),
1035 	REG16(0x60c),
1036 	REG16(0x610),
1037 	REG16(0x614),
1038 	REG16(0x618),
1039 	REG16(0x61c),
1040 	REG16(0x620),
1041 	REG16(0x624),
1042 	REG16(0x628),
1043 	REG16(0x62c),
1044 	REG16(0x630),
1045 	REG16(0x634),
1046 	REG16(0x638),
1047 	REG16(0x63c),
1048 	REG16(0x640),
1049 	REG16(0x644),
1050 	REG16(0x648),
1051 	REG16(0x64c),
1052 	REG16(0x650),
1053 	REG16(0x654),
1054 	REG16(0x658),
1055 	REG16(0x65c),
1056 	REG16(0x660),
1057 	REG16(0x664),
1058 	REG16(0x668),
1059 	REG16(0x66c),
1060 	REG16(0x670),
1061 	REG16(0x674),
1062 	REG16(0x678),
1063 	REG16(0x67c),
1064 	REG(0x068),
1065 	REG(0x084),
1066 	NOP(1),
1067 
1068 	END(192)
1069 };
1070 
1071 #undef END
1072 #undef REG16
1073 #undef REG
1074 #undef LRI
1075 #undef NOP
1076 
1077 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1078 {
1079 	/*
1080 	 * The gen12+ lists only have the registers we program in the basic
1081 	 * default state. We rely on the context image using relative
1082 	 * addressing to automatically fix up the register state between the
1083 	 * physical engines for the virtual engine.
1084 	 */
1085 	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1086 		   !intel_engine_has_relative_mmio(engine));
1087 
1088 	if (engine->class == RENDER_CLASS) {
1089 		if (INTEL_GEN(engine->i915) >= 12)
1090 			return gen12_rcs_offsets;
1091 		else if (INTEL_GEN(engine->i915) >= 11)
1092 			return gen11_rcs_offsets;
1093 		else if (INTEL_GEN(engine->i915) >= 9)
1094 			return gen9_rcs_offsets;
1095 		else
1096 			return gen8_rcs_offsets;
1097 	} else {
1098 		if (INTEL_GEN(engine->i915) >= 12)
1099 			return gen12_xcs_offsets;
1100 		else if (INTEL_GEN(engine->i915) >= 9)
1101 			return gen9_xcs_offsets;
1102 		else
1103 			return gen8_xcs_offsets;
1104 	}
1105 }
1106 
1107 static struct i915_request *
1108 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1109 {
1110 	struct i915_request *rq, *rn, *active = NULL;
1111 	struct list_head *pl;
1112 	int prio = I915_PRIORITY_INVALID;
1113 
1114 	lockdep_assert_held(&engine->active.lock);
1115 
1116 	list_for_each_entry_safe_reverse(rq, rn,
1117 					 &engine->active.requests,
1118 					 sched.link) {
1119 		if (i915_request_completed(rq)) {
1120 			list_del_init(&rq->sched.link);
1121 			continue;
1122 		}
1123 
1124 		__i915_request_unsubmit(rq);
1125 
1126 		/*
1127 		 * Push the request back into the queue for later resubmission.
1128 		 * If this request is not native to this physical engine (i.e.
1129 		 * it came from a virtual source), push it back onto the virtual
1130 		 * engine so that it can be moved across onto another physical
1131 		 * engine as load dictates.
1132 		 */
1133 		if (likely(rq->execution_mask == engine->mask)) {
1134 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1135 			if (rq_prio(rq) != prio) {
1136 				prio = rq_prio(rq);
1137 				pl = i915_sched_lookup_priolist(engine, prio);
1138 			}
1139 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1140 
1141 			list_move(&rq->sched.link, pl);
1142 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1143 
1144 			/* Check in case we rollback so far we wrap [size/2] */
1145 			if (intel_ring_direction(rq->ring,
1146 						 rq->tail,
1147 						 rq->ring->tail + 8) > 0)
1148 				rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1149 
1150 			active = rq;
1151 		} else {
1152 			struct intel_engine_cs *owner = rq->context->engine;
1153 
1154 			WRITE_ONCE(rq->engine, owner);
1155 			owner->submit_request(rq);
1156 			active = NULL;
1157 		}
1158 	}
1159 
1160 	return active;
1161 }
1162 
1163 struct i915_request *
1164 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1165 {
1166 	struct intel_engine_cs *engine =
1167 		container_of(execlists, typeof(*engine), execlists);
1168 
1169 	return __unwind_incomplete_requests(engine);
1170 }
1171 
1172 static inline void
1173 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1174 {
1175 	/*
1176 	 * Currently only used when GVT-g is enabled. When GVT-g is disabled,
1177 	 * the compiler should eliminate this function as dead code.
1178 	 */
1179 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1180 		return;
1181 
1182 	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1183 				   status, rq);
1184 }
1185 
1186 static void intel_engine_context_in(struct intel_engine_cs *engine)
1187 {
1188 	unsigned long flags;
1189 
1190 	if (atomic_add_unless(&engine->stats.active, 1, 0))
1191 		return;
1192 
1193 	write_seqlock_irqsave(&engine->stats.lock, flags);
1194 	if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1195 		engine->stats.start = ktime_get();
1196 		atomic_inc(&engine->stats.active);
1197 	}
1198 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1199 }
1200 
1201 static void intel_engine_context_out(struct intel_engine_cs *engine)
1202 {
1203 	unsigned long flags;
1204 
1205 	GEM_BUG_ON(!atomic_read(&engine->stats.active));
1206 
1207 	if (atomic_add_unless(&engine->stats.active, -1, 1))
1208 		return;
1209 
1210 	write_seqlock_irqsave(&engine->stats.lock, flags);
1211 	if (atomic_dec_and_test(&engine->stats.active)) {
1212 		engine->stats.total =
1213 			ktime_add(engine->stats.total,
1214 				  ktime_sub(ktime_get(), engine->stats.start));
1215 	}
1216 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1217 }
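
/*
 * A sketch of the reader side of the busyness accounting above (the real
 * reader lives with the rest of the engine stats code, outside this file).
 * Sampling must loop on the stats seqlock and, if the engine is currently
 * active, add the still-open interval to the accumulated total::
 *
 *	static ktime_t busy_time_sketch(struct intel_engine_cs *engine)
 *	{
 *		unsigned int seq;
 *		ktime_t total;
 *
 *		do {
 *			seq = read_seqbegin(&engine->stats.lock);
 *
 *			total = engine->stats.total;
 *			if (atomic_read(&engine->stats.active))
 *				total = ktime_add(total,
 *						  ktime_sub(ktime_get(),
 *							    engine->stats.start));
 *		} while (read_seqretry(&engine->stats.lock, seq));
 *
 *		return total;
 *	}
 */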
1218 
1219 static void
1220 execlists_check_context(const struct intel_context *ce,
1221 			const struct intel_engine_cs *engine,
1222 			const char *when)
1223 {
1224 	const struct intel_ring *ring = ce->ring;
1225 	u32 *regs = ce->lrc_reg_state;
1226 	bool valid = true;
1227 	int x;
1228 
1229 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1230 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1231 		       engine->name,
1232 		       regs[CTX_RING_START],
1233 		       i915_ggtt_offset(ring->vma));
1234 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1235 		valid = false;
1236 	}
1237 
1238 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1239 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1240 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1241 		       engine->name,
1242 		       regs[CTX_RING_CTL],
1243 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1244 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1245 		valid = false;
1246 	}
1247 
1248 	x = lrc_ring_mi_mode(engine);
1249 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1250 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1251 		       engine->name, regs[x + 1]);
1252 		regs[x + 1] &= ~STOP_RING;
1253 		regs[x + 1] |= STOP_RING << 16;
1254 		valid = false;
1255 	}
1256 
1257 	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1258 }
1259 
1260 static void restore_default_state(struct intel_context *ce,
1261 				  struct intel_engine_cs *engine)
1262 {
1263 	u32 *regs;
1264 
1265 	regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1266 	execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1267 
1268 	ce->runtime.last = intel_context_get_runtime(ce);
1269 }
1270 
1271 static void reset_active(struct i915_request *rq,
1272 			 struct intel_engine_cs *engine)
1273 {
1274 	struct intel_context * const ce = rq->context;
1275 	u32 head;
1276 
1277 	/*
1278 	 * The executing context has been cancelled. We want to prevent
1279 	 * further execution along this context and propagate the error on
1280 	 * to anything depending on its results.
1281 	 *
1282 	 * In __i915_request_submit(), we apply the -EIO and remove the
1283 	 * requests' payloads for any banned requests. But first, we must
1284 	 * rewind the context back to the start of the incomplete request so
1285 	 * that we do not jump back into the middle of the batch.
1286 	 *
1287 	 * We preserve the breadcrumbs and semaphores of the incomplete
1288 	 * requests so that inter-timeline dependencies (i.e other timelines)
1289 	 * remain correctly ordered. And we defer to __i915_request_submit()
1290 	 * so that all asynchronous waits are correctly handled.
1291 	 */
1292 	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1293 		     rq->fence.context, rq->fence.seqno);
1294 
1295 	/* On resubmission of the active request, payload will be scrubbed */
1296 	if (i915_request_completed(rq))
1297 		head = rq->tail;
1298 	else
1299 		head = active_request(ce->timeline, rq)->head;
1300 	head = intel_ring_wrap(ce->ring, head);
1301 
1302 	/* Scrub the context image to prevent replaying the previous batch */
1303 	restore_default_state(ce, engine);
1304 	__execlists_update_reg_state(ce, engine, head);
1305 
1306 	/* We've switched away, so this should be a no-op, but intent matters */
1307 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1308 }
1309 
1310 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1311 {
1312 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1313 	ce->runtime.num_underflow++;
1314 	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1315 #endif
1316 }
1317 
1318 static void intel_context_update_runtime(struct intel_context *ce)
1319 {
1320 	u32 old;
1321 	s32 dt;
1322 
1323 	if (intel_context_is_barrier(ce))
1324 		return;
1325 
1326 	old = ce->runtime.last;
1327 	ce->runtime.last = intel_context_get_runtime(ce);
1328 	dt = ce->runtime.last - old;
1329 
1330 	if (unlikely(dt < 0)) {
1331 		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1332 			 old, ce->runtime.last, dt);
1333 		st_update_runtime_underflow(ce, dt);
1334 		return;
1335 	}
1336 
1337 	ewma_runtime_add(&ce->runtime.avg, dt);
1338 	ce->runtime.total += dt;
1339 }
1340 
1341 static inline struct intel_engine_cs *
1342 __execlists_schedule_in(struct i915_request *rq)
1343 {
1344 	struct intel_engine_cs * const engine = rq->engine;
1345 	struct intel_context * const ce = rq->context;
1346 
1347 	intel_context_get(ce);
1348 
1349 	if (unlikely(intel_context_is_banned(ce)))
1350 		reset_active(rq, engine);
1351 
1352 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1353 		execlists_check_context(ce, engine, "before");
1354 
1355 	if (ce->tag) {
1356 		/* Use a fixed tag for OA and friends */
1357 		GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1358 		ce->lrc.ccid = ce->tag;
1359 	} else {
1360 		/* We don't need a strict matching tag, just different values */
1361 		unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1362 
1363 		GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1364 		clear_bit(tag - 1, &engine->context_tag);
1365 		ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1366 
1367 		BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1368 	}
1369 
1370 	ce->lrc.ccid |= engine->execlists.ccid;
1371 
1372 	__intel_gt_pm_get(engine->gt);
1373 	if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
1374 		intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1375 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1376 	intel_engine_context_in(engine);
1377 
1378 	return engine;
1379 }
1380 
1381 static inline struct i915_request *
1382 execlists_schedule_in(struct i915_request *rq, int idx)
1383 {
1384 	struct intel_context * const ce = rq->context;
1385 	struct intel_engine_cs *old;
1386 
1387 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1388 	trace_i915_request_in(rq, idx);
1389 
1390 	old = READ_ONCE(ce->inflight);
1391 	do {
1392 		if (!old) {
1393 			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1394 			break;
1395 		}
1396 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1397 
1398 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1399 	return i915_request_get(rq);
1400 }
1401 
1402 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1403 {
1404 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1405 	struct i915_request *next = READ_ONCE(ve->request);
1406 
1407 	if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1408 		tasklet_hi_schedule(&ve->base.execlists.tasklet);
1409 }
1410 
1411 static inline void
1412 __execlists_schedule_out(struct i915_request *rq,
1413 			 struct intel_engine_cs * const engine,
1414 			 unsigned int ccid)
1415 {
1416 	struct intel_context * const ce = rq->context;
1417 
1418 	/*
1419 	 * NB process_csb() is not under the engine->active.lock and hence
1420 	 * schedule_out can race with schedule_in, meaning that we should
1421 	 * refrain from doing non-trivial work here.
1422 	 */
1423 
1424 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1425 		execlists_check_context(ce, engine, "after");
1426 
1427 	/*
1428 	 * If we have just completed this context, the engine may now be
1429 	 * idle and we want to re-enter powersaving.
1430 	 */
1431 	if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1432 	    i915_request_completed(rq))
1433 		intel_engine_add_retire(engine, ce->timeline);
1434 
1435 	ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1436 	ccid &= GEN12_MAX_CONTEXT_HW_ID;
1437 	if (ccid < BITS_PER_LONG) {
1438 		GEM_BUG_ON(ccid == 0);
1439 		GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1440 		set_bit(ccid - 1, &engine->context_tag);
1441 	}
1442 
1443 	intel_context_update_runtime(ce);
1444 	intel_engine_context_out(engine);
1445 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1446 	if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
1447 		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1448 	intel_gt_pm_put_async(engine->gt);
1449 
1450 	/*
1451 	 * If this is part of a virtual engine, its next request may
1452 	 * have been blocked waiting for access to the active context.
1453 	 * We have to kick all the siblings again in case we need to
1454 	 * switch (e.g. the next request is not runnable on this
1455 	 * engine). Hopefully, we will already have submitted the next
1456 	 * request before the tasklet runs and do not need to rebuild
1457 	 * each virtual tree and kick everyone again.
1458 	 */
1459 	if (ce->engine != engine)
1460 		kick_siblings(rq, ce);
1461 
1462 	intel_context_put(ce);
1463 }
1464 
1465 static inline void
1466 execlists_schedule_out(struct i915_request *rq)
1467 {
1468 	struct intel_context * const ce = rq->context;
1469 	struct intel_engine_cs *cur, *old;
1470 	u32 ccid;
1471 
1472 	trace_i915_request_out(rq);
1473 
1474 	ccid = rq->context->lrc.ccid;
1475 	old = READ_ONCE(ce->inflight);
1476 	do
1477 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1478 	while (!try_cmpxchg(&ce->inflight, &old, cur));
1479 	if (!cur)
1480 		__execlists_schedule_out(rq, old, ccid);
1481 
1482 	i915_request_put(rq);
1483 }
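
/*
 * Note on the ce->inflight encoding used above (illustrative): as the engine
 * pointer is suitably aligned, its two low bits are borrowed as a port count.
 * schedule_in stores the bare engine pointer for the first port and
 * ptr_inc()s it for each additional port; schedule_out ptr_dec()s it, and
 * only when the low bits were already zero (cur == NULL above) is the final
 * __execlists_schedule_out() performed. Decoding it looks like::
 *
 *	// Which physical engine the context is currently executing on.
 *	engine = ptr_mask_bits(READ_ONCE(ce->inflight), 2);
 *	// How many extra ports, beyond the first, the context occupies.
 *	extra = ptr_unmask_bits(READ_ONCE(ce->inflight), 2);
 */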
1484 
1485 static u64 execlists_update_context(struct i915_request *rq)
1486 {
1487 	struct intel_context *ce = rq->context;
1488 	u64 desc = ce->lrc.desc;
1489 	u32 tail, prev;
1490 
1491 	/*
1492 	 * WaIdleLiteRestore:bdw,skl
1493 	 *
1494 	 * We should never submit the context with the same RING_TAIL twice
1495 	 * just in case we submit an empty ring, which confuses the HW.
1496 	 *
1497 	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1498 	 * the normal request to be able to always advance the RING_TAIL on
1499 	 * subsequent resubmissions (for lite restore). Should that fail us,
1500 	 * and we try and submit the same tail again, force the context
1501 	 * reload.
1502 	 *
1503 	 * If we need to return to a preempted context, we need to skip the
1504 	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1505 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
1506 	 * an earlier request.
1507 	 */
1508 	GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1509 	prev = rq->ring->tail;
1510 	tail = intel_ring_set_tail(rq->ring, rq->tail);
1511 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1512 		desc |= CTX_DESC_FORCE_RESTORE;
1513 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1514 	rq->tail = rq->wa_tail;
1515 
1516 	/*
1517 	 * Make sure the context image is complete before we submit it to HW.
1518 	 *
1519 	 * Ostensibly, writes (including the WCB) should be flushed prior to
1520 	 * an uncached write such as our mmio register access, the empirical
1521 	 * evidence (esp. on Braswell) suggests that the WC write into memory
1522 	 * may not be visible to the HW prior to the completion of the UC
1523 	 * register write and that we may begin execution from the context
1524 	 * before its image is complete leading to invalid PD chasing.
1525 	 */
1526 	wmb();
1527 
1528 	ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1529 	return desc;
1530 }
1531 
1532 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1533 {
1534 	if (execlists->ctrl_reg) {
1535 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1536 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1537 	} else {
1538 		writel(upper_32_bits(desc), execlists->submit_reg);
1539 		writel(lower_32_bits(desc), execlists->submit_reg);
1540 	}
1541 }
1542 
1543 static __maybe_unused char *
1544 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1545 {
1546 	if (!rq)
1547 		return "";
1548 
1549 	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1550 		 prefix,
1551 		 rq->context->lrc.ccid,
1552 		 rq->fence.context, rq->fence.seqno,
1553 		 i915_request_completed(rq) ? "!" :
1554 		 i915_request_started(rq) ? "*" :
1555 		 "",
1556 		 rq_prio(rq));
1557 
1558 	return buf;
1559 }
1560 
1561 static __maybe_unused void
1562 trace_ports(const struct intel_engine_execlists *execlists,
1563 	    const char *msg,
1564 	    struct i915_request * const *ports)
1565 {
1566 	const struct intel_engine_cs *engine =
1567 		container_of(execlists, typeof(*engine), execlists);
1568 	char __maybe_unused p0[40], p1[40];
1569 
1570 	if (!ports[0])
1571 		return;
1572 
1573 	ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1574 		     dump_port(p0, sizeof(p0), "", ports[0]),
1575 		     dump_port(p1, sizeof(p1), ", ", ports[1]));
1576 }
1577 
1578 static inline bool
1579 reset_in_progress(const struct intel_engine_execlists *execlists)
1580 {
1581 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1582 }
1583 
1584 static __maybe_unused bool
1585 assert_pending_valid(const struct intel_engine_execlists *execlists,
1586 		     const char *msg)
1587 {
1588 	struct intel_engine_cs *engine =
1589 		container_of(execlists, typeof(*engine), execlists);
1590 	struct i915_request * const *port, *rq;
1591 	struct intel_context *ce = NULL;
1592 	bool sentinel = false;
1593 	u32 ccid = -1;
1594 
1595 	trace_ports(execlists, msg, execlists->pending);
1596 
1597 	/* We may be messing around with the lists during reset, lalala */
1598 	if (reset_in_progress(execlists))
1599 		return true;
1600 
1601 	if (!execlists->pending[0]) {
1602 		GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1603 			      engine->name);
1604 		return false;
1605 	}
1606 
1607 	if (execlists->pending[execlists_num_ports(execlists)]) {
1608 		GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1609 			      engine->name, execlists_num_ports(execlists));
1610 		return false;
1611 	}
1612 
1613 	for (port = execlists->pending; (rq = *port); port++) {
1614 		unsigned long flags;
1615 		bool ok = true;
1616 
1617 		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1618 		GEM_BUG_ON(!i915_request_is_active(rq));
1619 
1620 		if (ce == rq->context) {
1621 			GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1622 				      engine->name,
1623 				      ce->timeline->fence_context,
1624 				      port - execlists->pending);
1625 			return false;
1626 		}
1627 		ce = rq->context;
1628 
1629 		if (ccid == ce->lrc.ccid) {
1630 			GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1631 				      engine->name,
1632 				      ccid, ce->timeline->fence_context,
1633 				      port - execlists->pending);
1634 			return false;
1635 		}
1636 		ccid = ce->lrc.ccid;
1637 
1638 		/*
1639 		 * Sentinels are supposed to be the last request so they flush
1640 		 * the current execution off the HW. Check that they are the only
1641 		 * request in the pending submission.
1642 		 */
1643 		if (sentinel) {
1644 			GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1645 				      engine->name,
1646 				      ce->timeline->fence_context,
1647 				      port - execlists->pending);
1648 			return false;
1649 		}
1650 		sentinel = i915_request_has_sentinel(rq);
1651 
1652 		/* Hold tightly onto the lock to prevent concurrent retires! */
1653 		if (!spin_trylock_irqsave(&rq->lock, flags))
1654 			continue;
1655 
1656 		if (i915_request_completed(rq))
1657 			goto unlock;
1658 
1659 		if (i915_active_is_idle(&ce->active) &&
1660 		    !intel_context_is_barrier(ce)) {
1661 			GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1662 				      engine->name,
1663 				      ce->timeline->fence_context,
1664 				      port - execlists->pending);
1665 			ok = false;
1666 			goto unlock;
1667 		}
1668 
1669 		if (!i915_vma_is_pinned(ce->state)) {
1670 			GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1671 				      engine->name,
1672 				      ce->timeline->fence_context,
1673 				      port - execlists->pending);
1674 			ok = false;
1675 			goto unlock;
1676 		}
1677 
1678 		if (!i915_vma_is_pinned(ce->ring->vma)) {
1679 			GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1680 				      engine->name,
1681 				      ce->timeline->fence_context,
1682 				      port - execlists->pending);
1683 			ok = false;
1684 			goto unlock;
1685 		}
1686 
1687 unlock:
1688 		spin_unlock_irqrestore(&rq->lock, flags);
1689 		if (!ok)
1690 			return false;
1691 	}
1692 
1693 	return ce;
1694 }
1695 
1696 static void execlists_submit_ports(struct intel_engine_cs *engine)
1697 {
1698 	struct intel_engine_execlists *execlists = &engine->execlists;
1699 	unsigned int n;
1700 
1701 	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1702 
1703 	/*
1704 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1705 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1706 	 * not be relinquished until the device is idle (see
1707 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
1708 	 * that all ELSP are drained i.e. we have processed the CSB,
1709 	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1710 	 */
1711 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1712 
1713 	/*
1714 	 * ELSQ note: the submit queue is not cleared after being submitted
1715 	 * to the HW so we need to make sure we always clean it up. This is
1716 	 * currently ensured by the fact that we always write the same number
1717 	 * of elsq entries, keep this in mind before changing the loop below.
1718 	 */
1719 	for (n = execlists_num_ports(execlists); n--; ) {
1720 		struct i915_request *rq = execlists->pending[n];
1721 
1722 		write_desc(execlists,
1723 			   rq ? execlists_update_context(rq) : 0,
1724 			   n);
1725 	}
1726 
1727 	/* we need to manually load the submit queue */
1728 	if (execlists->ctrl_reg)
1729 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1730 }
1731 
1732 static bool ctx_single_port_submission(const struct intel_context *ce)
1733 {
1734 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1735 		intel_context_force_single_submission(ce));
1736 }
1737 
1738 static bool can_merge_ctx(const struct intel_context *prev,
1739 			  const struct intel_context *next)
1740 {
1741 	if (prev != next)
1742 		return false;
1743 
1744 	if (ctx_single_port_submission(prev))
1745 		return false;
1746 
1747 	return true;
1748 }
1749 
1750 static unsigned long i915_request_flags(const struct i915_request *rq)
1751 {
1752 	return READ_ONCE(rq->fence.flags);
1753 }
1754 
1755 static bool can_merge_rq(const struct i915_request *prev,
1756 			 const struct i915_request *next)
1757 {
1758 	GEM_BUG_ON(prev == next);
1759 	GEM_BUG_ON(!assert_priority_queue(prev, next));
1760 
1761 	/*
1762 	 * We do not submit known completed requests. Therefore if the next
1763 	 * request is already completed, we can pretend to merge it in
1764 	 * with the previous context (and we will skip updating the ELSP
1765 	 * and tracking). Thus hopefully keeping the ELSP full with active
1766 	 * contexts, despite the best efforts of preempt-to-busy to confuse
1767 	 * us.
1768 	 */
1769 	if (i915_request_completed(next))
1770 		return true;
1771 
1772 	if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1773 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1774 		      BIT(I915_FENCE_FLAG_SENTINEL))))
1775 		return false;
1776 
1777 	if (!can_merge_ctx(prev->context, next->context))
1778 		return false;
1779 
1780 	GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1781 	return true;
1782 }
1783 
1784 static void virtual_update_register_offsets(u32 *regs,
1785 					    struct intel_engine_cs *engine)
1786 {
1787 	set_offsets(regs, reg_offsets(engine), engine, false);
1788 }
1789 
1790 static bool virtual_matches(const struct virtual_engine *ve,
1791 			    const struct i915_request *rq,
1792 			    const struct intel_engine_cs *engine)
1793 {
1794 	const struct intel_engine_cs *inflight;
1795 
1796 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1797 		return false;
1798 
1799 	/*
1800 	 * We track when the HW has completed saving the context image
1801 	 * (i.e. when we have seen the final CS event switching out of
1802 	 * the context) and must not overwrite the context image before
1803 	 * then. This restricts us to only using the active engine
1804 	 * while the previous virtualized request is inflight (so
1805 	 * we reuse the register offsets). This is a very small
1806 	 * hysteresis on the greedy selection algorithm.
1807 	 */
1808 	inflight = intel_context_inflight(&ve->context);
1809 	if (inflight && inflight != engine)
1810 		return false;
1811 
1812 	return true;
1813 }
1814 
1815 static void virtual_xfer_context(struct virtual_engine *ve,
1816 				 struct intel_engine_cs *engine)
1817 {
1818 	unsigned int n;
1819 
1820 	if (likely(engine == ve->siblings[0]))
1821 		return;
1822 
1823 	GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1824 	if (!intel_engine_has_relative_mmio(engine))
1825 		virtual_update_register_offsets(ve->context.lrc_reg_state,
1826 						engine);
1827 
1828 	/*
1829 	 * Move the bound engine to the top of the list for
1830 	 * future execution. We then kick this tasklet first
1831 	 * before checking others, so that we preferentially
1832 	 * reuse this set of bound registers.
1833 	 */
1834 	for (n = 1; n < ve->num_siblings; n++) {
1835 		if (ve->siblings[n] == engine) {
1836 			swap(ve->siblings[n], ve->siblings[0]);
1837 			break;
1838 		}
1839 	}
1840 }
1841 
1842 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1843 {
1844 	LIST_HEAD(list);
1845 
1846 	/*
1847 	 * We want to move the interrupted request to the back of
1848 	 * the round-robin list (i.e. its priority level), but
1849 	 * in doing so, we must then move all requests that were in
1850 	 * in doing so, we must also move all in-flight requests
1851 	 * that were waiting for the interrupted request, so that
1852 	 * they are run after it again.
1853 	do {
1854 		struct i915_dependency *p;
1855 
1856 		GEM_BUG_ON(i915_request_is_active(rq));
1857 		list_move_tail(&rq->sched.link, pl);
1858 
1859 		for_each_waiter(p, rq) {
1860 			struct i915_request *w =
1861 				container_of(p->waiter, typeof(*w), sched);
1862 
1863 			if (p->flags & I915_DEPENDENCY_WEAK)
1864 				continue;
1865 
1866 			/* Leave semaphores spinning on the other engines */
1867 			if (w->engine != rq->engine)
1868 				continue;
1869 
1870 			/* No waiter should start before its signaler */
1871 			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1872 				   i915_request_started(w) &&
1873 				   !i915_request_completed(rq));
1874 
1875 			GEM_BUG_ON(i915_request_is_active(w));
1876 			if (!i915_request_is_ready(w))
1877 				continue;
1878 
1879 			if (rq_prio(w) < rq_prio(rq))
1880 				continue;
1881 
1882 			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1883 			list_move_tail(&w->sched.link, &list);
1884 		}
1885 
1886 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1887 	} while (rq);
1888 }
1889 
1890 static void defer_active(struct intel_engine_cs *engine)
1891 {
1892 	struct i915_request *rq;
1893 
1894 	rq = __unwind_incomplete_requests(engine);
1895 	if (!rq)
1896 		return;
1897 
1898 	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1899 }
1900 
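/*
 * A timeslice is only needed if there is another runnable request of at
 * least equal effective priority waiting behind the current one: either
 * later in this engine's active list, in the priority queue (as hinted
 * by queue_priority_hint), or pending on a matching virtual engine.
 */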
1901 static bool
1902 need_timeslice(const struct intel_engine_cs *engine,
1903 	       const struct i915_request *rq,
1904 	       const struct rb_node *rb)
1905 {
1906 	int hint;
1907 
1908 	if (!intel_engine_has_timeslices(engine))
1909 		return false;
1910 
1911 	hint = engine->execlists.queue_priority_hint;
1912 
1913 	if (rb) {
1914 		const struct virtual_engine *ve =
1915 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1916 		const struct intel_engine_cs *inflight =
1917 			intel_context_inflight(&ve->context);
1918 
1919 		if (!inflight || inflight == engine) {
1920 			struct i915_request *next;
1921 
1922 			rcu_read_lock();
1923 			next = READ_ONCE(ve->request);
1924 			if (next)
1925 				hint = max(hint, rq_prio(next));
1926 			rcu_read_unlock();
1927 		}
1928 	}
1929 
1930 	if (!list_is_last(&rq->sched.link, &engine->active.requests))
1931 		hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1932 
1933 	GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1934 	return hint >= effective_prio(rq);
1935 }
1936 
1937 static bool
1938 timeslice_yield(const struct intel_engine_execlists *el,
1939 		const struct i915_request *rq)
1940 {
1941 	/*
1942 	 * Once bitten, forever smitten!
1943 	 *
1944 	 * If the active context ever busy-waited on a semaphore,
1945 	 * it will be treated as a hog until the end of its timeslice (i.e.
1946 	 * until it is scheduled out and replaced by a new submission,
1947 	 * possibly even its own lite-restore). The HW only sends an interrupt
1948 	 * on the first miss, and we do know if that semaphore has been
1949 	 * on the first miss, and we do not know if that semaphore has been
1950 	 * safe, yield if it might be stuck -- it will be given a fresh
1951 	 * timeslice in the near future.
1952 	 */
1953 	return rq->context->lrc.ccid == READ_ONCE(el->yield);
1954 }
1955 
1956 static bool
1957 timeslice_expired(const struct intel_engine_execlists *el,
1958 		  const struct i915_request *rq)
1959 {
1960 	return timer_expired(&el->timer) || timeslice_yield(el, rq);
1961 }
1962 
1963 static int
1964 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1965 {
1966 	if (list_is_last(&rq->sched.link, &engine->active.requests))
1967 		return engine->execlists.queue_priority_hint;
1968 
1969 	return rq_prio(list_next_entry(rq, sched.link));
1970 }
1971 
1972 static inline unsigned long
1973 timeslice(const struct intel_engine_cs *engine)
1974 {
1975 	return READ_ONCE(engine->props.timeslice_duration_ms);
1976 }
1977 
1978 static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1979 {
1980 	const struct intel_engine_execlists *execlists = &engine->execlists;
1981 	const struct i915_request *rq = *execlists->active;
1982 
1983 	if (!rq || i915_request_completed(rq))
1984 		return 0;
1985 
1986 	if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1987 		return 0;
1988 
1989 	return timeslice(engine);
1990 }
1991 
1992 static void set_timeslice(struct intel_engine_cs *engine)
1993 {
1994 	unsigned long duration;
1995 
1996 	if (!intel_engine_has_timeslices(engine))
1997 		return;
1998 
1999 	duration = active_timeslice(engine);
2000 	ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2001 
2002 	set_timer_ms(&engine->execlists.timer, duration);
2003 }
2004 
2005 static void start_timeslice(struct intel_engine_cs *engine, int prio)
2006 {
2007 	struct intel_engine_execlists *execlists = &engine->execlists;
2008 	unsigned long duration;
2009 
2010 	if (!intel_engine_has_timeslices(engine))
2011 		return;
2012 
2013 	WRITE_ONCE(execlists->switch_priority_hint, prio);
2014 	if (prio == INT_MIN)
2015 		return;
2016 
2017 	if (timer_pending(&execlists->timer))
2018 		return;
2019 
2020 	duration = timeslice(engine);
2021 	ENGINE_TRACE(engine,
2022 		     "start timeslicing, prio:%d, interval:%lu",
2023 		     prio, duration);
2024 
2025 	set_timer_ms(&execlists->timer, duration);
2026 }
2027 
2028 static void record_preemption(struct intel_engine_execlists *execlists)
2029 {
2030 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2031 }
2032 
2033 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2034 					    const struct i915_request *rq)
2035 {
2036 	if (!rq)
2037 		return 0;
2038 
2039 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
2040 	if (unlikely(intel_context_is_banned(rq->context)))
2041 		return 1;
2042 
2043 	return READ_ONCE(engine->props.preempt_timeout_ms);
2044 }
2045 
2046 static void set_preempt_timeout(struct intel_engine_cs *engine,
2047 				const struct i915_request *rq)
2048 {
2049 	if (!intel_engine_has_preempt_reset(engine))
2050 		return;
2051 
2052 	set_timer_ms(&engine->execlists.preempt,
2053 		     active_preempt_timeout(engine, rq));
2054 }
2055 
2056 static inline void clear_ports(struct i915_request **ports, int count)
2057 {
2058 	memset_p((void **)ports, NULL, count);
2059 }
2060 
2061 static inline void
2062 copy_ports(struct i915_request **dst, struct i915_request **src, int count)
2063 {
2064 	/* A memcpy_p() would be very useful here! */
2065 	while (count--)
2066 		WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
2067 }
2068 
2069 static void execlists_dequeue(struct intel_engine_cs *engine)
2070 {
2071 	struct intel_engine_execlists * const execlists = &engine->execlists;
2072 	struct i915_request **port = execlists->pending;
2073 	struct i915_request ** const last_port = port + execlists->port_mask;
2074 	struct i915_request * const *active;
2075 	struct i915_request *last;
2076 	struct rb_node *rb;
2077 	bool submit = false;
2078 
2079 	/*
2080 	 * Hardware submission is through 2 ports. Conceptually each port
2081 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2082 	 * static for a context, and unique to each, so we only execute
2083 	 * requests belonging to a single context from each ring. RING_HEAD
2084 	 * is maintained by the CS in the context image, it marks the place
2085 	 * is maintained by the CS in the context image; it marks the place
2086 	 * where we want to execute up to this time.
2087 	 *
2088 	 * In this list the requests are in order of execution. Consecutive
2089 	 * requests from the same context are adjacent in the ringbuffer. We
2090 	 * can combine these requests into a single RING_TAIL update:
2091 	 *
2092 	 *              RING_HEAD...req1...req2
2093 	 *                                    ^- RING_TAIL
2094 	 * since to execute req2 the CS must first execute req1.
2095 	 *
2096 	 * Our goal then is to point each port to the end of a consecutive
2097 	 * sequence of requests as being the most optimal (fewest wake ups
2098 	 * and context switches) submission.
2099 	 */
2100 
2101 	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2102 		struct virtual_engine *ve =
2103 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2104 		struct i915_request *rq = READ_ONCE(ve->request);
2105 
2106 		if (!rq) { /* lazily cleanup after another engine handled rq */
2107 			rb_erase_cached(rb, &execlists->virtual);
2108 			RB_CLEAR_NODE(rb);
2109 			rb = rb_first_cached(&execlists->virtual);
2110 			continue;
2111 		}
2112 
2113 		if (!virtual_matches(ve, rq, engine)) {
2114 			rb = rb_next(rb);
2115 			continue;
2116 		}
2117 
2118 		break;
2119 	}
2120 
2121 	/*
2122 	 * If the queue is higher priority than the last
2123 	 * request in the currently active context, submit afresh.
2124 	 * We will resubmit again afterwards in case we need to split
2125 	 * the active context to interject the preemption request,
2126 	 * i.e. we will retrigger preemption following the ack in case
2127 	 * of trouble.
2128 	 */
2129 	active = READ_ONCE(execlists->active);
2130 
2131 	/*
2132 	 * In theory we can skip over completed contexts that have not
2133 	 * yet been processed by events (as those events are in flight):
2134 	 *
2135 	 * while ((last = *active) && i915_request_completed(last))
2136 	 *	active++;
2137 	 *
2138 	 * However, the GPU cannot handle this as it will ultimately
2139 	 * find itself trying to jump back into a context it has just
2140 	 * completed and barf.
2141 	 */
2142 
2143 	if ((last = *active)) {
2144 		if (i915_request_completed(last)) {
2145 			goto check_secondary;
2146 		} else if (need_preempt(engine, last, rb)) {
2147 			ENGINE_TRACE(engine,
2148 				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2149 				     last->fence.context,
2150 				     last->fence.seqno,
2151 				     last->sched.attr.priority,
2152 				     execlists->queue_priority_hint);
2153 			record_preemption(execlists);
2154 
2155 			/*
2156 			 * Don't let the RING_HEAD advance past the breadcrumb
2157 			 * as we unwind (and until we resubmit) so that we do
2158 			 * not accidentally tell it to go backwards.
2159 			 */
2160 			ring_set_paused(engine, 1);
2161 
2162 			/*
2163 			 * Note that we have not stopped the GPU at this point,
2164 			 * so we are unwinding the incomplete requests as they
2165 			 * remain inflight and so by the time we do complete
2166 			 * the preemption, some of the unwound requests may
2167 			 * complete!
2168 			 */
2169 			__unwind_incomplete_requests(engine);
2170 
2171 			last = NULL;
2172 		} else if (need_timeslice(engine, last, rb) &&
2173 			   timeslice_expired(execlists, last)) {
2174 			ENGINE_TRACE(engine,
2175 				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2176 				     last->fence.context,
2177 				     last->fence.seqno,
2178 				     last->sched.attr.priority,
2179 				     execlists->queue_priority_hint,
2180 				     yesno(timeslice_yield(execlists, last)));
2181 
2182 			ring_set_paused(engine, 1);
2183 			defer_active(engine);
2184 
2185 			/*
2186 			 * Unlike for preemption, if we rewind and continue
2187 			 * executing the same context as previously active,
2188 			 * the order of execution will remain the same and
2189 			 * the tail will only advance. We do not need to
2190 			 * force a full context restore, as a lite-restore
2191 			 * is sufficient to resample the monotonic TAIL.
2192 			 *
2193 			 * If we switch to any other context, similarly we
2194 			 * will not rewind TAIL of current context, and
2195 			 * normal save/restore will preserve state and allow
2196 			 * us to later continue executing the same request.
2197 			 */
2198 			last = NULL;
2199 		} else {
2200 			/*
2201 			 * Otherwise if we already have a request pending
2202 			 * for execution after the current one, we can
2203 			 * just wait until the next CS event before
2204 			 * queuing more. In either case we will force a
2205 			 * lite-restore preemption event, but if we wait
2206 			 * we hopefully coalesce several updates into a single
2207 			 * submission.
2208 			 */
2209 check_secondary:
2210 			if (!list_is_last(&last->sched.link,
2211 					  &engine->active.requests)) {
2212 				/*
2213 				 * Even if ELSP[1] is occupied and not worthy
2214 				 * of timeslices, our queue might be.
2215 				 */
2216 				start_timeslice(engine, queue_prio(execlists));
2217 				return;
2218 			}
2219 		}
2220 	}
2221 
2222 	while (rb) { /* XXX virtual is always taking precedence */
2223 		struct virtual_engine *ve =
2224 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2225 		struct i915_request *rq;
2226 
2227 		spin_lock(&ve->base.active.lock);
2228 
2229 		rq = ve->request;
2230 		if (unlikely(!rq)) { /* lost the race to a sibling */
2231 			spin_unlock(&ve->base.active.lock);
2232 			rb_erase_cached(rb, &execlists->virtual);
2233 			RB_CLEAR_NODE(rb);
2234 			rb = rb_first_cached(&execlists->virtual);
2235 			continue;
2236 		}
2237 
2238 		GEM_BUG_ON(rq != ve->request);
2239 		GEM_BUG_ON(rq->engine != &ve->base);
2240 		GEM_BUG_ON(rq->context != &ve->context);
2241 
2242 		if (rq_prio(rq) >= queue_prio(execlists)) {
2243 			if (!virtual_matches(ve, rq, engine)) {
2244 				spin_unlock(&ve->base.active.lock);
2245 				rb = rb_next(rb);
2246 				continue;
2247 			}
2248 
2249 			if (last && !can_merge_rq(last, rq)) {
2250 				spin_unlock(&ve->base.active.lock);
2251 				start_timeslice(engine, rq_prio(rq));
2252 				return; /* leave this for another sibling */
2253 			}
2254 
2255 			ENGINE_TRACE(engine,
2256 				     "virtual rq=%llx:%lld%s, new engine? %s\n",
2257 				     rq->fence.context,
2258 				     rq->fence.seqno,
2259 				     i915_request_completed(rq) ? "!" :
2260 				     i915_request_started(rq) ? "*" :
2261 				     "",
2262 				     yesno(engine != ve->siblings[0]));
2263 
2264 			WRITE_ONCE(ve->request, NULL);
2265 			WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2266 				   INT_MIN);
2267 			rb_erase_cached(rb, &execlists->virtual);
2268 			RB_CLEAR_NODE(rb);
2269 
2270 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2271 			WRITE_ONCE(rq->engine, engine);
2272 
2273 			if (__i915_request_submit(rq)) {
2274 				/*
2275 				 * Only after we confirm that we will submit
2276 				 * this request (i.e. it has not already
2277 				 * completed), do we want to update the context.
2278 				 *
2279 				 * This serves two purposes. It avoids
2280 				 * unnecessary work if we are resubmitting an
2281 				 * already completed request after timeslicing.
2282 				 * But more importantly, it prevents us altering
2283 				 * ve->siblings[] on an idle context, where
2284 				 * we may be using ve->siblings[] in
2285 				 * virtual_context_enter / virtual_context_exit.
2286 				 */
2287 				virtual_xfer_context(ve, engine);
2288 				GEM_BUG_ON(ve->siblings[0] != engine);
2289 
2290 				submit = true;
2291 				last = rq;
2292 			}
2293 			i915_request_put(rq);
2294 
2295 			/*
2296 			 * Hmm, we have a bunch of virtual engine requests,
2297 			 * but the first one was already completed (thanks
2298 			 * preempt-to-busy!). Keep looking at the veng queue
2299 			 * until we have no more relevant requests (i.e.
2300 			 * the normal submit queue has higher priority).
2301 			 */
2302 			if (!submit) {
2303 				spin_unlock(&ve->base.active.lock);
2304 				rb = rb_first_cached(&execlists->virtual);
2305 				continue;
2306 			}
2307 		}
2308 
2309 		spin_unlock(&ve->base.active.lock);
2310 		break;
2311 	}
2312 
2313 	while ((rb = rb_first_cached(&execlists->queue))) {
2314 		struct i915_priolist *p = to_priolist(rb);
2315 		struct i915_request *rq, *rn;
2316 		int i;
2317 
2318 		priolist_for_each_request_consume(rq, rn, p, i) {
2319 			bool merge = true;
2320 
2321 			/*
2322 			 * Can we combine this request with the current port?
2323 			 * It has to be the same context/ringbuffer and not
2324 			 * have any exceptions (e.g. GVT saying never to
2325 			 * combine contexts).
2326 			 *
2327 			 * If we can combine the requests, we can execute both
2328 			 * by updating the RING_TAIL to point to the end of the
2329 			 * second request, and so we never need to tell the
2330 			 * hardware about the first.
2331 			 */
2332 			if (last && !can_merge_rq(last, rq)) {
2333 				/*
2334 				 * If we are on the second port and cannot
2335 				 * combine this request with the last, then we
2336 				 * are done.
2337 				 */
2338 				if (port == last_port)
2339 					goto done;
2340 
2341 				/*
2342 				 * We must not populate both ELSP[] with the
2343 				 * same LRCA, i.e. we must submit 2 different
2344 				 * contexts if we submit 2 ELSP.
2345 				 */
2346 				if (last->context == rq->context)
2347 					goto done;
2348 
2349 				if (i915_request_has_sentinel(last))
2350 					goto done;
2351 
2352 				/*
2353 				 * If GVT overrides us we only ever submit
2354 				 * port[0], leaving port[1] empty. Note that we
2355 				 * also have to be careful that we don't queue
2356 				 * the same context (even though a different
2357 				 * request) to the second port.
2358 				 */
2359 				if (ctx_single_port_submission(last->context) ||
2360 				    ctx_single_port_submission(rq->context))
2361 					goto done;
2362 
2363 				merge = false;
2364 			}
2365 
2366 			if (__i915_request_submit(rq)) {
2367 				if (!merge) {
2368 					*port = execlists_schedule_in(last, port - execlists->pending);
2369 					port++;
2370 					last = NULL;
2371 				}
2372 
2373 				GEM_BUG_ON(last &&
2374 					   !can_merge_ctx(last->context,
2375 							  rq->context));
2376 				GEM_BUG_ON(last &&
2377 					   i915_seqno_passed(last->fence.seqno,
2378 							     rq->fence.seqno));
2379 
2380 				submit = true;
2381 				last = rq;
2382 			}
2383 		}
2384 
2385 		rb_erase_cached(&p->node, &execlists->queue);
2386 		i915_priolist_free(p);
2387 	}
2388 
2389 done:
2390 	/*
2391 	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2392 	 *
2393 	 * We choose the priority hint such that if we add a request of greater
2394 	 * priority than this, we kick the submission tasklet to decide on
2395 	 * the right order of submitting the requests to hardware. We must
2396 	 * also be prepared to reorder requests as they are in-flight on the
2397 	 * HW. We derive the priority hint then as the first "hole" in
2398 	 * the HW submission ports and if there are no available slots,
2399 	 * the priority of the lowest executing request, i.e. last.
2400 	 *
2401 	 * When we do receive a higher priority request ready to run from the
2402 	 * user, see queue_request(), the priority hint is bumped to that
2403 	 * request triggering preemption on the next dequeue (or subsequent
2404 	 * interrupt for secondary ports).
2405 	 */
2406 	execlists->queue_priority_hint = queue_prio(execlists);
2407 
2408 	if (submit) {
2409 		*port = execlists_schedule_in(last, port - execlists->pending);
2410 		execlists->switch_priority_hint =
2411 			switch_prio(engine, *execlists->pending);
2412 
2413 		/*
2414 		 * Skip if we ended up with exactly the same set of requests,
2415 		 * e.g. trying to timeslice a pair of ordered contexts
2416 		 */
2417 		if (!memcmp(active, execlists->pending,
2418 			    (port - execlists->pending + 1) * sizeof(*port))) {
2419 			do
2420 				execlists_schedule_out(fetch_and_zero(port));
2421 			while (port-- != execlists->pending);
2422 
2423 			goto skip_submit;
2424 		}
2425 		clear_ports(port + 1, last_port - port);
2426 
2427 		WRITE_ONCE(execlists->yield, -1);
2428 		set_preempt_timeout(engine, *active);
2429 		execlists_submit_ports(engine);
2430 	} else {
2431 		start_timeslice(engine, execlists->queue_priority_hint);
2432 skip_submit:
2433 		ring_set_paused(engine, 0);
2434 	}
2435 }
2436 
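/*
 * Drop the references held by the ELSP ports: schedule out everything
 * still tracked in pending[] and inflight[], then point active back at
 * the (now empty) inflight array, completing the seqlock for
 * execlists_active().
 */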
2437 static void
2438 cancel_port_requests(struct intel_engine_execlists * const execlists)
2439 {
2440 	struct i915_request * const *port;
2441 
2442 	for (port = execlists->pending; *port; port++)
2443 		execlists_schedule_out(*port);
2444 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2445 
2446 	/* Mark the end of active before we overwrite *active */
2447 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2448 		execlists_schedule_out(*port);
2449 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2450 
2451 	smp_wmb(); /* complete the seqlock for execlists_active() */
2452 	WRITE_ONCE(execlists->active, execlists->inflight);
2453 }
2454 
2455 static inline void
2456 invalidate_csb_entries(const u64 *first, const u64 *last)
2457 {
2458 	clflush((void *)first);
2459 	clflush((void *)last);
2460 }
2461 
2462 /*
2463  * Starting with Gen12, the status has a new format:
2464  *
2465  *     bit  0:     switched to new queue
2466  *     bit  1:     reserved
2467  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2468  *                 switch detail is set to "wait on semaphore"
2469  *     bits 3-5:   engine class
2470  *     bits 6-11:  engine instance
2471  *     bits 12-14: reserved
2472  *     bits 15-25: sw context id of the lrc the GT switched to
2473  *     bits 26-31: sw counter of the lrc the GT switched to
2474  *     bits 32-35: context switch detail
2475  *                  - 0: ctx complete
2476  *                  - 1: wait on sync flip
2477  *                  - 2: wait on vblank
2478  *                  - 3: wait on scanline
2479  *                  - 4: wait on semaphore
2480  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2481  *                       WAIT_FOR_EVENT)
2482  *     bit  36:    reserved
2483  *     bits 37-43: wait detail (for switch detail 1 to 4)
2484  *     bits 44-46: reserved
2485  *     bits 47-57: sw context id of the lrc the GT switched away from
2486  *     bits 58-63: sw counter of the lrc the GT switched away from
2487  */
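/*
 * Illustrative reading of the above: an event that reports a valid
 * "switched away from" context and does not set the "switched to new
 * queue" bit is a plain completion (e.g. switch detail 0, "ctx
 * complete"), and gen12_csb_parse() returns false; an event with no
 * valid outgoing context, or with the new-queue bit set, marks a
 * promotion to the next ELSP submission and returns true.
 */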
2488 static inline bool gen12_csb_parse(const u64 csb)
2489 {
2490 	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
2491 	bool new_queue =
2492 		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2493 
2494 	/*
2495 	 * The context switch detail is not guaranteed to be 5 when a preemption
2496 	 * occurs, so we can't just check for that. The check below works for
2497 	 * all the cases we care about, including preemptions of WAIT
2498 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2499 	 * would require some extra handling, but we don't support that.
2500 	 */
2501 	if (!ctx_away_valid || new_queue) {
2502 		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
2503 		return true;
2504 	}
2505 
2506 	/*
2507 	 * switch detail = 5 is covered by the case above and we do not expect a
2508 	 * context switch on an unsuccessful wait instruction since we always
2509 	 * use polling mode.
2510 	 */
2511 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
2512 	return false;
2513 }
2514 
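/*
 * Pre-gen12 the CSB status is simpler: a promotion to a new ELSP
 * submission is reported by either the idle-to-active or the preempted
 * status bit.
 */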
2515 static inline bool gen8_csb_parse(const u64 csb)
2516 {
2517 	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2518 }
2519 
2520 static noinline u64
2521 wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
2522 {
2523 	u64 entry;
2524 
2525 	/*
2526 	 * Reading from the HWSP has one particular advantage: we can detect
2527 	 * a stale entry. Since the write into HWSP is broken, we have no reason
2528 	 * to trust the HW at all, the mmio entry may equally be unordered, so
2529 	 * we prefer the path that is self-checking and as a last resort,
2530 	 * return the mmio value.
2531 	 *
2532 	 * tgl,dg1:HSDES#22011327657
2533 	 */
2534 	preempt_disable();
2535 	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) {
2536 		int idx = csb - engine->execlists.csb_status;
2537 		int status;
2538 
2539 		status = GEN8_EXECLISTS_STATUS_BUF;
2540 		if (idx >= 6) {
2541 			status = GEN11_EXECLISTS_STATUS_BUF2;
2542 			idx -= 6;
2543 		}
2544 		status += sizeof(u64) * idx;
2545 
2546 		entry = intel_uncore_read64(engine->uncore,
2547 					    _MMIO(engine->mmio_base + status));
2548 	}
2549 	preempt_enable();
2550 
2551 	return entry;
2552 }
2553 
2554 static inline u64
2555 csb_read(const struct intel_engine_cs *engine, u64 * const csb)
2556 {
2557 	u64 entry = READ_ONCE(*csb);
2558 
2559 	/*
2560 	 * Unfortunately, the GPU does not always serialise its write
2561 	 * of the CSB entries before its write of the CSB pointer, at least
2562 	 * from the perspective of the CPU, using what is known as a Global
2563 	 * Observation Point. We may read a new CSB tail pointer, but then
2564 	 * read the stale CSB entries, causing us to misinterpret the
2565 	 * context-switch events, and eventually declare the GPU hung.
2566 	 *
2567 	 * icl:HSDES#1806554093
2568 	 * tgl:HSDES#22011248461
2569 	 */
2570 	if (unlikely(entry == -1))
2571 		entry = wa_csb_read(engine, csb);
2572 
2573 	/* Consume this entry so that we can spot its future reuse. */
2574 	WRITE_ONCE(*csb, -1);
2575 
2576 	/* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
2577 	return entry;
2578 }
2579 
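/*
 * Drain all context-status-buffer events written by the CS since the
 * last time we ran, advancing execlists->active/pending for each
 * promotion or completion so that our tracking mirrors what the HW is
 * actually executing.
 */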
2580 static void process_csb(struct intel_engine_cs *engine)
2581 {
2582 	struct intel_engine_execlists * const execlists = &engine->execlists;
2583 	u64 * const buf = execlists->csb_status;
2584 	const u8 num_entries = execlists->csb_size;
2585 	u8 head, tail;
2586 
2587 	/*
2588 	 * As we modify our execlists state tracking we require exclusive
2589 	 * access. Either we are inside the tasklet, or the tasklet is disabled
2590 	 * and we assume that is only inside the reset paths and so serialised.
2591 	 */
2592 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2593 		   !reset_in_progress(execlists));
2594 	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2595 
2596 	/*
2597 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
2598 	 * When reading from the csb_write mmio register, we have to be
2599 	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2600 	 * the low 4bits. As it happens we know the next 4bits are always
2601 	 * zero and so we can simply mask off the low u8 of the register
2602 	 * and treat it identically to reading from the HWSP (without having
2603 	 * to use explicit shifting and masking, and probably bifurcating
2604 	 * the code to handle the legacy mmio read).
2605 	 */
2606 	head = execlists->csb_head;
2607 	tail = READ_ONCE(*execlists->csb_write);
2608 	if (unlikely(head == tail))
2609 		return;
2610 
2611 	/*
2612 	 * We will consume all events from HW, or at least pretend to.
2613 	 *
2614 	 * The sequence of events from the HW is deterministic, and derived
2615 	 * from our writes to the ELSP, with a smidgen of variability for
2616 	 * the arrival of the asynchronous requests wrt the inflight
2617 	 * execution. If the HW sends an event that does not correspond with
2618 	 * the one we are expecting, we have to abandon all hope as we lose
2619 	 * all tracking of what the engine is actually executing. We will
2620 	 * only detect we are out of sequence with the HW when we get an
2621 	 * 'impossible' event because we have already drained our own
2622 	 * preemption/promotion queue. If this occurs, we know that we likely
2623 	 * lost track of execution earlier and must unwind and restart, the
2624 	 * simplest way is to stop processing the event queue and force the
2625 	 * engine to reset.
2626 	 */
2627 	execlists->csb_head = tail;
2628 	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2629 
2630 	/*
2631 	 * Hopefully paired with a wmb() in HW!
2632 	 *
2633 	 * We must complete the read of the write pointer before any reads
2634 	 * from the CSB, so that we do not see stale values. Without an rmb
2635 	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2636 	 * we perform the READ_ONCE(*csb_write).
2637 	 */
2638 	rmb();
2639 	do {
2640 		bool promote;
2641 		u64 csb;
2642 
2643 		if (++head == num_entries)
2644 			head = 0;
2645 
2646 		/*
2647 		 * We are flying near dragons again.
2648 		 *
2649 		 * We hold a reference to the request in execlist_port[]
2650 		 * but no more than that. We are operating in softirq
2651 		 * context and so cannot hold any mutex or sleep. That
2652 		 * prevents us stopping the requests we are processing
2653 		 * in port[] from being retired simultaneously (the
2654 		 * breadcrumb will be complete before we see the
2655 		 * context-switch). As we only hold the reference to the
2656 		 * request, any pointer chasing underneath the request
2657 		 * is subject to a potential use-after-free. Thus we
2658 		 * store all of the bookkeeping within port[] as
2659 		 * required, and avoid using unguarded pointers beneath
2660 		 * request itself. The same applies to the atomic
2661 		 * status notifier.
2662 		 */
2663 
2664 		csb = csb_read(engine, buf + head);
2665 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2666 			     head, upper_32_bits(csb), lower_32_bits(csb));
2667 
2668 		if (INTEL_GEN(engine->i915) >= 12)
2669 			promote = gen12_csb_parse(csb);
2670 		else
2671 			promote = gen8_csb_parse(csb);
2672 		if (promote) {
2673 			struct i915_request * const *old = execlists->active;
2674 
2675 			if (GEM_WARN_ON(!*execlists->pending)) {
2676 				execlists->error_interrupt |= ERROR_CSB;
2677 				break;
2678 			}
2679 
2680 			ring_set_paused(engine, 0);
2681 
2682 			/* Point active to the new ELSP; prevent overwriting */
2683 			WRITE_ONCE(execlists->active, execlists->pending);
2684 			smp_wmb(); /* notify execlists_active() */
2685 
2686 			/* cancel old inflight, prepare for switch */
2687 			trace_ports(execlists, "preempted", old);
2688 			while (*old)
2689 				execlists_schedule_out(*old++);
2690 
2691 			/* switch pending to inflight */
2692 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2693 			copy_ports(execlists->inflight,
2694 				   execlists->pending,
2695 				   execlists_num_ports(execlists));
2696 			smp_wmb(); /* complete the seqlock */
2697 			WRITE_ONCE(execlists->active, execlists->inflight);
2698 
2699 			/* XXX Magic delay for tgl */
2700 			ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2701 
2702 			WRITE_ONCE(execlists->pending[0], NULL);
2703 		} else {
2704 			if (GEM_WARN_ON(!*execlists->active)) {
2705 				execlists->error_interrupt |= ERROR_CSB;
2706 				break;
2707 			}
2708 
2709 			/* port0 completed, advanced to port1 */
2710 			trace_ports(execlists, "completed", execlists->active);
2711 
2712 			/*
2713 			 * We rely on the hardware being strongly
2714 			 * ordered, that the breadcrumb write is
2715 			 * coherent (visible from the CPU) before the
2716 			 * user interrupt is processed. One might assume
2717 			 * that, since the breadcrumb write precedes both
2718 			 * the user interrupt and the CS event for the
2719 			 * context switch, it would therefore be visible
2720 			 * before the CS event itself...
2721 			 */
2722 			if (GEM_SHOW_DEBUG() &&
2723 			    !i915_request_completed(*execlists->active)) {
2724 				struct i915_request *rq = *execlists->active;
2725 				const u32 *regs __maybe_unused =
2726 					rq->context->lrc_reg_state;
2727 
2728 				ENGINE_TRACE(engine,
2729 					     "context completed before request!\n");
2730 				ENGINE_TRACE(engine,
2731 					     "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2732 					     ENGINE_READ(engine, RING_START),
2733 					     ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2734 					     ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2735 					     ENGINE_READ(engine, RING_CTL),
2736 					     ENGINE_READ(engine, RING_MI_MODE));
2737 				ENGINE_TRACE(engine,
2738 					     "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2739 					     i915_ggtt_offset(rq->ring->vma),
2740 					     rq->head, rq->tail,
2741 					     rq->fence.context,
2742 					     lower_32_bits(rq->fence.seqno),
2743 					     hwsp_seqno(rq));
2744 				ENGINE_TRACE(engine,
2745 					     "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2746 					     regs[CTX_RING_START],
2747 					     regs[CTX_RING_HEAD],
2748 					     regs[CTX_RING_TAIL]);
2749 			}
2750 
2751 			execlists_schedule_out(*execlists->active++);
2752 
2753 			GEM_BUG_ON(execlists->active - execlists->inflight >
2754 				   execlists_num_ports(execlists));
2755 		}
2756 	} while (head != tail);
2757 
2758 	set_timeslice(engine);
2759 
2760 	/*
2761 	 * Gen11 has proven to fail wrt global observation point between
2762 	 * entry and tail update, failing on the ordering and thus
2763 	 * we see an old entry in the context status buffer.
2764 	 *
2765 	 * Forcibly evict out entries for the next gpu csb update,
2766 	 * to increase the odds that we get fresh entries with
2767 	 * non-working hardware. The cost of doing so comes out mostly in
2768 	 * the wash as hardware, working or not, will need to do the
2769 	 * invalidation before.
2770 	 */
2771 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2772 }
2773 
2774 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2775 {
2776 	lockdep_assert_held(&engine->active.lock);
2777 	if (!READ_ONCE(engine->execlists.pending[0])) {
2778 		rcu_read_lock(); /* protect peeking at execlists->active */
2779 		execlists_dequeue(engine);
2780 		rcu_read_unlock();
2781 	}
2782 }
2783 
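/*
 * Suspend the request, and any of its ready waiters on this engine, by
 * moving them onto the engine->active.hold list so that they are not
 * resubmitted to the HW until explicitly released by execlists_unhold().
 */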
2784 static void __execlists_hold(struct i915_request *rq)
2785 {
2786 	LIST_HEAD(list);
2787 
2788 	do {
2789 		struct i915_dependency *p;
2790 
2791 		if (i915_request_is_active(rq))
2792 			__i915_request_unsubmit(rq);
2793 
2794 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2795 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2796 		i915_request_set_hold(rq);
2797 		RQ_TRACE(rq, "on hold\n");
2798 
2799 		for_each_waiter(p, rq) {
2800 			struct i915_request *w =
2801 				container_of(p->waiter, typeof(*w), sched);
2802 
2803 			/* Leave semaphores spinning on the other engines */
2804 			if (w->engine != rq->engine)
2805 				continue;
2806 
2807 			if (!i915_request_is_ready(w))
2808 				continue;
2809 
2810 			if (i915_request_completed(w))
2811 				continue;
2812 
2813 			if (i915_request_on_hold(w))
2814 				continue;
2815 
2816 			list_move_tail(&w->sched.link, &list);
2817 		}
2818 
2819 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2820 	} while (rq);
2821 }
2822 
2823 static bool execlists_hold(struct intel_engine_cs *engine,
2824 			   struct i915_request *rq)
2825 {
2826 	if (i915_request_on_hold(rq))
2827 		return false;
2828 
2829 	spin_lock_irq(&engine->active.lock);
2830 
2831 	if (i915_request_completed(rq)) { /* too late! */
2832 		rq = NULL;
2833 		goto unlock;
2834 	}
2835 
2836 	if (rq->engine != engine) { /* preempted virtual engine */
2837 		struct virtual_engine *ve = to_virtual_engine(rq->engine);
2838 
2839 		/*
2840 		 * intel_context_inflight() is only protected by virtue
2841 		 * of process_csb() being called only by the tasklet (or
2842 		 * directly from inside reset while the tasklet is suspended).
2843 		 * Assert that neither of those are allowed to run while we
2844 		 * poke at the request queues.
2845 		 */
2846 		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2847 
2848 		/*
2849 		 * An unsubmitted request along a virtual engine will
2850 		 * remain on the active (this) engine until we are able
2851 		 * to process the context switch away (and so mark the
2852 		 * context as no longer in flight). That cannot have happened
2853 		 * yet, otherwise we would not be hanging!
2854 		 */
2855 		spin_lock(&ve->base.active.lock);
2856 		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2857 		GEM_BUG_ON(ve->request != rq);
2858 		ve->request = NULL;
2859 		spin_unlock(&ve->base.active.lock);
2860 		i915_request_put(rq);
2861 
2862 		rq->engine = engine;
2863 	}
2864 
2865 	/*
2866 	 * Transfer this request onto the hold queue to prevent it
2867 	 * being resubmitted to HW (and potentially completed) before we have
2868 	 * released it. Since we may have already submitted following
2869 	 * requests, we need to remove those as well.
2870 	 */
2871 	GEM_BUG_ON(i915_request_on_hold(rq));
2872 	GEM_BUG_ON(rq->engine != engine);
2873 	__execlists_hold(rq);
2874 	GEM_BUG_ON(list_empty(&engine->active.hold));
2875 
2876 unlock:
2877 	spin_unlock_irq(&engine->active.lock);
2878 	return rq;
2879 }
2880 
2881 static bool hold_request(const struct i915_request *rq)
2882 {
2883 	struct i915_dependency *p;
2884 	bool result = false;
2885 
2886 	/*
2887 	 * If one of our ancestors is on hold, we must also be on hold,
2888 	 * otherwise we will bypass it and execute before it.
2889 	 */
2890 	rcu_read_lock();
2891 	for_each_signaler(p, rq) {
2892 		const struct i915_request *s =
2893 			container_of(p->signaler, typeof(*s), sched);
2894 
2895 		if (s->engine != rq->engine)
2896 			continue;
2897 
2898 		result = i915_request_on_hold(s);
2899 		if (result)
2900 			break;
2901 	}
2902 	rcu_read_unlock();
2903 
2904 	return result;
2905 }
2906 
2907 static void __execlists_unhold(struct i915_request *rq)
2908 {
2909 	LIST_HEAD(list);
2910 
2911 	do {
2912 		struct i915_dependency *p;
2913 
2914 		RQ_TRACE(rq, "hold release\n");
2915 
2916 		GEM_BUG_ON(!i915_request_on_hold(rq));
2917 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2918 
2919 		i915_request_clear_hold(rq);
2920 		list_move_tail(&rq->sched.link,
2921 			       i915_sched_lookup_priolist(rq->engine,
2922 							  rq_prio(rq)));
2923 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2924 
2925 		/* Also release any children on this engine that are ready */
2926 		for_each_waiter(p, rq) {
2927 			struct i915_request *w =
2928 				container_of(p->waiter, typeof(*w), sched);
2929 
2930 			/* Propagate any change in error status */
2931 			if (rq->fence.error)
2932 				i915_request_set_error_once(w, rq->fence.error);
2933 
2934 			if (w->engine != rq->engine)
2935 				continue;
2936 
2937 			if (!i915_request_on_hold(w))
2938 				continue;
2939 
2940 			/* Check that no other parents are also on hold */
2941 			if (hold_request(w))
2942 				continue;
2943 
2944 			list_move_tail(&w->sched.link, &list);
2945 		}
2946 
2947 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2948 	} while (rq);
2949 }
2950 
2951 static void execlists_unhold(struct intel_engine_cs *engine,
2952 			     struct i915_request *rq)
2953 {
2954 	spin_lock_irq(&engine->active.lock);
2955 
2956 	/*
2957 	 * Move this request back to the priority queue, and all of its
2958 	 * children and grandchildren that were suspended along with it.
2959 	 */
2960 	__execlists_unhold(rq);
2961 
2962 	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2963 		engine->execlists.queue_priority_hint = rq_prio(rq);
2964 		tasklet_hi_schedule(&engine->execlists.tasklet);
2965 	}
2966 
2967 	spin_unlock_irq(&engine->active.lock);
2968 }
2969 
2970 struct execlists_capture {
2971 	struct work_struct work;
2972 	struct i915_request *rq;
2973 	struct i915_gpu_coredump *error;
2974 };
2975 
2976 static void execlists_capture_work(struct work_struct *work)
2977 {
2978 	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2979 	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2980 	struct intel_engine_cs *engine = cap->rq->engine;
2981 	struct intel_gt_coredump *gt = cap->error->gt;
2982 	struct intel_engine_capture_vma *vma;
2983 
2984 	/* Compress all the objects attached to the request, slow! */
2985 	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2986 	if (vma) {
2987 		struct i915_vma_compress *compress =
2988 			i915_vma_capture_prepare(gt);
2989 
2990 		intel_engine_coredump_add_vma(gt->engine, vma, compress);
2991 		i915_vma_capture_finish(gt, compress);
2992 	}
2993 
2994 	gt->simulated = gt->engine->simulated;
2995 	cap->error->simulated = gt->simulated;
2996 
2997 	/* Publish the error state, and announce it to the world */
2998 	i915_error_state_store(cap->error);
2999 	i915_gpu_coredump_put(cap->error);
3000 
3001 	/* Return this request and all that depend upon it for signaling */
3002 	execlists_unhold(engine, cap->rq);
3003 	i915_request_put(cap->rq);
3004 
3005 	kfree(cap);
3006 }
3007 
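/*
 * Allocate the coredump bookkeeping for the hung request. We are in
 * softirq context here (delaying the forced preemption), so everything
 * must be GFP_ATOMIC; the slow compression of the captured objects is
 * deferred to execlists_capture_work() above.
 */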
3008 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
3009 {
3010 	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
3011 	struct execlists_capture *cap;
3012 
3013 	cap = kmalloc(sizeof(*cap), gfp);
3014 	if (!cap)
3015 		return NULL;
3016 
3017 	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
3018 	if (!cap->error)
3019 		goto err_cap;
3020 
3021 	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
3022 	if (!cap->error->gt)
3023 		goto err_gpu;
3024 
3025 	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
3026 	if (!cap->error->gt->engine)
3027 		goto err_gt;
3028 
3029 	cap->error->gt->engine->hung = true;
3030 
3031 	return cap;
3032 
3033 err_gt:
3034 	kfree(cap->error->gt);
3035 err_gpu:
3036 	kfree(cap->error);
3037 err_cap:
3038 	kfree(cap);
3039 	return NULL;
3040 }
3041 
3042 static struct i915_request *
3043 active_context(struct intel_engine_cs *engine, u32 ccid)
3044 {
3045 	const struct intel_engine_execlists * const el = &engine->execlists;
3046 	struct i915_request * const *port, *rq;
3047 
3048 	/*
3049 	 * Use the most recent result from process_csb(), but just in case
3050 	 * we trigger an error (via interrupt) before the first CS event has
3051 	 * been written, peek at the next submission.
3052 	 */
3053 
3054 	for (port = el->active; (rq = *port); port++) {
3055 		if (rq->context->lrc.ccid == ccid) {
3056 			ENGINE_TRACE(engine,
3057 				     "ccid found at active:%zd\n",
3058 				     port - el->active);
3059 			return rq;
3060 		}
3061 	}
3062 
3063 	for (port = el->pending; (rq = *port); port++) {
3064 		if (rq->context->lrc.ccid == ccid) {
3065 			ENGINE_TRACE(engine,
3066 				     "ccid found at pending:%zd\n",
3067 				     port - el->pending);
3068 			return rq;
3069 		}
3070 	}
3071 
3072 	ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3073 	return NULL;
3074 }
3075 
3076 static u32 active_ccid(struct intel_engine_cs *engine)
3077 {
3078 	return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3079 }
3080 
3081 static void execlists_capture(struct intel_engine_cs *engine)
3082 {
3083 	struct execlists_capture *cap;
3084 
3085 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3086 		return;
3087 
3088 	/*
3089 	 * We need to _quickly_ capture the engine state before we reset.
3090 	 * We are inside an atomic section (softirq) here and we are delaying
3091 	 * the forced preemption event.
3092 	 */
3093 	cap = capture_regs(engine);
3094 	if (!cap)
3095 		return;
3096 
3097 	spin_lock_irq(&engine->active.lock);
3098 	cap->rq = active_context(engine, active_ccid(engine));
3099 	if (cap->rq) {
3100 		cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3101 		cap->rq = i915_request_get_rcu(cap->rq);
3102 	}
3103 	spin_unlock_irq(&engine->active.lock);
3104 	if (!cap->rq)
3105 		goto err_free;
3106 
3107 	/*
3108 	 * Remove the request from the execlists queue, and take ownership
3109 	 * of the request. We pass it to our worker who will _slowly_ compress
3110 	 * all the pages the _user_ requested for debugging their batch, after
3111 	 * which we return it to the queue for signaling.
3112 	 *
3113 	 * By removing them from the execlists queue, we also remove the
3114 	 * requests from being processed by __unwind_incomplete_requests()
3115 	 * during the intel_engine_reset(), and so they will *not* be replayed
3116 	 * afterwards.
3117 	 *
3118 	 * Note that because we have not yet reset the engine at this point,
3119 	 * it is possible that the request we have identified as being
3120 	 * guilty did in fact complete, and we will then hit an arbitration
3121 	 * point allowing the outstanding preemption to succeed. The likelihood
3122 	 * of that is very low (as capturing of the engine registers should be
3123 	 * fast enough to run inside an irq-off atomic section!), so we will
3124 	 * simply hold that request accountable for being non-preemptible
3125 	 * long enough to force the reset.
3126 	 */
3127 	if (!execlists_hold(engine, cap->rq))
3128 		goto err_rq;
3129 
3130 	INIT_WORK(&cap->work, execlists_capture_work);
3131 	schedule_work(&cap->work);
3132 	return;
3133 
3134 err_rq:
3135 	i915_request_put(cap->rq);
3136 err_free:
3137 	i915_gpu_coredump_put(cap->error);
3138 	kfree(cap);
3139 }
3140 
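/*
 * Engine-local reset issued from the submission path: freeze the current
 * request in place, capture its state for the error report, then reset
 * the engine. The tasklet is disabled without syncing since we may be
 * running from inside it.
 */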
3141 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3142 {
3143 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
3144 	unsigned long *lock = &engine->gt->reset.flags;
3145 
3146 	if (!intel_has_reset_engine(engine->gt))
3147 		return;
3148 
3149 	if (test_and_set_bit(bit, lock))
3150 		return;
3151 
3152 	ENGINE_TRACE(engine, "reset for %s\n", msg);
3153 
3154 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
3155 	tasklet_disable_nosync(&engine->execlists.tasklet);
3156 
3157 	ring_set_paused(engine, 1); /* Freeze the current request in place */
3158 	execlists_capture(engine);
3159 	intel_engine_reset(engine, msg);
3160 
3161 	tasklet_enable(&engine->execlists.tasklet);
3162 	clear_and_wake_up_bit(bit, lock);
3163 }
3164 
3165 static bool preempt_timeout(const struct intel_engine_cs *const engine)
3166 {
3167 	const struct timer_list *t = &engine->execlists.preempt;
3168 
3169 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3170 		return false;
3171 
3172 	if (!timer_expired(t))
3173 		return false;
3174 
3175 	return READ_ONCE(engine->execlists.pending[0]);
3176 }
3177 
3178 /*
3179  * Check the unread Context Status Buffers and manage the submission of new
3180  * contexts to the ELSP accordingly.
3181  */
3182 static void execlists_submission_tasklet(unsigned long data)
3183 {
3184 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3185 	bool timeout = preempt_timeout(engine);
3186 
3187 	process_csb(engine);
3188 
3189 	if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3190 		const char *msg;
3191 
3192 		/* Generate the error message in priority wrt to the user! */
3193 		/* Generate the error message in priority wrt the user! */
3194 			msg = "CS error"; /* thrown by a user payload */
3195 		else if (engine->execlists.error_interrupt & ERROR_CSB)
3196 			msg = "invalid CSB event";
3197 		else
3198 			msg = "internal error";
3199 
3200 		engine->execlists.error_interrupt = 0;
3201 		execlists_reset(engine, msg);
3202 	}
3203 
3204 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3205 		unsigned long flags;
3206 
3207 		spin_lock_irqsave(&engine->active.lock, flags);
3208 		__execlists_submission_tasklet(engine);
3209 		spin_unlock_irqrestore(&engine->active.lock, flags);
3210 
3211 		/* Recheck after serialising with direct-submission */
3212 		if (unlikely(timeout && preempt_timeout(engine))) {
3213 			cancel_timer(&engine->execlists.preempt);
3214 			execlists_reset(engine, "preemption time out");
3215 		}
3216 	}
3217 }
3218 
3219 static void __execlists_kick(struct intel_engine_execlists *execlists)
3220 {
3221 	/* Kick the tasklet for some interrupt coalescing and reset handling */
3222 	tasklet_hi_schedule(&execlists->tasklet);
3223 }
3224 
3225 #define execlists_kick(t, member) \
3226 	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
3227 
3228 static void execlists_timeslice(struct timer_list *timer)
3229 {
3230 	execlists_kick(timer, timer);
3231 }
3232 
3233 static void execlists_preempt(struct timer_list *timer)
3234 {
3235 	execlists_kick(timer, preempt);
3236 }
3237 
3238 static void queue_request(struct intel_engine_cs *engine,
3239 			  struct i915_request *rq)
3240 {
3241 	GEM_BUG_ON(!list_empty(&rq->sched.link));
3242 	list_add_tail(&rq->sched.link,
3243 		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
3244 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3245 }
3246 
3247 static void __submit_queue_imm(struct intel_engine_cs *engine)
3248 {
3249 	struct intel_engine_execlists * const execlists = &engine->execlists;
3250 
3251 	if (reset_in_progress(execlists))
3252 		return; /* defer until we restart the engine following reset */
3253 
3254 	__execlists_submission_tasklet(engine);
3255 }
3256 
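/*
 * Kick direct submission only if this request outranks everything we
 * already know about (the queue_priority_hint); lower or equal priority
 * requests will be picked up by a later dequeue anyway.
 */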
3257 static void submit_queue(struct intel_engine_cs *engine,
3258 			 const struct i915_request *rq)
3259 {
3260 	struct intel_engine_execlists *execlists = &engine->execlists;
3261 
3262 	if (rq_prio(rq) <= execlists->queue_priority_hint)
3263 		return;
3264 
3265 	execlists->queue_priority_hint = rq_prio(rq);
3266 	__submit_queue_imm(engine);
3267 }
3268 
3269 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3270 			     const struct i915_request *rq)
3271 {
3272 	GEM_BUG_ON(i915_request_on_hold(rq));
3273 	return !list_empty(&engine->active.hold) && hold_request(rq);
3274 }
3275 
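/*
 * Opportunistically process any CSB events already written by the HW so
 * that, with luck, execlists->pending[] is clear by the time we try to
 * submit and we can dequeue immediately.
 */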
3276 static void flush_csb(struct intel_engine_cs *engine)
3277 {
3278 	struct intel_engine_execlists *el = &engine->execlists;
3279 
3280 	if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
3281 		if (!reset_in_progress(el))
3282 			process_csb(engine);
3283 		tasklet_unlock(&el->tasklet);
3284 	}
3285 }
3286 
3287 static void execlists_submit_request(struct i915_request *request)
3288 {
3289 	struct intel_engine_cs *engine = request->engine;
3290 	unsigned long flags;
3291 
3292 	/* Hopefully we clear execlists->pending[] to let us through */
3293 	flush_csb(engine);
3294 
3295 	/* Will be called from irq-context when using foreign fences. */
3296 	spin_lock_irqsave(&engine->active.lock, flags);
3297 
3298 	if (unlikely(ancestor_on_hold(engine, request))) {
3299 		RQ_TRACE(request, "ancestor on hold\n");
3300 		list_add_tail(&request->sched.link, &engine->active.hold);
3301 		i915_request_set_hold(request);
3302 	} else {
3303 		queue_request(engine, request);
3304 
3305 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3306 		GEM_BUG_ON(list_empty(&request->sched.link));
3307 
3308 		submit_queue(engine, request);
3309 	}
3310 
3311 	spin_unlock_irqrestore(&engine->active.lock, flags);
3312 }
3313 
3314 static void __execlists_context_fini(struct intel_context *ce)
3315 {
3316 	intel_ring_put(ce->ring);
3317 	i915_vma_put(ce->state);
3318 }
3319 
3320 static void execlists_context_destroy(struct kref *kref)
3321 {
3322 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3323 
3324 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3325 	GEM_BUG_ON(intel_context_is_pinned(ce));
3326 
3327 	if (ce->state)
3328 		__execlists_context_fini(ce);
3329 
3330 	intel_context_fini(ce);
3331 	intel_context_free(ce);
3332 }
3333 
3334 static void
3335 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3336 {
3337 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3338 		return;
3339 
3340 	vaddr += engine->context_size;
3341 
3342 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3343 }
3344 
3345 static void
3346 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3347 {
3348 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3349 		return;
3350 
3351 	vaddr += engine->context_size;
3352 
3353 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3354 		drm_err_once(&engine->i915->drm,
3355 			     "%s context redzone overwritten!\n",
3356 			     engine->name);
3357 }
3358 
3359 static void execlists_context_unpin(struct intel_context *ce)
3360 {
3361 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3362 		      ce->engine);
3363 }
3364 
3365 static void execlists_context_post_unpin(struct intel_context *ce)
3366 {
3367 	i915_gem_object_unpin_map(ce->state->obj);
3368 }
3369 
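/*
 * The gen12 indirect-context workaround snippets below all shuttle data
 * through CS GPR0: the timestamp wa copies the saved CTX_TIMESTAMP from
 * the context image back into RING_CTX_TIMESTAMP, the cmd-buf wa does
 * the same for RING_CMD_BUF_CCTL, and the scratch helper finally
 * restores GPR0 itself from the context image.
 */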
3370 static u32 *
3371 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3372 {
3373 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3374 		MI_SRM_LRM_GLOBAL_GTT |
3375 		MI_LRI_LRM_CS_MMIO;
3376 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3377 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3378 		CTX_TIMESTAMP * sizeof(u32);
3379 	*cs++ = 0;
3380 
3381 	*cs++ = MI_LOAD_REGISTER_REG |
3382 		MI_LRR_SOURCE_CS_MMIO |
3383 		MI_LRI_LRM_CS_MMIO;
3384 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3385 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3386 
3387 	*cs++ = MI_LOAD_REGISTER_REG |
3388 		MI_LRR_SOURCE_CS_MMIO |
3389 		MI_LRI_LRM_CS_MMIO;
3390 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3391 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3392 
3393 	return cs;
3394 }
3395 
3396 static u32 *
3397 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3398 {
3399 	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3400 
3401 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3402 		MI_SRM_LRM_GLOBAL_GTT |
3403 		MI_LRI_LRM_CS_MMIO;
3404 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3405 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3406 		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3407 	*cs++ = 0;
3408 
3409 	return cs;
3410 }
3411 
3412 static u32 *
3413 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3414 {
3415 	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3416 
3417 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3418 		MI_SRM_LRM_GLOBAL_GTT |
3419 		MI_LRI_LRM_CS_MMIO;
3420 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3421 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3422 		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3423 	*cs++ = 0;
3424 
3425 	*cs++ = MI_LOAD_REGISTER_REG |
3426 		MI_LRR_SOURCE_CS_MMIO |
3427 		MI_LRI_LRM_CS_MMIO;
3428 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3429 	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3430 
3431 	return cs;
3432 }
3433 
3434 static u32 *
3435 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3436 {
3437 	cs = gen12_emit_timestamp_wa(ce, cs);
3438 	cs = gen12_emit_cmd_buf_wa(ce, cs);
3439 	cs = gen12_emit_restore_scratch(ce, cs);
3440 
3441 	return cs;
3442 }
3443 
3444 static u32 *
3445 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3446 {
3447 	cs = gen12_emit_timestamp_wa(ce, cs);
3448 	cs = gen12_emit_restore_scratch(ce, cs);
3449 
3450 	return cs;
3451 }
3452 
3453 static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3454 {
3455 	return PAGE_SIZE * ce->wa_bb_page;
3456 }
3457 
3458 static u32 *context_indirect_bb(const struct intel_context *ce)
3459 {
3460 	void *ptr;
3461 
3462 	GEM_BUG_ON(!ce->wa_bb_page);
3463 
3464 	ptr = ce->lrc_reg_state;
3465 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3466 	ptr += context_wa_bb_offset(ce);
3467 
3468 	return ptr;
3469 }
3470 
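/*
 * Emit the per-context indirect context buffer into the wa_bb page of
 * the context image, pad it with MI_NOOPs up to a cacheline boundary,
 * and hand its GGTT offset and size to lrc_ring_setup_indirect_ctx()
 * to wire it into the register state.
 */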
3471 static void
3472 setup_indirect_ctx_bb(const struct intel_context *ce,
3473 		      const struct intel_engine_cs *engine,
3474 		      u32 *(*emit)(const struct intel_context *, u32 *))
3475 {
3476 	u32 * const start = context_indirect_bb(ce);
3477 	u32 *cs;
3478 
3479 	cs = emit(ce, start);
3480 	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3481 	while ((unsigned long)cs % CACHELINE_BYTES)
3482 		*cs++ = MI_NOOP;
3483 
3484 	lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3485 				    i915_ggtt_offset(ce->state) +
3486 				    context_wa_bb_offset(ce),
3487 				    (cs - start) * sizeof(*cs));
3488 }
3489 
3490 static void
3491 __execlists_update_reg_state(const struct intel_context *ce,
3492 			     const struct intel_engine_cs *engine,
3493 			     u32 head)
3494 {
3495 	struct intel_ring *ring = ce->ring;
3496 	u32 *regs = ce->lrc_reg_state;
3497 
3498 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3499 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3500 
3501 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3502 	regs[CTX_RING_HEAD] = head;
3503 	regs[CTX_RING_TAIL] = ring->tail;
3504 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3505 
3506 	/* RPCS */
3507 	if (engine->class == RENDER_CLASS) {
3508 		regs[CTX_R_PWR_CLK_STATE] =
3509 			intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3510 
3511 		i915_oa_init_reg_state(ce, engine);
3512 	}
3513 
3514 	if (ce->wa_bb_page) {
3515 		u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3516 
3517 		fn = gen12_emit_indirect_ctx_xcs;
3518 		if (ce->engine->class == RENDER_CLASS)
3519 			fn = gen12_emit_indirect_ctx_rcs;
3520 
3521 		/* Mutually exclusive wrt the global indirect bb */
3522 		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3523 		setup_indirect_ctx_bb(ce, engine, fn);
3524 	}
3525 }
3526 
3527 static int
3528 execlists_context_pre_pin(struct intel_context *ce,
3529 			  struct i915_gem_ww_ctx *ww, void **vaddr)
3530 {
3531 	GEM_BUG_ON(!ce->state);
3532 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3533 
3534 	*vaddr = i915_gem_object_pin_map(ce->state->obj,
3535 					i915_coherent_map_type(ce->engine->i915) |
3536 					I915_MAP_OVERRIDE);
3537 
3538 	return PTR_ERR_OR_ZERO(*vaddr);
3539 }
3540 
3541 static int
3542 __execlists_context_pin(struct intel_context *ce,
3543 			struct intel_engine_cs *engine,
3544 			void *vaddr)
3545 {
3546 	ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3547 	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3548 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
3549 
3550 	return 0;
3551 }
3552 
3553 static int execlists_context_pin(struct intel_context *ce, void *vaddr)
3554 {
3555 	return __execlists_context_pin(ce, ce->engine, vaddr);
3556 }
3557 
3558 static int execlists_context_alloc(struct intel_context *ce)
3559 {
3560 	return __execlists_context_alloc(ce, ce->engine);
3561 }
3562 
3563 static void execlists_context_reset(struct intel_context *ce)
3564 {
3565 	CE_TRACE(ce, "reset\n");
3566 	GEM_BUG_ON(!intel_context_is_pinned(ce));
3567 
3568 	intel_ring_reset(ce->ring, ce->ring->emit);
3569 
3570 	/* Scrub away the garbage */
3571 	execlists_init_reg_state(ce->lrc_reg_state,
3572 				 ce, ce->engine, ce->ring, true);
3573 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3574 
3575 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3576 }
3577 
3578 static const struct intel_context_ops execlists_context_ops = {
3579 	.alloc = execlists_context_alloc,
3580 
3581 	.pre_pin = execlists_context_pre_pin,
3582 	.pin = execlists_context_pin,
3583 	.unpin = execlists_context_unpin,
3584 	.post_unpin = execlists_context_post_unpin,
3585 
3586 	.enter = intel_context_enter_engine,
3587 	.exit = intel_context_exit_engine,
3588 
3589 	.reset = execlists_context_reset,
3590 	.destroy = execlists_context_destroy,
3591 };
3592 
3593 static u32 hwsp_offset(const struct i915_request *rq)
3594 {
3595 	const struct intel_timeline_cacheline *cl;
3596 
3597 	/* Before the request is executed, the timeline/cacheline is fixed */
3598 
3599 	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
3600 	if (cl)
3601 		return cl->ggtt_offset;
3602 
3603 	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
3604 }
3605 
3606 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3607 {
3608 	u32 *cs;
3609 
3610 	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3611 	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3612 		return 0;
3613 
3614 	cs = intel_ring_begin(rq, 6);
3615 	if (IS_ERR(cs))
3616 		return PTR_ERR(cs);
3617 
3618 	/*
3619 	 * Check if we have been preempted before we even get started.
3620 	 *
3621 	 * After this point i915_request_started() reports true, even if
3622 	 * we get preempted and so are no longer running.
3623 	 */
3624 	*cs++ = MI_ARB_CHECK;
3625 	*cs++ = MI_NOOP;
3626 
3627 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3628 	*cs++ = hwsp_offset(rq);
3629 	*cs++ = 0;
3630 	*cs++ = rq->fence.seqno - 1;
3631 
3632 	intel_ring_advance(rq, cs);
3633 
3634 	/* Record the updated position of the request's payload */
3635 	rq->infix = intel_ring_offset(rq, cs);
3636 
3637 	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3638 
3639 	return 0;
3640 }
3641 
3642 static int emit_pdps(struct i915_request *rq)
3643 {
3644 	const struct intel_engine_cs * const engine = rq->engine;
3645 	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3646 	int err, i;
3647 	u32 *cs;
3648 
3649 	GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
3650 
3651 	/*
3652 	 * Beware ye of the dragons, this sequence is magic!
3653 	 *
3654 	 * Small changes to this sequence can cause anything from
3655 	 * GPU hangs to forcewake errors and machine lockups!
3656 	 */
3657 
3658 	/* Flush any residual operations from the context load */
3659 	err = engine->emit_flush(rq, EMIT_FLUSH);
3660 	if (err)
3661 		return err;
3662 
3663 	/* Magic required to prevent forcewake errors! */
3664 	err = engine->emit_flush(rq, EMIT_INVALIDATE);
3665 	if (err)
3666 		return err;
3667 
3668 	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3669 	if (IS_ERR(cs))
3670 		return PTR_ERR(cs);
3671 
3672 	/* Ensure the LRIs have landed before we invalidate & continue */
3673 	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3674 	for (i = GEN8_3LVL_PDPES; i--; ) {
3675 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3676 		u32 base = engine->mmio_base;
3677 
3678 		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3679 		*cs++ = upper_32_bits(pd_daddr);
3680 		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3681 		*cs++ = lower_32_bits(pd_daddr);
3682 	}
3683 	*cs++ = MI_NOOP;
3684 
3685 	intel_ring_advance(rq, cs);
3686 
3687 	return 0;
3688 }
3689 
3690 static int execlists_request_alloc(struct i915_request *request)
3691 {
3692 	int ret;
3693 
3694 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
3695 
3696 	/*
3697 	 * Flush enough space to reduce the likelihood of waiting after
3698 	 * we start building the request - in which case we will just
3699 	 * have to repeat work.
3700 	 */
3701 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
3702 
3703 	/*
3704 	 * Note that after this point, we have committed to using
3705 	 * this request as it is being used to both track the
3706 	 * state of engine initialisation and liveness of the
3707 	 * golden renderstate above. Think twice before you try
3708 	 * to cancel/unwind this request now.
3709 	 */
3710 
3711 	if (!i915_vm_is_4lvl(request->context->vm)) {
3712 		ret = emit_pdps(request);
3713 		if (ret)
3714 			return ret;
3715 	}
3716 
3717 	/* Unconditionally invalidate GPU caches and TLBs. */
3718 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3719 	if (ret)
3720 		return ret;
3721 
3722 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3723 	return 0;
3724 }
3725 
3726 /*
3727  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3728  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3729  * but there is a slight complication: as this is applied in a WA batch, the
3730  * values are only initialized once, so we cannot read the register value at the
3731  * beginning and reuse it later; hence we save its value to memory, upload a
3732  * constant value with bit21 set and then restore it from the saved value.
3733  * To simplify the WA, the constant value is formed from the default value
3734  * of this register. This shouldn't be a problem because we are only modifying
3735  * it for a short period and this batch is non-preemptible. We could of course
3736  * use additional instructions that read the actual value of the register
3737  * at that time and set our bit of interest but it makes the WA complicated.
3738  *
3739  * This WA is also required for Gen9 so extracting as a function avoids
3740  * code duplication.
3741  */
3742 static u32 *
3743 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3744 {
3745 	/* NB no one else is allowed to scribble over scratch + 256! */
3746 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3747 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3748 	*batch++ = intel_gt_scratch_offset(engine->gt,
3749 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3750 	*batch++ = 0;
3751 
3752 	*batch++ = MI_LOAD_REGISTER_IMM(1);
3753 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3754 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3755 
3756 	batch = gen8_emit_pipe_control(batch,
3757 				       PIPE_CONTROL_CS_STALL |
3758 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
3759 				       0);
3760 
3761 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3762 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3763 	*batch++ = intel_gt_scratch_offset(engine->gt,
3764 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3765 	*batch++ = 0;
3766 
3767 	return batch;
3768 }
3769 
3770 /*
3771  * Typically we have only one indirect_ctx and one per_ctx batch buffer, which are
3772  * initialized at the beginning and shared across all contexts, but this field
3773  * lets us have multiple batches at different offsets and select them based
3774  * on some criterion. At the moment the batch always starts at the beginning of the page
3775  * and we don't have multiple wa_ctx batch buffers.
3776  *
3777  * The number of WAs applied is not known up front; we use this field
3778  * to return the number of DWORDs written.
3779  *
3780  * Note that this batch does not contain MI_BATCH_BUFFER_END,
3781  * so it adds NOOPs as padding to make it cacheline aligned.
3782  * MI_BATCH_BUFFER_END will be added to the per-ctx batch, and the two together
3783  * make a complete batch buffer.
3784  */
3785 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3786 {
3787 	/* WaDisableCtxRestoreArbitration:bdw,chv */
3788 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3789 
3790 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3791 	if (IS_BROADWELL(engine->i915))
3792 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3793 
3794 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3795 	/* Actual scratch location is at 128 bytes offset */
3796 	batch = gen8_emit_pipe_control(batch,
3797 				       PIPE_CONTROL_FLUSH_L3 |
3798 				       PIPE_CONTROL_STORE_DATA_INDEX |
3799 				       PIPE_CONTROL_CS_STALL |
3800 				       PIPE_CONTROL_QW_WRITE,
3801 				       LRC_PPHWSP_SCRATCH_ADDR);
3802 
3803 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3804 
3805 	/* Pad to end of cacheline */
3806 	while ((unsigned long)batch % CACHELINE_BYTES)
3807 		*batch++ = MI_NOOP;
3808 
3809 	/*
3810 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3811 	 * execution depends on the length specified in terms of cache lines
3812 	 * in the register CTX_RCS_INDIRECT_CTX
3813 	 */
3814 
3815 	return batch;
3816 }
3817 
3818 struct lri {
3819 	i915_reg_t reg;
3820 	u32 value;
3821 };
3822 
3823 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3824 {
3825 	GEM_BUG_ON(!count || count > 63);
3826 
3827 	*batch++ = MI_LOAD_REGISTER_IMM(count);
3828 	do {
3829 		*batch++ = i915_mmio_reg_offset(lri->reg);
3830 		*batch++ = lri->value;
3831 	} while (lri++, --count);
3832 	*batch++ = MI_NOOP;
3833 
3834 	return batch;
3835 }
3836 
3837 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3838 {
3839 	static const struct lri lri[] = {
3840 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3841 		{
3842 			COMMON_SLICE_CHICKEN2,
3843 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3844 				       0),
3845 		},
3846 
3847 		/* BSpec: 11391 */
3848 		{
3849 			FF_SLICE_CHICKEN,
3850 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3851 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3852 		},
3853 
3854 		/* BSpec: 11299 */
3855 		{
3856 			_3D_CHICKEN3,
3857 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3858 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3859 		}
3860 	};
3861 
3862 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3863 
3864 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3865 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3866 
3867 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3868 	batch = gen8_emit_pipe_control(batch,
3869 				       PIPE_CONTROL_FLUSH_L3 |
3870 				       PIPE_CONTROL_STORE_DATA_INDEX |
3871 				       PIPE_CONTROL_CS_STALL |
3872 				       PIPE_CONTROL_QW_WRITE,
3873 				       LRC_PPHWSP_SCRATCH_ADDR);
3874 
3875 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3876 
3877 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
3878 	if (HAS_POOLED_EU(engine->i915)) {
3879 		/*
3880 		 * The EU pool configuration is set up along with the golden context
3881 		 * during context initialization. The value depends on the
3882 		 * device type (2x6 or 3x6) and needs to be updated based
3883 		 * on which subslices are disabled, especially for 2x6
3884 		 * devices. However, it is safe to load the default
3885 		 * 3x6 configuration instead of masking off the
3886 		 * corresponding bits, because the HW ignores the bits of a disabled
3887 		 * subslice and drops down to the appropriate config. Please
3888 		 * see render_state_setup() in i915_gem_render_state.c for the
3889 		 * possible configurations; to avoid duplication they are
3890 		 * not shown here again.
3891 		 */
3892 		*batch++ = GEN9_MEDIA_POOL_STATE;
3893 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
3894 		*batch++ = 0x00777000;
3895 		*batch++ = 0;
3896 		*batch++ = 0;
3897 		*batch++ = 0;
3898 	}
3899 
3900 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3901 
3902 	/* Pad to end of cacheline */
3903 	while ((unsigned long)batch % CACHELINE_BYTES)
3904 		*batch++ = MI_NOOP;
3905 
3906 	return batch;
3907 }
3908 
3909 static u32 *
3910 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3911 {
3912 	int i;
3913 
3914 	/*
3915 	 * WaPipeControlBefore3DStateSamplePattern: cnl
3916 	 *
3917 	 * Ensure the engine is idle prior to programming a
3918 	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3919 	 */
3920 	batch = gen8_emit_pipe_control(batch,
3921 				       PIPE_CONTROL_CS_STALL,
3922 				       0);
3923 	/*
3924 	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3925 	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3926 	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3927 	 * confusing. Since gen8_emit_pipe_control() already advances the
3928 	 * batch by 6 dwords, we advance the other 10 here, completing a
3929 	 * cacheline. It's not clear if the workaround requires this padding
3930 	 * before other commands, or if it's just the regular padding we would
3931 	 * already have for the workaround bb, so leave it here for now.
3932 	 */
3933 	for (i = 0; i < 10; i++)
3934 		*batch++ = MI_NOOP;
3935 
3936 	/* Pad to end of cacheline */
3937 	while ((unsigned long)batch % CACHELINE_BYTES)
3938 		*batch++ = MI_NOOP;
3939 
3940 	return batch;
3941 }
3942 
3943 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3944 
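/*
 * Allocate the single shmem page (CTX_WA_BB_OBJ_SIZE) that backs the
 * engine's indirect_ctx/per_ctx workaround batches and pin it high in
 * the global GTT; intel_init_workaround_bb() below fills it in.
 */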
3945 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3946 {
3947 	struct drm_i915_gem_object *obj;
3948 	struct i915_vma *vma;
3949 	int err;
3950 
3951 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3952 	if (IS_ERR(obj))
3953 		return PTR_ERR(obj);
3954 
3955 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3956 	if (IS_ERR(vma)) {
3957 		err = PTR_ERR(vma);
3958 		goto err;
3959 	}
3960 
3961 	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
3962 	if (err)
3963 		goto err;
3964 
3965 	engine->wa_ctx.vma = vma;
3966 	return 0;
3967 
3968 err:
3969 	i915_gem_object_put(obj);
3970 	return err;
3971 }
3972 
3973 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3974 {
3975 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3976 }
3977 
3978 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3979 
3980 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3981 {
3982 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3983 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3984 					    &wa_ctx->per_ctx };
3985 	wa_bb_func_t wa_bb_fn[2];
3986 	void *batch, *batch_ptr;
3987 	unsigned int i;
3988 	int ret;
3989 
3990 	if (engine->class != RENDER_CLASS)
3991 		return 0;
3992 
3993 	switch (INTEL_GEN(engine->i915)) {
3994 	case 12:
3995 	case 11:
3996 		return 0;
3997 	case 10:
3998 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
3999 		wa_bb_fn[1] = NULL;
4000 		break;
4001 	case 9:
4002 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
4003 		wa_bb_fn[1] = NULL;
4004 		break;
4005 	case 8:
4006 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
4007 		wa_bb_fn[1] = NULL;
4008 		break;
4009 	default:
4010 		MISSING_CASE(INTEL_GEN(engine->i915));
4011 		return 0;
4012 	}
4013 
4014 	ret = lrc_setup_wa_ctx(engine);
4015 	if (ret) {
4016 		drm_dbg(&engine->i915->drm,
4017 			"Failed to setup context WA page: %d\n", ret);
4018 		return ret;
4019 	}
4020 
4021 	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
4022 
4023 	/*
4024 	 * Emit the two workaround batch buffers, recording the offset from the
4025 	 * start of the workaround batch buffer object for each and their
4026 	 * respective sizes.
4027 	 */
4028 	batch_ptr = batch;
4029 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
4030 		wa_bb[i]->offset = batch_ptr - batch;
4031 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
4032 						  CACHELINE_BYTES))) {
4033 			ret = -EINVAL;
4034 			break;
4035 		}
4036 		if (wa_bb_fn[i])
4037 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
4038 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
4039 	}
4040 	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
4041 
4042 	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
4043 	__i915_gem_object_release_map(wa_ctx->vma->obj);
4044 	if (ret)
4045 		lrc_destroy_wa_ctx(engine);
4046 
4047 	return ret;
4048 }
4049 
4050 static void reset_csb_pointers(struct intel_engine_cs *engine)
4051 {
4052 	struct intel_engine_execlists * const execlists = &engine->execlists;
4053 	const unsigned int reset_value = execlists->csb_size - 1;
4054 
4055 	ring_set_paused(engine, 0);
4056 
4057 	/*
4058 	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
4059 	 * Bludgeon them with a mmio update to be sure.
4060 	 */
4061 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4062 		     0xffff << 16 | reset_value << 8 | reset_value);
4063 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4064 
4065 	/*
4066 	 * After a reset, the HW starts writing into CSB entry [0]. We
4067 	 * therefore have to set our HEAD pointer back one entry so that
4068 	 * the *first* entry we check is entry 0. To complicate this further,
4069 	 * as we don't wait for the first interrupt after reset, we have to
4070 	 * fake the HW write to point back to the last entry so that our
4071 	 * inline comparison of our cached head position against the last HW
4072 	 * write works even before the first interrupt.
4073 	 */
4074 	execlists->csb_head = reset_value;
4075 	WRITE_ONCE(*execlists->csb_write, reset_value);
4076 	wmb(); /* Make sure this is visible to HW (paranoia?) */
4077 
4078 	/* Check that the GPU does indeed update the CSB entries! */
4079 	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
4080 	invalidate_csb_entries(&execlists->csb_status[0],
4081 			       &execlists->csb_status[reset_value]);
4082 
4083 	/* Once more for luck and our trusty paranoia */
4084 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4085 		     0xffff << 16 | reset_value << 8 | reset_value);
4086 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4087 
4088 	GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
4089 }
4090 
4091 static void execlists_sanitize(struct intel_engine_cs *engine)
4092 {
4093 	GEM_BUG_ON(execlists_active(&engine->execlists));
4094 
4095 	/*
4096 	 * Poison residual state on resume, in case the suspend didn't!
4097 	 *
4098 	 * We have to assume that across suspend/resume (or other loss
4099 	 * of control) that the contents of our pinned buffers has been
4100 	 * lost, replaced by garbage. Since this doesn't always happen,
4101 	 * let's poison such state so that we more quickly spot when
4102 	 * we falsely assume it has been preserved.
4103 	 */
4104 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4105 		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4106 
4107 	reset_csb_pointers(engine);
4108 
4109 	/*
4110 	 * The kernel_context HWSP is stored in the status_page. As above,
4111 	 * that may be lost on resume/initialisation, and so we need to
4112 	 * reset the value in the HWSP.
4113 	 */
4114 	intel_timeline_reset_seqno(engine->kernel_context->timeline);
4115 
4116 	/* And scrub the dirty cachelines for the HWSP */
4117 	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4118 }
4119 
4120 static void enable_error_interrupt(struct intel_engine_cs *engine)
4121 {
4122 	u32 status;
4123 
4124 	engine->execlists.error_interrupt = 0;
4125 	ENGINE_WRITE(engine, RING_EMR, ~0u);
4126 	ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4127 
4128 	status = ENGINE_READ(engine, RING_ESR);
4129 	if (unlikely(status)) {
4130 		drm_err(&engine->i915->drm,
4131 			"engine '%s' resumed still in error: %08x\n",
4132 			engine->name, status);
4133 		__intel_gt_reset(engine->gt, engine->mask);
4134 	}
4135 
4136 	/*
4137 	 * On current gen8+, we have 2 signals to play with
4138 	 *
4139 	 * - I915_ERROR_INSTRUCTION (bit 0)
4140 	 *
4141 	 *    Generate an error if the command parser encounters an invalid
4142 	 *    instruction
4143 	 *
4144 	 *    This is a fatal error.
4145 	 *
4146 	 * - CP_PRIV (bit 2)
4147 	 *
4148 	 *    Generate an error on privilege violation (where the CP replaces
4149 	 *    the instruction with a no-op). This also fires for writes into
4150 	 *    read-only scratch pages.
4151 	 *
4152 	 *    This is a non-fatal error, parsing continues.
4153 	 *
4154 	 * * there are a few others defined for odd HW that we do not use
4155 	 *
4156 	 * Since CP_PRIV fires for cases where we have chosen to ignore the
4157 	 * error (as the HW is validating and suppressing the mistakes), we
4158 	 * only unmask the instruction error bit.
4159 	 */
4160 	ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4161 }
4162 
4163 static void enable_execlists(struct intel_engine_cs *engine)
4164 {
4165 	u32 mode;
4166 
4167 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4168 
4169 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4170 
4171 	if (INTEL_GEN(engine->i915) >= 11)
4172 		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4173 	else
4174 		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4175 	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4176 
4177 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4178 
4179 	ENGINE_WRITE_FW(engine,
4180 			RING_HWS_PGA,
4181 			i915_ggtt_offset(engine->status_page.vma));
4182 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4183 
4184 	enable_error_interrupt(engine);
4185 
4186 	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4187 }
4188 
4189 static bool unexpected_starting_state(struct intel_engine_cs *engine)
4190 {
4191 	bool unexpected = false;
4192 
4193 	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4194 		drm_dbg(&engine->i915->drm,
4195 			"STOP_RING still set in RING_MI_MODE\n");
4196 		unexpected = true;
4197 	}
4198 
4199 	return unexpected;
4200 }
4201 
4202 static int execlists_resume(struct intel_engine_cs *engine)
4203 {
4204 	intel_mocs_init_engine(engine);
4205 
4206 	intel_breadcrumbs_reset(engine->breadcrumbs);
4207 
4208 	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4209 		struct drm_printer p = drm_debug_printer(__func__);
4210 
4211 		intel_engine_dump(engine, &p, NULL);
4212 	}
4213 
4214 	enable_execlists(engine);
4215 
4216 	return 0;
4217 }
4218 
4219 static void execlists_reset_prepare(struct intel_engine_cs *engine)
4220 {
4221 	struct intel_engine_execlists * const execlists = &engine->execlists;
4222 	unsigned long flags;
4223 
4224 	ENGINE_TRACE(engine, "depth<-%d\n",
4225 		     atomic_read(&execlists->tasklet.count));
4226 
4227 	/*
4228 	 * Prevent request submission to the hardware until we have
4229 	 * completed the reset in i915_gem_reset_finish(). If a request
4230 	 * is completed by one engine, it may then queue a request
4231 	 * to a second via its execlists->tasklet *just* as we are
4232 	 * calling engine->resume() and also writing the ELSP.
4233 	 * Turning off the execlists->tasklet until the reset is over
4234 	 * prevents the race.
4235 	 */
4236 	__tasklet_disable_sync_once(&execlists->tasklet);
4237 	GEM_BUG_ON(!reset_in_progress(execlists));
4238 
4239 	/* And flush any current direct submission. */
4240 	spin_lock_irqsave(&engine->active.lock, flags);
4241 	spin_unlock_irqrestore(&engine->active.lock, flags);
4242 
4243 	/*
4244 	 * We stop the engines, otherwise we might get a failed reset and a
4245 	 * dead gpu (on elk). Even a gpu as modern as kbl can suffer
4246 	 * a system hang if a batchbuffer is in progress when
4247 	 * the reset is issued, regardless of the READY_TO_RESET ack.
4248 	 * Thus we assume it is best to stop the engines on all gens
4249 	 * where we have a gpu reset.
4250 	 *
4251 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4252 	 *
4253 	 * FIXME: Wa for more modern gens needs to be validated
4254 	 */
4255 	ring_set_paused(engine, 1);
4256 	intel_engine_stop_cs(engine);
4257 
4258 	engine->execlists.reset_ccid = active_ccid(engine);
4259 }
4260 
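/*
 * Clear STOP_RING in the context image copy of RING_MI_MODE (when the
 * engine's layout has one, i.e. lrc_ring_mi_mode() != -1). MI_MODE is a
 * masked register, so the write also sets STOP_RING in the upper 16 bits
 * to make the clear take effect on the next context restore.
 */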
4261 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4262 {
4263 	int x;
4264 
4265 	x = lrc_ring_mi_mode(engine);
4266 	if (x != -1) {
4267 		regs[x + 1] &= ~STOP_RING;
4268 		regs[x + 1] |= STOP_RING << 16;
4269 	}
4270 }
4271 
4272 static void __execlists_reset_reg_state(const struct intel_context *ce,
4273 					const struct intel_engine_cs *engine)
4274 {
4275 	u32 *regs = ce->lrc_reg_state;
4276 
4277 	__reset_stop_ring(regs, engine);
4278 }
4279 
4280 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4281 {
4282 	struct intel_engine_execlists * const execlists = &engine->execlists;
4283 	struct intel_context *ce;
4284 	struct i915_request *rq;
4285 	u32 head;
4286 
4287 	mb(); /* paranoia: read the CSB pointers from after the reset */
4288 	clflush(execlists->csb_write);
4289 	mb();
4290 
4291 	process_csb(engine); /* drain preemption events */
4292 
4293 	/* Following the reset, we need to reload the CSB read/write pointers */
4294 	reset_csb_pointers(engine);
4295 
4296 	/*
4297 	 * Save the currently executing context, even if we completed
4298 	 * its request, it was still running at the time of the
4299 	 * reset and will have been clobbered.
4300 	 */
4301 	rq = active_context(engine, engine->execlists.reset_ccid);
4302 	if (!rq)
4303 		goto unwind;
4304 
4305 	ce = rq->context;
4306 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4307 
4308 	if (i915_request_completed(rq)) {
4309 		/* Idle context; tidy up the ring so we can restart afresh */
4310 		head = intel_ring_wrap(ce->ring, rq->tail);
4311 		goto out_replay;
4312 	}
4313 
4314 	/* We still have requests in-flight; the engine should be active */
4315 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4316 
4317 	/* Context has requests still in-flight; it should not be idle! */
4318 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
4319 
4320 	rq = active_request(ce->timeline, rq);
4321 	head = intel_ring_wrap(ce->ring, rq->head);
4322 	GEM_BUG_ON(head == ce->ring->tail);
4323 
4324 	/*
4325 	 * If this request hasn't started yet, e.g. it is waiting on a
4326 	 * semaphore, we need to avoid skipping the request or else we
4327 	 * break the signaling chain. However, if the context is corrupt
4328 	 * the request will not restart and we will be stuck with a wedged
4329 	 * device. It is quite often the case that if we issue a reset
4330 	 * while the GPU is loading the context image, that the context
4331 	 * image becomes corrupt.
4332 	 *
4333 	 * Otherwise, if we have not started yet, the request should replay
4334 	 * perfectly and we do not need to flag the result as being erroneous.
4335 	 */
4336 	if (!i915_request_started(rq))
4337 		goto out_replay;
4338 
4339 	/*
4340 	 * If the request was innocent, we leave the request in the ELSP
4341 	 * and will try to replay it on restarting. The context image may
4342 	 * have been corrupted by the reset, in which case we may have
4343 	 * to service a new GPU hang, but more likely we can continue on
4344 	 * without impact.
4345 	 *
4346 	 * If the request was guilty, we presume the context is corrupt
4347 	 * and have to at least restore the RING register in the context
4348 	 * image back to the expected values to skip over the guilty request.
4349 	 */
4350 	__i915_request_reset(rq, stalled);
4351 
4352 	/*
4353 	 * We want a simple context + ring to execute the breadcrumb update.
4354 	 * We cannot rely on the context being intact across the GPU hang,
4355 	 * so clear it and rebuild just what we need for the breadcrumb.
4356 	 * All pending requests for this context will be zapped, and any
4357 	 * future request will be after userspace has had the opportunity
4358 	 * to recreate its own state.
4359 	 */
4360 out_replay:
4361 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4362 		     head, ce->ring->tail);
4363 	__execlists_reset_reg_state(ce, engine);
4364 	__execlists_update_reg_state(ce, engine, head);
4365 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4366 
4367 unwind:
4368 	/* Push back any incomplete requests for replay after the reset. */
4369 	cancel_port_requests(execlists);
4370 	__unwind_incomplete_requests(engine);
4371 }
4372 
4373 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4374 {
4375 	unsigned long flags;
4376 
4377 	ENGINE_TRACE(engine, "\n");
4378 
4379 	spin_lock_irqsave(&engine->active.lock, flags);
4380 
4381 	__execlists_reset(engine, stalled);
4382 
4383 	spin_unlock_irqrestore(&engine->active.lock, flags);
4384 }
4385 
4386 static void nop_submission_tasklet(unsigned long data)
4387 {
4388 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4389 
4390 	/* The driver is wedged; don't process any more events. */
4391 	WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4392 }
4393 
4394 static void execlists_reset_cancel(struct intel_engine_cs *engine)
4395 {
4396 	struct intel_engine_execlists * const execlists = &engine->execlists;
4397 	struct i915_request *rq, *rn;
4398 	struct rb_node *rb;
4399 	unsigned long flags;
4400 
4401 	ENGINE_TRACE(engine, "\n");
4402 
4403 	/*
4404 	 * Before we call engine->cancel_requests(), we should have exclusive
4405 	 * access to the submission state. This is arranged for us by the
4406 	 * caller disabling the interrupt generation, the tasklet and other
4407 	 * threads that may then access the same state, giving us a free hand
4408 	 * to reset state. However, we still need to let lockdep be aware that
4409 	 * we know this state may be accessed in hardirq context, so we
4410 	 * disable the irq around this manipulation and we want to keep
4411 	 * the spinlock focused on its duties and not accidentally conflate
4412 	 * coverage to the submission's irq state. (Similarly, although we
4413 	 * shouldn't need to disable irq around the manipulation of the
4414 	 * submission's irq state, we also wish to remind ourselves that
4415 	 * it is irq state.)
4416 	 */
4417 	spin_lock_irqsave(&engine->active.lock, flags);
4418 
4419 	__execlists_reset(engine, true);
4420 
4421 	/* Mark all executing requests as skipped. */
4422 	list_for_each_entry(rq, &engine->active.requests, sched.link)
4423 		mark_eio(rq);
4424 	intel_engine_signal_breadcrumbs(engine);
4425 
4426 	/* Flush the queued requests to the timeline list (for retiring). */
4427 	while ((rb = rb_first_cached(&execlists->queue))) {
4428 		struct i915_priolist *p = to_priolist(rb);
4429 		int i;
4430 
4431 		priolist_for_each_request_consume(rq, rn, p, i) {
4432 			mark_eio(rq);
4433 			__i915_request_submit(rq);
4434 		}
4435 
4436 		rb_erase_cached(&p->node, &execlists->queue);
4437 		i915_priolist_free(p);
4438 	}
4439 
4440 	/* On-hold requests will be flushed to timeline upon their release */
4441 	list_for_each_entry(rq, &engine->active.hold, sched.link)
4442 		mark_eio(rq);
4443 
4444 	/* Cancel all attached virtual engines */
4445 	while ((rb = rb_first_cached(&execlists->virtual))) {
4446 		struct virtual_engine *ve =
4447 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4448 
4449 		rb_erase_cached(rb, &execlists->virtual);
4450 		RB_CLEAR_NODE(rb);
4451 
4452 		spin_lock(&ve->base.active.lock);
4453 		rq = fetch_and_zero(&ve->request);
4454 		if (rq) {
4455 			mark_eio(rq);
4456 
4457 			rq->engine = engine;
4458 			__i915_request_submit(rq);
4459 			i915_request_put(rq);
4460 
4461 			ve->base.execlists.queue_priority_hint = INT_MIN;
4462 		}
4463 		spin_unlock(&ve->base.active.lock);
4464 	}
4465 
4466 	/* Remaining _unready_ requests will be nop'ed when submitted */
4467 
4468 	execlists->queue_priority_hint = INT_MIN;
4469 	execlists->queue = RB_ROOT_CACHED;
4470 
4471 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4472 	execlists->tasklet.func = nop_submission_tasklet;
4473 
4474 	spin_unlock_irqrestore(&engine->active.lock, flags);
4475 }
4476 
4477 static void execlists_reset_finish(struct intel_engine_cs *engine)
4478 {
4479 	struct intel_engine_execlists * const execlists = &engine->execlists;
4480 
4481 	/*
4482 	 * After a GPU reset, we may have requests to replay. Do so now while
4483 	 * we still have the forcewake to be sure that the GPU is not allowed
4484 	 * to sleep before we restart and reload a context.
4485 	 */
4486 	GEM_BUG_ON(!reset_in_progress(execlists));
4487 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4488 		execlists->tasklet.func(execlists->tasklet.data);
4489 
4490 	if (__tasklet_enable(&execlists->tasklet))
4491 		/* And kick in case we missed a new request submission. */
4492 		tasklet_hi_schedule(&execlists->tasklet);
4493 	ENGINE_TRACE(engine, "depth->%d\n",
4494 		     atomic_read(&execlists->tasklet.count));
4495 }
4496 
4497 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4498 				    u64 offset, u32 len,
4499 				    const unsigned int flags)
4500 {
4501 	u32 *cs;
4502 
4503 	cs = intel_ring_begin(rq, 4);
4504 	if (IS_ERR(cs))
4505 		return PTR_ERR(cs);
4506 
4507 	/*
4508 	 * WaDisableCtxRestoreArbitration:bdw,chv
4509 	 *
4510 	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
4511 	 * particular on all the gens that do not need the w/a at all!); if we
4512 	 * took care to make sure that on every switch into this context
4513 	 * (both ordinary and for preemption) arbitration was enabled,
4514 	 * we would be fine.  However, for gen8 there is another w/a that
4515 	 * requires us to not preempt inside GPGPU execution, so we keep
4516 	 * arbitration disabled for gen8 batches. Arbitration will be
4517 	 * re-enabled before we close the request
4518 	 * (engine->emit_fini_breadcrumb).
4519 	 */
4520 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4521 
4522 	/* FIXME(BDW+): Address space and security selectors. */
4523 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4524 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4525 	*cs++ = lower_32_bits(offset);
4526 	*cs++ = upper_32_bits(offset);
4527 
4528 	intel_ring_advance(rq, cs);
4529 
4530 	return 0;
4531 }
4532 
4533 static int gen8_emit_bb_start(struct i915_request *rq,
4534 			      u64 offset, u32 len,
4535 			      const unsigned int flags)
4536 {
4537 	u32 *cs;
4538 
4539 	cs = intel_ring_begin(rq, 6);
4540 	if (IS_ERR(cs))
4541 		return PTR_ERR(cs);
4542 
4543 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4544 
4545 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4546 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4547 	*cs++ = lower_32_bits(offset);
4548 	*cs++ = upper_32_bits(offset);
4549 
4550 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4551 	*cs++ = MI_NOOP;
4552 
4553 	intel_ring_advance(rq, cs);
4554 
4555 	return 0;
4556 }
4557 
4558 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4559 {
4560 	ENGINE_WRITE(engine, RING_IMR,
4561 		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
4562 	ENGINE_POSTING_READ(engine, RING_IMR);
4563 }
4564 
4565 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4566 {
4567 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4568 }
4569 
4570 static int gen8_emit_flush(struct i915_request *request, u32 mode)
4571 {
4572 	u32 cmd, *cs;
4573 
4574 	cs = intel_ring_begin(request, 4);
4575 	if (IS_ERR(cs))
4576 		return PTR_ERR(cs);
4577 
4578 	cmd = MI_FLUSH_DW + 1;
4579 
4580 	/* We always require a command barrier so that subsequent
4581 	 * commands, such as breadcrumb interrupts, are strictly ordered
4582 	 * wrt the contents of the write cache being flushed to memory
4583 	 * (and thus being coherent from the CPU).
4584 	 */
4585 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4586 
4587 	if (mode & EMIT_INVALIDATE) {
4588 		cmd |= MI_INVALIDATE_TLB;
4589 		if (request->engine->class == VIDEO_DECODE_CLASS)
4590 			cmd |= MI_INVALIDATE_BSD;
4591 	}
4592 
4593 	*cs++ = cmd;
4594 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4595 	*cs++ = 0; /* upper addr */
4596 	*cs++ = 0; /* value */
4597 	intel_ring_advance(request, cs);
4598 
4599 	return 0;
4600 }
4601 
4602 static int gen8_emit_flush_render(struct i915_request *request,
4603 				  u32 mode)
4604 {
4605 	bool vf_flush_wa = false, dc_flush_wa = false;
4606 	u32 *cs, flags = 0;
4607 	int len;
4608 
4609 	flags |= PIPE_CONTROL_CS_STALL;
4610 
4611 	if (mode & EMIT_FLUSH) {
4612 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4613 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4614 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4615 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4616 	}
4617 
4618 	if (mode & EMIT_INVALIDATE) {
4619 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4620 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4621 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4622 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4623 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4624 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4625 		flags |= PIPE_CONTROL_QW_WRITE;
4626 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4627 
4628 		/*
4629 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4630 		 * pipe control.
4631 		 */
4632 		if (IS_GEN(request->engine->i915, 9))
4633 			vf_flush_wa = true;
4634 
4635 		/* WaForGAMHang:kbl */
4636 		if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
4637 			dc_flush_wa = true;
4638 	}
4639 
4640 	len = 6;
4641 
4642 	if (vf_flush_wa)
4643 		len += 6;
4644 
4645 	if (dc_flush_wa)
4646 		len += 12;
4647 
4648 	cs = intel_ring_begin(request, len);
4649 	if (IS_ERR(cs))
4650 		return PTR_ERR(cs);
4651 
4652 	if (vf_flush_wa)
4653 		cs = gen8_emit_pipe_control(cs, 0, 0);
4654 
4655 	if (dc_flush_wa)
4656 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4657 					    0);
4658 
4659 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4660 
4661 	if (dc_flush_wa)
4662 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4663 
4664 	intel_ring_advance(request, cs);
4665 
4666 	return 0;
4667 }
4668 
4669 static int gen11_emit_flush_render(struct i915_request *request,
4670 				   u32 mode)
4671 {
4672 	if (mode & EMIT_FLUSH) {
4673 		u32 *cs;
4674 		u32 flags = 0;
4675 
4676 		flags |= PIPE_CONTROL_CS_STALL;
4677 
4678 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4679 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4680 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4681 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4682 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4683 		flags |= PIPE_CONTROL_QW_WRITE;
4684 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4685 
4686 		cs = intel_ring_begin(request, 6);
4687 		if (IS_ERR(cs))
4688 			return PTR_ERR(cs);
4689 
4690 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4691 		intel_ring_advance(request, cs);
4692 	}
4693 
4694 	if (mode & EMIT_INVALIDATE) {
4695 		u32 *cs;
4696 		u32 flags = 0;
4697 
4698 		flags |= PIPE_CONTROL_CS_STALL;
4699 
4700 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4701 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4702 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4703 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4704 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4705 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4706 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4707 		flags |= PIPE_CONTROL_QW_WRITE;
4708 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4709 
4710 		cs = intel_ring_begin(request, 6);
4711 		if (IS_ERR(cs))
4712 			return PTR_ERR(cs);
4713 
4714 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4715 		intel_ring_advance(request, cs);
4716 	}
4717 
4718 	return 0;
4719 }
4720 
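/*
 * On gen12, MI_ARB_CHECK doubles as the pre-parser (pre-fetch) toggle:
 * bit 8 qualifies the low bit, which carries the disable state. The
 * gen12 flush paths below bracket their invalidations with
 * preparser_disable(true) / preparser_disable(false) so that stale data
 * is not pre-fetched past the TLB invalidate.
 */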
4721 static u32 preparser_disable(bool state)
4722 {
4723 	return MI_ARB_CHECK | 1 << 8 | state;
4724 }
4725 
4726 static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4727 {
4728 	static const i915_reg_t vd[] = {
4729 		GEN12_VD0_AUX_NV,
4730 		GEN12_VD1_AUX_NV,
4731 		GEN12_VD2_AUX_NV,
4732 		GEN12_VD3_AUX_NV,
4733 	};
4734 
4735 	static const i915_reg_t ve[] = {
4736 		GEN12_VE0_AUX_NV,
4737 		GEN12_VE1_AUX_NV,
4738 	};
4739 
4740 	if (engine->class == VIDEO_DECODE_CLASS)
4741 		return vd[engine->instance];
4742 
4743 	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4744 		return ve[engine->instance];
4745 
4746 	GEM_BUG_ON("unknown aux_inv_reg\n");
4747 
4748 	return INVALID_MMIO_REG;
4749 }
4750 
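/*
 * Emit a single LRI that writes AUX_INV into the given AUX table
 * invalidation register (hsdes: 1809175790, as noted at the call sites).
 */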
4751 static u32 *
4752 gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4753 {
4754 	*cs++ = MI_LOAD_REGISTER_IMM(1);
4755 	*cs++ = i915_mmio_reg_offset(inv_reg);
4756 	*cs++ = AUX_INV;
4757 	*cs++ = MI_NOOP;
4758 
4759 	return cs;
4760 }
4761 
4762 static int gen12_emit_flush_render(struct i915_request *request,
4763 				   u32 mode)
4764 {
4765 	if (mode & EMIT_FLUSH) {
4766 		u32 flags = 0;
4767 		u32 *cs;
4768 
4769 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4770 		flags |= PIPE_CONTROL_FLUSH_L3;
4771 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4772 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4773 		/* Wa_1409600907:tgl */
4774 		flags |= PIPE_CONTROL_DEPTH_STALL;
4775 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4776 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4777 
4778 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4779 		flags |= PIPE_CONTROL_QW_WRITE;
4780 
4781 		flags |= PIPE_CONTROL_CS_STALL;
4782 
4783 		cs = intel_ring_begin(request, 6);
4784 		if (IS_ERR(cs))
4785 			return PTR_ERR(cs);
4786 
4787 		cs = gen12_emit_pipe_control(cs,
4788 					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4789 					     flags, LRC_PPHWSP_SCRATCH_ADDR);
4790 		intel_ring_advance(request, cs);
4791 	}
4792 
4793 	if (mode & EMIT_INVALIDATE) {
4794 		u32 flags = 0;
4795 		u32 *cs;
4796 
4797 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4798 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4799 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4800 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4801 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4802 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4803 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4804 
4805 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4806 		flags |= PIPE_CONTROL_QW_WRITE;
4807 
4808 		flags |= PIPE_CONTROL_CS_STALL;
4809 
4810 		cs = intel_ring_begin(request, 8 + 4);
4811 		if (IS_ERR(cs))
4812 			return PTR_ERR(cs);
4813 
4814 		/*
4815 		 * Prevent the pre-parser from skipping past the TLB
4816 		 * invalidate and loading a stale page for the batch
4817 		 * buffer / request payload.
4818 		 */
4819 		*cs++ = preparser_disable(true);
4820 
4821 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4822 
4823 		/* hsdes: 1809175790 */
4824 		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4825 
4826 		*cs++ = preparser_disable(false);
4827 		intel_ring_advance(request, cs);
4828 	}
4829 
4830 	return 0;
4831 }
4832 
4833 static int gen12_emit_flush(struct i915_request *request, u32 mode)
4834 {
4835 	intel_engine_mask_t aux_inv = 0;
4836 	u32 cmd, *cs;
4837 
4838 	cmd = 4;
4839 	if (mode & EMIT_INVALIDATE)
4840 		cmd += 2;
4841 	if (mode & EMIT_INVALIDATE)
4842 		aux_inv = request->engine->mask & ~BIT(BCS0);
4843 	if (aux_inv)
4844 		cmd += 2 * hweight8(aux_inv) + 2;
4845 
4846 	cs = intel_ring_begin(request, cmd);
4847 	if (IS_ERR(cs))
4848 		return PTR_ERR(cs);
4849 
4850 	if (mode & EMIT_INVALIDATE)
4851 		*cs++ = preparser_disable(true);
4852 
4853 	cmd = MI_FLUSH_DW + 1;
4854 
4855 	/* We always require a command barrier so that subsequent
4856 	 * commands, such as breadcrumb interrupts, are strictly ordered
4857 	 * wrt the contents of the write cache being flushed to memory
4858 	 * (and thus being coherent from the CPU).
4859 	 */
4860 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4861 
4862 	if (mode & EMIT_INVALIDATE) {
4863 		cmd |= MI_INVALIDATE_TLB;
4864 		if (request->engine->class == VIDEO_DECODE_CLASS)
4865 			cmd |= MI_INVALIDATE_BSD;
4866 	}
4867 
4868 	*cs++ = cmd;
4869 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4870 	*cs++ = 0; /* upper addr */
4871 	*cs++ = 0; /* value */
4872 
4873 	if (aux_inv) { /* hsdes: 1809175790 */
4874 		struct intel_engine_cs *engine;
4875 		unsigned int tmp;
4876 
4877 		*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4878 		for_each_engine_masked(engine, request->engine->gt,
4879 				       aux_inv, tmp) {
4880 			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4881 			*cs++ = AUX_INV;
4882 		}
4883 		*cs++ = MI_NOOP;
4884 	}
4885 
4886 	if (mode & EMIT_INVALIDATE)
4887 		*cs++ = preparser_disable(false);
4888 
4889 	intel_ring_advance(request, cs);
4890 
4891 	return 0;
4892 }
4893 
4894 static void assert_request_valid(struct i915_request *rq)
4895 {
4896 	struct intel_ring *ring __maybe_unused = rq->ring;
4897 
4898 	/* Can we unwind this request without appearing to go forwards? */
4899 	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4900 }
4901 
4902 /*
4903  * Reserve space for 2 NOOPs at the end of each request to be
4904  * used as a workaround for not being allowed to do lite
4905  * restore with HEAD==TAIL (WaIdleLiteRestore).
4906  */
4907 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4908 {
4909 	/* Ensure there's always at least one preemption point per-request. */
4910 	*cs++ = MI_ARB_CHECK;
4911 	*cs++ = MI_NOOP;
4912 	request->wa_tail = intel_ring_offset(request, cs);
4913 
4914 	/* Check that entire request is less than half the ring */
4915 	assert_request_valid(request);
4916 
4917 	return cs;
4918 }
4919 
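/*
 * Busy-wait (MI_SEMAPHORE_WAIT | POLL | SAD_EQ_SDD) until the dword at
 * intel_hws_preempt_address() reads zero. This is appended after the
 * final breadcrumb (see gen8_emit_fini_breadcrumb_tail()) to give the
 * submission side an arbitration point at which the engine can be held
 * (see ring_set_paused()).
 */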
4920 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4921 {
4922 	*cs++ = MI_SEMAPHORE_WAIT |
4923 		MI_SEMAPHORE_GLOBAL_GTT |
4924 		MI_SEMAPHORE_POLL |
4925 		MI_SEMAPHORE_SAD_EQ_SDD;
4926 	*cs++ = 0;
4927 	*cs++ = intel_hws_preempt_address(request->engine);
4928 	*cs++ = 0;
4929 
4930 	return cs;
4931 }
4932 
4933 static __always_inline u32*
4934 gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4935 {
4936 	*cs++ = MI_USER_INTERRUPT;
4937 
4938 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4939 	if (intel_engine_has_semaphores(request->engine))
4940 		cs = emit_preempt_busywait(request, cs);
4941 
4942 	request->tail = intel_ring_offset(request, cs);
4943 	assert_ring_tail_valid(request->ring, request->tail);
4944 
4945 	return gen8_emit_wa_tail(request, cs);
4946 }
4947 
4948 static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
4949 {
4950 	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
4951 }
4952 
4953 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4954 {
4955 	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4956 }
4957 
4958 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4959 {
4960 	cs = gen8_emit_pipe_control(cs,
4961 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4962 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4963 				    PIPE_CONTROL_DC_FLUSH_ENABLE,
4964 				    0);
4965 
4966 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4967 	cs = gen8_emit_ggtt_write_rcs(cs,
4968 				      request->fence.seqno,
4969 				      hwsp_offset(request),
4970 				      PIPE_CONTROL_FLUSH_ENABLE |
4971 				      PIPE_CONTROL_CS_STALL);
4972 
4973 	return gen8_emit_fini_breadcrumb_tail(request, cs);
4974 }
4975 
4976 static u32 *
4977 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4978 {
4979 	cs = gen8_emit_ggtt_write_rcs(cs,
4980 				      request->fence.seqno,
4981 				      hwsp_offset(request),
4982 				      PIPE_CONTROL_CS_STALL |
4983 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
4984 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4985 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4986 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
4987 				      PIPE_CONTROL_FLUSH_ENABLE);
4988 
4989 	return gen8_emit_fini_breadcrumb_tail(request, cs);
4990 }
4991 
4992 /*
4993  * Note that the CS instruction pre-parser will not stall on the breadcrumb
4994  * flush and will continue pre-fetching the instructions after it before the
4995  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4996  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4997  * BB_START/END instructions, so, even though we might pre-fetch the preamble
4998  * we won't access the batch itself too early.
4999  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
5000  * so, if the current request is modifying an instruction in the next request on
5001  * the same intel_context, we might pre-fetch and then execute the pre-update
5002  * instruction. To avoid this, the users of self-modifying code should either
5003  * disable the parser around the code emitting the memory writes, via a new flag
5004  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
5005  * the in-kernel use-cases we've opted to use a separate context, see
5006  * reloc_gpu() as an example.
5007  * All the above applies only to the instructions themselves. Non-inline data
5008  * used by the instructions is not pre-fetched.
5009  */
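
/*
 * Illustrative only (not taken from any existing caller): a gen12 user
 * emitting self-modifying code from the same context would need to
 * bracket its writes with the pre-parser toggle described above, roughly:
 *
 *	*cs++ = preparser_disable(true);
 *	... emit the writes that patch the following instructions ...
 *	*cs++ = preparser_disable(false);
 *
 * The in-kernel users avoid this by emitting such writes from a separate
 * context instead (see reloc_gpu()).
 */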
5010 
5011 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
5012 {
5013 	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
5014 		MI_SEMAPHORE_GLOBAL_GTT |
5015 		MI_SEMAPHORE_POLL |
5016 		MI_SEMAPHORE_SAD_EQ_SDD;
5017 	*cs++ = 0;
5018 	*cs++ = intel_hws_preempt_address(request->engine);
5019 	*cs++ = 0;
5020 	*cs++ = 0;
5021 	*cs++ = MI_NOOP;
5022 
5023 	return cs;
5024 }
5025 
5026 static __always_inline u32*
5027 gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
5028 {
5029 	*cs++ = MI_USER_INTERRUPT;
5030 
5031 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5032 	if (intel_engine_has_semaphores(request->engine))
5033 		cs = gen12_emit_preempt_busywait(request, cs);
5034 
5035 	request->tail = intel_ring_offset(request, cs);
5036 	assert_ring_tail_valid(request->ring, request->tail);
5037 
5038 	return gen8_emit_wa_tail(request, cs);
5039 }
5040 
5041 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
5042 {
5043 	/* XXX Stalling flush before seqno write; post-sync not */
5044 	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
5045 	return gen12_emit_fini_breadcrumb_tail(rq, cs);
5046 }
5047 
5048 static u32 *
5049 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
5050 {
5051 	cs = gen12_emit_ggtt_write_rcs(cs,
5052 				       request->fence.seqno,
5053 				       hwsp_offset(request),
5054 				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
5055 				       PIPE_CONTROL_CS_STALL |
5056 				       PIPE_CONTROL_TILE_CACHE_FLUSH |
5057 				       PIPE_CONTROL_FLUSH_L3 |
5058 				       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
5059 				       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
5060 				       /* Wa_1409600907:tgl */
5061 				       PIPE_CONTROL_DEPTH_STALL |
5062 				       PIPE_CONTROL_DC_FLUSH_ENABLE |
5063 				       PIPE_CONTROL_FLUSH_ENABLE);
5064 
5065 	return gen12_emit_fini_breadcrumb_tail(request, cs);
5066 }
5067 
5068 static void execlists_park(struct intel_engine_cs *engine)
5069 {
5070 	cancel_timer(&engine->execlists.timer);
5071 	cancel_timer(&engine->execlists.preempt);
5072 }
5073 
5074 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
5075 {
5076 	engine->submit_request = execlists_submit_request;
5077 	engine->schedule = i915_schedule;
5078 	engine->execlists.tasklet.func = execlists_submission_tasklet;
5079 
5080 	engine->reset.prepare = execlists_reset_prepare;
5081 	engine->reset.rewind = execlists_reset_rewind;
5082 	engine->reset.cancel = execlists_reset_cancel;
5083 	engine->reset.finish = execlists_reset_finish;
5084 
5085 	engine->park = execlists_park;
5086 	engine->unpark = NULL;
5087 
5088 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
5089 	if (!intel_vgpu_active(engine->i915)) {
5090 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
5091 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5092 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
5093 			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
5094 				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
5095 		}
5096 	}
5097 
5098 	if (INTEL_GEN(engine->i915) >= 12)
5099 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
5100 
5101 	if (intel_engine_has_preemption(engine))
5102 		engine->emit_bb_start = gen8_emit_bb_start;
5103 	else
5104 		engine->emit_bb_start = gen8_emit_bb_start_noarb;
5105 }
5106 
5107 static void execlists_shutdown(struct intel_engine_cs *engine)
5108 {
5109 	/* Synchronise with residual timers and any softirq they raise */
5110 	del_timer_sync(&engine->execlists.timer);
5111 	del_timer_sync(&engine->execlists.preempt);
5112 	tasklet_kill(&engine->execlists.tasklet);
5113 }
5114 
5115 static void execlists_release(struct intel_engine_cs *engine)
5116 {
5117 	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
5118 
5119 	execlists_shutdown(engine);
5120 
5121 	intel_engine_cleanup_common(engine);
5122 	lrc_destroy_wa_ctx(engine);
5123 }
5124 
5125 static void
5126 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5127 {
5128 	/* Default vfuncs which can be overridden by each engine. */
5129 
5130 	engine->resume = execlists_resume;
5131 
5132 	engine->cops = &execlists_context_ops;
5133 	engine->request_alloc = execlists_request_alloc;
5134 
5135 	engine->emit_flush = gen8_emit_flush;
5136 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5137 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5138 	if (INTEL_GEN(engine->i915) >= 12) {
5139 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5140 		engine->emit_flush = gen12_emit_flush;
5141 	}
5142 	engine->set_default_submission = intel_execlists_set_default_submission;
5143 
5144 	if (INTEL_GEN(engine->i915) < 11) {
5145 		engine->irq_enable = gen8_logical_ring_enable_irq;
5146 		engine->irq_disable = gen8_logical_ring_disable_irq;
5147 	} else {
5148 		/*
5149 		 * TODO: On Gen11 interrupt masks need to be clear
5150 		 * to allow C6 entry. Keep interrupts enabled
5151 		 * and take the hit of generating extra interrupts
5152 		 * until a more refined solution exists.
5153 		 */
5154 	}
5155 }
5156 
5157 static inline void
5158 logical_ring_default_irqs(struct intel_engine_cs *engine)
5159 {
5160 	unsigned int shift = 0;
5161 
5162 	if (INTEL_GEN(engine->i915) < 11) {
5163 		const u8 irq_shifts[] = {
5164 			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
5165 			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
5166 			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5167 			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5168 			[VECS0] = GEN8_VECS_IRQ_SHIFT,
5169 		};
5170 
5171 		shift = irq_shifts[engine->id];
5172 	}
5173 
5174 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5175 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5176 	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5177 	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5178 }
5179 
5180 static void rcs_submission_override(struct intel_engine_cs *engine)
5181 {
5182 	switch (INTEL_GEN(engine->i915)) {
5183 	case 12:
5184 		engine->emit_flush = gen12_emit_flush_render;
5185 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5186 		break;
5187 	case 11:
5188 		engine->emit_flush = gen11_emit_flush_render;
5189 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5190 		break;
5191 	default:
5192 		engine->emit_flush = gen8_emit_flush_render;
5193 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5194 		break;
5195 	}
5196 }
5197 
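/*
 * Set up execlists submission for an engine: install the submission tasklet
 * and the timeslice/preemption timers, the default vfuncs and irq masks,
 * the workaround batch buffers, and the pointers used to reach the
 * ELSP/ELSQ submit registers and the context status buffer (CSB).
 */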
5198 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5199 {
5200 	struct intel_engine_execlists * const execlists = &engine->execlists;
5201 	struct drm_i915_private *i915 = engine->i915;
5202 	struct intel_uncore *uncore = engine->uncore;
5203 	u32 base = engine->mmio_base;
5204 
5205 	tasklet_init(&engine->execlists.tasklet,
5206 		     execlists_submission_tasklet, (unsigned long)engine);
5207 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5208 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5209 
5210 	logical_ring_default_vfuncs(engine);
5211 	logical_ring_default_irqs(engine);
5212 
5213 	if (engine->class == RENDER_CLASS)
5214 		rcs_submission_override(engine);
5215 
5216 	if (intel_init_workaround_bb(engine))
5217 		/*
5218 		 * We continue even if we fail to initialize the WA batch
5219 		 * because we only expect rare glitches and nothing
5220 		 * critical enough to prevent us from using the GPU.
5221 		 */
5222 		drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5223 
5224 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
5225 		execlists->submit_reg = uncore->regs +
5226 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5227 		execlists->ctrl_reg = uncore->regs +
5228 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5229 	} else {
5230 		execlists->submit_reg = uncore->regs +
5231 			i915_mmio_reg_offset(RING_ELSP(base));
5232 	}
5233 
5234 	execlists->csb_status =
5235 		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5236 
5237 	execlists->csb_write =
5238 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
5239 
5240 	if (INTEL_GEN(i915) < 11)
5241 		execlists->csb_size = GEN8_CSB_ENTRIES;
5242 	else
5243 		execlists->csb_size = GEN11_CSB_ENTRIES;
5244 
5245 	if (INTEL_GEN(engine->i915) >= 11) {
5246 		execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5247 		execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5248 	}
5249 
5250 	/* Finally, take ownership and responsibility for cleanup! */
5251 	engine->sanitize = execlists_sanitize;
5252 	engine->release = execlists_release;
5253 
5254 	return 0;
5255 }
5256 
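/*
 * Program the context-control, ring-control and timestamp registers of a
 * fresh context image. When @inhibit is set (no default state to copy from),
 * context restore is left inhibited so the engine does not try to load the
 * uninitialised image.
 */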
5257 static void init_common_reg_state(u32 * const regs,
5258 				  const struct intel_engine_cs *engine,
5259 				  const struct intel_ring *ring,
5260 				  bool inhibit)
5261 {
5262 	u32 ctl;
5263 
5264 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5265 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5266 	if (inhibit)
5267 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5268 	if (INTEL_GEN(engine->i915) < 11)
5269 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5270 					   CTX_CTRL_RS_CTX_ENABLE);
5271 	regs[CTX_CONTEXT_CONTROL] = ctl;
5272 
5273 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5274 	regs[CTX_TIMESTAMP] = 0;
5275 }
5276 
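/*
 * Point the per-context and indirect-context workaround batch buffer
 * registers in the context image at the engine's wa_ctx vma, if those
 * batches were created.
 */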
5277 static void init_wa_bb_reg_state(u32 * const regs,
5278 				 const struct intel_engine_cs *engine)
5279 {
5280 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5281 
5282 	if (wa_ctx->per_ctx.size) {
5283 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5284 
5285 		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5286 		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5287 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5288 	}
5289 
5290 	if (wa_ctx->indirect_ctx.size) {
5291 		lrc_ring_setup_indirect_ctx(regs, engine,
5292 					    i915_ggtt_offset(wa_ctx->vma) +
5293 					    wa_ctx->indirect_ctx.offset,
5294 					    wa_ctx->indirect_ctx.size);
5295 	}
5296 }
5297 
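/*
 * Load the page-directory pointers for the context's PPGTT: a single PML4
 * pointer for a 4-level address space, otherwise all four PDP descriptors.
 */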
5298 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5299 {
5300 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
5301 		/*
5302 		 * 64b PPGTT (48bit canonical): PDP0_DESCRIPTOR contains the
5303 		 * base address of the PML4; the other PDP descriptors are ignored.
5304 		 */
5305 		ASSIGN_CTX_PML4(ppgtt, regs);
5306 	} else {
5307 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
5308 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
5309 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
5310 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
5311 	}
5312 }
5313 
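/*
 * Return the PPGTT backing @vm: the aliasing PPGTT when @vm is the GGTT,
 * otherwise the full PPGTT itself.
 */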
5314 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5315 {
5316 	if (i915_is_ggtt(vm))
5317 		return i915_vm_to_ggtt(vm)->alias;
5318 	else
5319 		return i915_vm_to_ppgtt(vm);
5320 }
5321 
5322 static void execlists_init_reg_state(u32 *regs,
5323 				     const struct intel_context *ce,
5324 				     const struct intel_engine_cs *engine,
5325 				     const struct intel_ring *ring,
5326 				     bool inhibit)
5327 {
5328 	/*
5329 	 * A context is actually a big batch buffer with several
5330 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5331 	 * values we are setting here are only for the first context restore:
5332 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
5333 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5334 	 * we are not initializing here).
5335 	 *
5336 	 * Must keep consistent with virtual_update_register_offsets().
5337 	 */
5338 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
5339 
5340 	init_common_reg_state(regs, engine, ring, inhibit);
5341 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5342 
5343 	init_wa_bb_reg_state(regs, engine);
5344 
5345 	__reset_stop_ring(regs, engine);
5346 }
5347 
5348 static int
5349 populate_lr_context(struct intel_context *ce,
5350 		    struct drm_i915_gem_object *ctx_obj,
5351 		    struct intel_engine_cs *engine,
5352 		    struct intel_ring *ring)
5353 {
5354 	bool inhibit = true;
5355 	void *vaddr;
5356 
5357 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5358 	if (IS_ERR(vaddr)) {
5359 		drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5360 		return PTR_ERR(vaddr);
5361 	}
5362 
5363 	set_redzone(vaddr, engine);
5364 
5365 	if (engine->default_state) {
5366 		shmem_read(engine->default_state, 0,
5367 			   vaddr, engine->context_size);
5368 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
5369 		inhibit = false;
5370 	}
5371 
5372 	/* Clear the ppHWSP (inc. per-context counters) */
5373 	memset(vaddr, 0, PAGE_SIZE);
5374 
5375 	/*
5376 	 * The second page of the context object contains some registers which
5377 	 * must be set up prior to the first execution.
5378 	 */
5379 	execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5380 				 ce, engine, ring, inhibit);
5381 
5382 	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5383 	i915_gem_object_unpin_map(ctx_obj);
5384 	return 0;
5385 }
5386 
5387 static struct intel_timeline *pinned_timeline(struct intel_context *ce)
5388 {
5389 	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
5390 
5391 	return intel_timeline_create_from_engine(ce->engine,
5392 						 page_unmask_bits(tl));
5393 }
5394 
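/*
 * Allocate the backing store for a logical ring context: the context image
 * object (plus optional redzone and Gen12 wa_bb pages), its GGTT vma, a
 * timeline and a ring, then populate the default register state.
 */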
5395 static int __execlists_context_alloc(struct intel_context *ce,
5396 				     struct intel_engine_cs *engine)
5397 {
5398 	struct drm_i915_gem_object *ctx_obj;
5399 	struct intel_ring *ring;
5400 	struct i915_vma *vma;
5401 	u32 context_size;
5402 	int ret;
5403 
5404 	GEM_BUG_ON(ce->state);
5405 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5406 
5407 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5408 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5409 
5410 	if (INTEL_GEN(engine->i915) == 12) {
5411 		ce->wa_bb_page = context_size / PAGE_SIZE;
5412 		context_size += PAGE_SIZE;
5413 	}
5414 
5415 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5416 	if (IS_ERR(ctx_obj))
5417 		return PTR_ERR(ctx_obj);
5418 
5419 	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5420 	if (IS_ERR(vma)) {
5421 		ret = PTR_ERR(vma);
5422 		goto error_deref_obj;
5423 	}
5424 
5425 	if (!page_mask_bits(ce->timeline)) {
5426 		struct intel_timeline *tl;
5427 
5428 		/*
5429 		 * Use the static global HWSP for the kernel context, and
5430 		 * a dynamically allocated cacheline for everyone else.
5431 		 */
5432 		if (unlikely(ce->timeline))
5433 			tl = pinned_timeline(ce);
5434 		else
5435 			tl = intel_timeline_create(engine->gt);
5436 		if (IS_ERR(tl)) {
5437 			ret = PTR_ERR(tl);
5438 			goto error_deref_obj;
5439 		}
5440 
5441 		ce->timeline = tl;
5442 	}
5443 
5444 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5445 	if (IS_ERR(ring)) {
5446 		ret = PTR_ERR(ring);
5447 		goto error_deref_obj;
5448 	}
5449 
5450 	ret = populate_lr_context(ce, ctx_obj, engine, ring);
5451 	if (ret) {
5452 		drm_dbg(&engine->i915->drm,
5453 			"Failed to populate LRC: %d\n", ret);
5454 		goto error_ring_free;
5455 	}
5456 
5457 	ce->ring = ring;
5458 	ce->state = vma;
5459 
5460 	return 0;
5461 
5462 error_ring_free:
5463 	intel_ring_put(ring);
5464 error_deref_obj:
5465 	i915_gem_object_put(ctx_obj);
5466 	return ret;
5467 }
5468 
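/*
 * The virtual engine borrows the first request list of its default priolist
 * to track the single pending virtual request.
 */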
5469 static struct list_head *virtual_queue(struct virtual_engine *ve)
5470 {
5471 	return &ve->base.execlists.default_priolist.requests[0];
5472 }
5473 
5474 static void rcu_virtual_context_destroy(struct work_struct *wrk)
5475 {
5476 	struct virtual_engine *ve =
5477 		container_of(wrk, typeof(*ve), rcu.work);
5478 	unsigned int n;
5479 
5480 	GEM_BUG_ON(ve->context.inflight);
5481 
5482 	/* Preempt-to-busy may leave a stale request behind. */
5483 	if (unlikely(ve->request)) {
5484 		struct i915_request *old;
5485 
5486 		spin_lock_irq(&ve->base.active.lock);
5487 
5488 		old = fetch_and_zero(&ve->request);
5489 		if (old) {
5490 			GEM_BUG_ON(!i915_request_completed(old));
5491 			__i915_request_submit(old);
5492 			i915_request_put(old);
5493 		}
5494 
5495 		spin_unlock_irq(&ve->base.active.lock);
5496 	}
5497 
5498 	/*
5499 	 * Flush the tasklet in case it is still running on another core.
5500 	 *
5501 	 * This needs to be done before we remove ourselves from the siblings'
5502 	 * rbtrees: if it is running in parallel, it may reinsert
5503 	 * the rb_node into a sibling's tree.
5504 	 */
5505 	tasklet_kill(&ve->base.execlists.tasklet);
5506 
5507 	/* Decouple ourselves from the siblings, no more access allowed. */
5508 	for (n = 0; n < ve->num_siblings; n++) {
5509 		struct intel_engine_cs *sibling = ve->siblings[n];
5510 		struct rb_node *node = &ve->nodes[sibling->id].rb;
5511 
5512 		if (RB_EMPTY_NODE(node))
5513 			continue;
5514 
5515 		spin_lock_irq(&sibling->active.lock);
5516 
5517 		/* Detachment is lazily performed in the execlists tasklet */
5518 		if (!RB_EMPTY_NODE(node))
5519 			rb_erase_cached(node, &sibling->execlists.virtual);
5520 
5521 		spin_unlock_irq(&sibling->active.lock);
5522 	}
5523 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5524 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5525 
5526 	if (ve->context.state)
5527 		__execlists_context_fini(&ve->context);
5528 	intel_context_fini(&ve->context);
5529 
5530 	intel_breadcrumbs_free(ve->base.breadcrumbs);
5531 	intel_engine_free_request_pool(&ve->base);
5532 
5533 	kfree(ve->bonds);
5534 	kfree(ve);
5535 }
5536 
5537 static void virtual_context_destroy(struct kref *kref)
5538 {
5539 	struct virtual_engine *ve =
5540 		container_of(kref, typeof(*ve), context.ref);
5541 
5542 	GEM_BUG_ON(!list_empty(&ve->context.signals));
5543 
5544 	/*
5545 	 * When destroying the virtual engine, we have to be aware that
5546 	 * it may still be in use from a hardirq/softirq context causing
5547 	 * the resubmission of a completed request (background completion
5548 	 * due to preempt-to-busy). Before we can free the engine, we need
5549 	 * to flush the submission code and tasklets that are still potentially
5550 	 * accessing the engine. Flushing the tasklets requires process context,
5551 	 * and since we can guard the resubmit onto the engine with an RCU read
5552 	 * lock, we can delegate the free of the engine to an RCU worker.
5553 	 */
5554 	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
5555 	queue_rcu_work(system_wq, &ve->rcu);
5556 }
5557 
5558 static void virtual_engine_initial_hint(struct virtual_engine *ve)
5559 {
5560 	int swp;
5561 
5562 	/*
5563 	 * Pick a random sibling at startup to help spread the load around.
5564 	 *
5565 	 * New contexts are typically created with exactly the same order
5566 	 * of siblings, and often started in batches. Due to the way we iterate
5567 	 * the array of siblings when submitting requests, sibling[0] is
5568 	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5569 	 * randomised across the system, we also spread the load by making the
5570 	 * first engine we inspect different each time.
5571 	 *
5572 	 * NB This does not force us to execute on this engine, it will just
5573 	 * typically be the first we inspect for submission.
5574 	 */
5575 	swp = prandom_u32_max(ve->num_siblings);
5576 	if (swp)
5577 		swap(ve->siblings[swp], ve->siblings[0]);
5578 }
5579 
5580 static int virtual_context_alloc(struct intel_context *ce)
5581 {
5582 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5583 
5584 	return __execlists_context_alloc(ce, ve->siblings[0]);
5585 }
5586 
5587 static int virtual_context_pin(struct intel_context *ce, void *vaddr)
5588 {
5589 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5590 
5591 	/* Note: we must use a real engine class for setting up reg state */
5592 	return __execlists_context_pin(ce, ve->siblings[0], vaddr);
5593 }
5594 
5595 static void virtual_context_enter(struct intel_context *ce)
5596 {
5597 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5598 	unsigned int n;
5599 
5600 	for (n = 0; n < ve->num_siblings; n++)
5601 		intel_engine_pm_get(ve->siblings[n]);
5602 
5603 	intel_timeline_enter(ce->timeline);
5604 }
5605 
5606 static void virtual_context_exit(struct intel_context *ce)
5607 {
5608 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5609 	unsigned int n;
5610 
5611 	intel_timeline_exit(ce->timeline);
5612 
5613 	for (n = 0; n < ve->num_siblings; n++)
5614 		intel_engine_pm_put(ve->siblings[n]);
5615 }
5616 
5617 static const struct intel_context_ops virtual_context_ops = {
5618 	.alloc = virtual_context_alloc,
5619 
5620 	.pre_pin = execlists_context_pre_pin,
5621 	.pin = virtual_context_pin,
5622 	.unpin = execlists_context_unpin,
5623 	.post_unpin = execlists_context_post_unpin,
5624 
5625 	.enter = virtual_context_enter,
5626 	.exit = virtual_context_exit,
5627 
5628 	.destroy = virtual_context_destroy,
5629 };
5630 
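/*
 * Work out which physical engines the pending virtual request may run on.
 * An empty execution mask is invalid; record -ENODEV on the request and
 * fall back to the first sibling.
 */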
5631 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5632 {
5633 	struct i915_request *rq;
5634 	intel_engine_mask_t mask;
5635 
5636 	rq = READ_ONCE(ve->request);
5637 	if (!rq)
5638 		return 0;
5639 
5640 	/* The rq is ready for submission; rq->execution_mask is now stable. */
5641 	mask = rq->execution_mask;
5642 	if (unlikely(!mask)) {
5643 		/* Invalid selection, submit to a random engine in error */
5644 		i915_request_set_error_once(rq, -ENODEV);
5645 		mask = ve->siblings[0]->mask;
5646 	}
5647 
5648 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5649 		     rq->fence.context, rq->fence.seqno,
5650 		     mask, ve->base.execlists.queue_priority_hint);
5651 
5652 	return mask;
5653 }
5654 
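/*
 * Distribute the pending virtual request: insert (or reposition) this
 * virtual engine's node in each eligible sibling's rbtree of virtual
 * requests, ordered by priority, and kick the sibling's tasklet when our
 * priority beats its current queue priority hint.
 */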
5655 static void virtual_submission_tasklet(unsigned long data)
5656 {
5657 	struct virtual_engine * const ve = (struct virtual_engine *)data;
5658 	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5659 	intel_engine_mask_t mask;
5660 	unsigned int n;
5661 
5662 	rcu_read_lock();
5663 	mask = virtual_submission_mask(ve);
5664 	rcu_read_unlock();
5665 	if (unlikely(!mask))
5666 		return;
5667 
5668 	local_irq_disable();
5669 	for (n = 0; n < ve->num_siblings; n++) {
5670 		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5671 		struct ve_node * const node = &ve->nodes[sibling->id];
5672 		struct rb_node **parent, *rb;
5673 		bool first;
5674 
5675 		if (!READ_ONCE(ve->request))
5676 			break; /* already handled by a sibling's tasklet */
5677 
5678 		if (unlikely(!(mask & sibling->mask))) {
5679 			if (!RB_EMPTY_NODE(&node->rb)) {
5680 				spin_lock(&sibling->active.lock);
5681 				rb_erase_cached(&node->rb,
5682 						&sibling->execlists.virtual);
5683 				RB_CLEAR_NODE(&node->rb);
5684 				spin_unlock(&sibling->active.lock);
5685 			}
5686 			continue;
5687 		}
5688 
5689 		spin_lock(&sibling->active.lock);
5690 
5691 		if (!RB_EMPTY_NODE(&node->rb)) {
5692 			/*
5693 			 * Cheat and avoid rebalancing the tree if we can
5694 			 * reuse this node in situ.
5695 			 */
5696 			first = rb_first_cached(&sibling->execlists.virtual) ==
5697 				&node->rb;
5698 			if (prio == node->prio || (prio > node->prio && first))
5699 				goto submit_engine;
5700 
5701 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5702 		}
5703 
5704 		rb = NULL;
5705 		first = true;
5706 		parent = &sibling->execlists.virtual.rb_root.rb_node;
5707 		while (*parent) {
5708 			struct ve_node *other;
5709 
5710 			rb = *parent;
5711 			other = rb_entry(rb, typeof(*other), rb);
5712 			if (prio > other->prio) {
5713 				parent = &rb->rb_left;
5714 			} else {
5715 				parent = &rb->rb_right;
5716 				first = false;
5717 			}
5718 		}
5719 
5720 		rb_link_node(&node->rb, rb, parent);
5721 		rb_insert_color_cached(&node->rb,
5722 				       &sibling->execlists.virtual,
5723 				       first);
5724 
5725 submit_engine:
5726 		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5727 		node->prio = prio;
5728 		if (first && prio > sibling->execlists.queue_priority_hint)
5729 			tasklet_hi_schedule(&sibling->execlists.tasklet);
5730 
5731 		spin_unlock(&sibling->active.lock);
5732 	}
5733 	local_irq_enable();
5734 }
5735 
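/*
 * submit_request hook for the virtual engine: drop any stale request left
 * behind by preempt-to-busy, then either complete an already-finished
 * request immediately or stash it as ve->request and schedule the tasklet
 * to pick a sibling.
 */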
5736 static void virtual_submit_request(struct i915_request *rq)
5737 {
5738 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5739 	struct i915_request *old;
5740 	unsigned long flags;
5741 
5742 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5743 		     rq->fence.context,
5744 		     rq->fence.seqno);
5745 
5746 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5747 
5748 	spin_lock_irqsave(&ve->base.active.lock, flags);
5749 
5750 	old = ve->request;
5751 	if (old) { /* background completion event from preempt-to-busy */
5752 		GEM_BUG_ON(!i915_request_completed(old));
5753 		__i915_request_submit(old);
5754 		i915_request_put(old);
5755 	}
5756 
5757 	if (i915_request_completed(rq)) {
5758 		__i915_request_submit(rq);
5759 
5760 		ve->base.execlists.queue_priority_hint = INT_MIN;
5761 		ve->request = NULL;
5762 	} else {
5763 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
5764 		ve->request = i915_request_get(rq);
5765 
5766 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5767 		list_move_tail(&rq->sched.link, virtual_queue(ve));
5768 
5769 		tasklet_hi_schedule(&ve->base.execlists.tasklet);
5770 	}
5771 
5772 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
5773 }
5774 
5775 static struct ve_bond *
5776 virtual_find_bond(struct virtual_engine *ve,
5777 		  const struct intel_engine_cs *master)
5778 {
5779 	int i;
5780 
5781 	for (i = 0; i < ve->num_bonds; i++) {
5782 		if (ve->bonds[i].master == master)
5783 			return &ve->bonds[i];
5784 	}
5785 
5786 	return NULL;
5787 }
5788 
5789 static void
5790 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5791 {
5792 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5793 	intel_engine_mask_t allowed, exec;
5794 	struct ve_bond *bond;
5795 
5796 	allowed = ~to_request(signal)->engine->mask;
5797 
5798 	bond = virtual_find_bond(ve, to_request(signal)->engine);
5799 	if (bond)
5800 		allowed &= bond->sibling_mask;
5801 
5802 	/* Restrict the bonded request to run on only the available engines */
5803 	exec = READ_ONCE(rq->execution_mask);
5804 	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5805 		;
5806 
5807 	/* Prevent the master from being re-run on the bonded engines */
5808 	to_request(signal)->execution_mask &= ~allowed;
5809 }
5810 
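/*
 * Create a virtual engine that load-balances submissions across @count
 * physical siblings. A single sibling degenerates to a normal context on
 * that engine; otherwise all siblings must use execlists submission and
 * belong to the same engine class.
 */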
5811 struct intel_context *
5812 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5813 			       unsigned int count)
5814 {
5815 	struct virtual_engine *ve;
5816 	unsigned int n;
5817 	int err;
5818 
5819 	if (count == 0)
5820 		return ERR_PTR(-EINVAL);
5821 
5822 	if (count == 1)
5823 		return intel_context_create(siblings[0]);
5824 
5825 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5826 	if (!ve)
5827 		return ERR_PTR(-ENOMEM);
5828 
5829 	ve->base.i915 = siblings[0]->i915;
5830 	ve->base.gt = siblings[0]->gt;
5831 	ve->base.uncore = siblings[0]->uncore;
5832 	ve->base.id = -1;
5833 
5834 	ve->base.class = OTHER_CLASS;
5835 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5836 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5837 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5838 
5839 	/*
5840 	 * The decision on whether to submit a request using semaphores
5841 	 * depends on the saturated state of the engine. We only compute
5842 	 * this during HW submission of the request, and we need this
5843 	 * state to be globally applied to all requests being submitted
5844 	 * to this engine. Virtual engines encompass more than one physical
5845 	 * engine and so we cannot accurately tell in advance if one of those
5846 	 * engines is already saturated and so cannot afford to use a semaphore
5847 	 * and be pessimized in priority for doing so -- if we are the only
5848 	 * context using semaphores after all other clients have stopped, we
5849 	 * will be starved on the saturated system. Such a global switch for
5850 	 * semaphores is less than ideal, but alas is the current compromise.
5851 	 */
5852 	ve->base.saturated = ALL_ENGINES;
5853 
5854 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5855 
5856 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5857 	intel_engine_init_execlists(&ve->base);
5858 
5859 	ve->base.cops = &virtual_context_ops;
5860 	ve->base.request_alloc = execlists_request_alloc;
5861 
5862 	ve->base.schedule = i915_schedule;
5863 	ve->base.submit_request = virtual_submit_request;
5864 	ve->base.bond_execute = virtual_bond_execute;
5865 
5866 	INIT_LIST_HEAD(virtual_queue(ve));
5867 	ve->base.execlists.queue_priority_hint = INT_MIN;
5868 	tasklet_init(&ve->base.execlists.tasklet,
5869 		     virtual_submission_tasklet,
5870 		     (unsigned long)ve);
5871 
5872 	intel_context_init(&ve->context, &ve->base);
5873 
5874 	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
5875 	if (!ve->base.breadcrumbs) {
5876 		err = -ENOMEM;
5877 		goto err_put;
5878 	}
5879 
5880 	for (n = 0; n < count; n++) {
5881 		struct intel_engine_cs *sibling = siblings[n];
5882 
5883 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
5884 		if (sibling->mask & ve->base.mask) {
5885 			DRM_DEBUG("duplicate %s entry in load balancer\n",
5886 				  sibling->name);
5887 			err = -EINVAL;
5888 			goto err_put;
5889 		}
5890 
5891 		/*
5892 		 * The virtual engine implementation is tightly coupled to
5893 	 * the execlists backend -- we push requests directly
5894 		 * into a tree inside each physical engine. We could support
5895 		 * layering if we handle cloning of the requests and
5896 		 * submitting a copy into each backend.
5897 		 */
5898 		if (sibling->execlists.tasklet.func !=
5899 		    execlists_submission_tasklet) {
5900 			err = -ENODEV;
5901 			goto err_put;
5902 		}
5903 
5904 		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5905 		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5906 
5907 		ve->siblings[ve->num_siblings++] = sibling;
5908 		ve->base.mask |= sibling->mask;
5909 
5910 		/*
5911 		 * All physical engines must be compatible for their emission
5912 		 * functions (as we build the instructions during request
5913 		 * construction and do not alter them before submission
5914 		 * on the physical engine). We use the engine class as a guide
5915 		 * here, although that could be refined.
5916 		 */
5917 		if (ve->base.class != OTHER_CLASS) {
5918 			if (ve->base.class != sibling->class) {
5919 				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5920 					  sibling->class, ve->base.class);
5921 				err = -EINVAL;
5922 				goto err_put;
5923 			}
5924 			continue;
5925 		}
5926 
5927 		ve->base.class = sibling->class;
5928 		ve->base.uabi_class = sibling->uabi_class;
5929 		snprintf(ve->base.name, sizeof(ve->base.name),
5930 			 "v%dx%d", ve->base.class, count);
5931 		ve->base.context_size = sibling->context_size;
5932 
5933 		ve->base.emit_bb_start = sibling->emit_bb_start;
5934 		ve->base.emit_flush = sibling->emit_flush;
5935 		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5936 		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5937 		ve->base.emit_fini_breadcrumb_dw =
5938 			sibling->emit_fini_breadcrumb_dw;
5939 
5940 		ve->base.flags = sibling->flags;
5941 	}
5942 
5943 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5944 
5945 	virtual_engine_initial_hint(ve);
5946 	return &ve->context;
5947 
5948 err_put:
5949 	intel_context_put(&ve->context);
5950 	return ERR_PTR(err);
5951 }
5952 
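/*
 * Duplicate an existing virtual engine, copying its sibling set and any
 * bonds, for use by a new context.
 */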
5953 struct intel_context *
5954 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5955 {
5956 	struct virtual_engine *se = to_virtual_engine(src);
5957 	struct intel_context *dst;
5958 
5959 	dst = intel_execlists_create_virtual(se->siblings,
5960 					     se->num_siblings);
5961 	if (IS_ERR(dst))
5962 		return dst;
5963 
5964 	if (se->num_bonds) {
5965 		struct virtual_engine *de = to_virtual_engine(dst->engine);
5966 
5967 		de->bonds = kmemdup(se->bonds,
5968 				    sizeof(*se->bonds) * se->num_bonds,
5969 				    GFP_KERNEL);
5970 		if (!de->bonds) {
5971 			intel_context_put(dst);
5972 			return ERR_PTR(-ENOMEM);
5973 		}
5974 
5975 		de->num_bonds = se->num_bonds;
5976 	}
5977 
5978 	return dst;
5979 }
5980 
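/*
 * Record that requests bonded to @master may execute on @sibling, creating
 * or extending the bond entry for @master.
 */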
5981 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5982 				     const struct intel_engine_cs *master,
5983 				     const struct intel_engine_cs *sibling)
5984 {
5985 	struct virtual_engine *ve = to_virtual_engine(engine);
5986 	struct ve_bond *bond;
5987 	int n;
5988 
5989 	/* Sanity check the sibling is part of the virtual engine */
5990 	for (n = 0; n < ve->num_siblings; n++)
5991 		if (sibling == ve->siblings[n])
5992 			break;
5993 	if (n == ve->num_siblings)
5994 		return -EINVAL;
5995 
5996 	bond = virtual_find_bond(ve, master);
5997 	if (bond) {
5998 		bond->sibling_mask |= sibling->mask;
5999 		return 0;
6000 	}
6001 
6002 	bond = krealloc(ve->bonds,
6003 			sizeof(*bond) * (ve->num_bonds + 1),
6004 			GFP_KERNEL);
6005 	if (!bond)
6006 		return -ENOMEM;
6007 
6008 	bond[ve->num_bonds].master = master;
6009 	bond[ve->num_bonds].sibling_mask = sibling->mask;
6010 
6011 	ve->bonds = bond;
6012 	ve->num_bonds++;
6013 
6014 	return 0;
6015 }
6016 
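/*
 * Dump up to @max requests from each of the engine's active list, priority
 * queue and virtual request tree via @show_request, for debug output.
 */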
6017 void intel_execlists_show_requests(struct intel_engine_cs *engine,
6018 				   struct drm_printer *m,
6019 				   void (*show_request)(struct drm_printer *m,
6020 							const struct i915_request *rq,
6021 							const char *prefix,
6022 							int indent),
6023 				   unsigned int max)
6024 {
6025 	const struct intel_engine_execlists *execlists = &engine->execlists;
6026 	struct i915_request *rq, *last;
6027 	unsigned long flags;
6028 	unsigned int count;
6029 	struct rb_node *rb;
6030 
6031 	spin_lock_irqsave(&engine->active.lock, flags);
6032 
6033 	last = NULL;
6034 	count = 0;
6035 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
6036 		if (count++ < max - 1)
6037 			show_request(m, rq, "\t\t", 0);
6038 		else
6039 			last = rq;
6040 	}
6041 	if (last) {
6042 		if (count > max) {
6043 			drm_printf(m,
6044 				   "\t\t...skipping %d executing requests...\n",
6045 				   count - max);
6046 		}
6047 		show_request(m, last, "\t\t", 0);
6048 	}
6049 
6050 	if (execlists->switch_priority_hint != INT_MIN)
6051 		drm_printf(m, "\t\tSwitch priority hint: %d\n",
6052 			   READ_ONCE(execlists->switch_priority_hint));
6053 	if (execlists->queue_priority_hint != INT_MIN)
6054 		drm_printf(m, "\t\tQueue priority hint: %d\n",
6055 			   READ_ONCE(execlists->queue_priority_hint));
6056 
6057 	last = NULL;
6058 	count = 0;
6059 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
6060 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
6061 		int i;
6062 
6063 		priolist_for_each_request(rq, p, i) {
6064 			if (count++ < max - 1)
6065 				show_request(m, rq, "\t\t", 0);
6066 			else
6067 				last = rq;
6068 		}
6069 	}
6070 	if (last) {
6071 		if (count > max) {
6072 			drm_printf(m,
6073 				   "\t\t...skipping %d queued requests...\n",
6074 				   count - max);
6075 		}
6076 		show_request(m, last, "\t\t", 0);
6077 	}
6078 
6079 	last = NULL;
6080 	count = 0;
6081 	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
6082 		struct virtual_engine *ve =
6083 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
6084 		struct i915_request *rq = READ_ONCE(ve->request);
6085 
6086 		if (rq) {
6087 			if (count++ < max - 1)
6088 				show_request(m, rq, "\t\t", 0);
6089 			else
6090 				last = rq;
6091 		}
6092 	}
6093 	if (last) {
6094 		if (count > max) {
6095 			drm_printf(m,
6096 				   "\t\t...skipping %d virtual requests...\n",
6097 				   count - max);
6098 		}
6099 		show_request(m, last, "\t\t", 0);
6100 	}
6101 
6102 	spin_unlock_irqrestore(&engine->active.lock, flags);
6103 }
6104 
6105 void intel_lr_context_reset(struct intel_engine_cs *engine,
6106 			    struct intel_context *ce,
6107 			    u32 head,
6108 			    bool scrub)
6109 {
6110 	GEM_BUG_ON(!intel_context_is_pinned(ce));
6111 
6112 	/*
6113 	 * We want a simple context + ring to execute the breadcrumb update.
6114 	 * We cannot rely on the context being intact across the GPU hang,
6115 	 * so clear it and rebuild just what we need for the breadcrumb.
6116 	 * All pending requests for this context will be zapped, and any
6117 	 * future request will arrive after userspace has had the opportunity
6118 	 * to recreate its own state.
6119 	 */
6120 	if (scrub)
6121 		restore_default_state(ce, engine);
6122 
6123 	/* Rerun the request; its payload has been neutered (if guilty). */
6124 	__execlists_update_reg_state(ce, engine, head);
6125 }
6126 
6127 bool
6128 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
6129 {
6130 	return engine->set_default_submission ==
6131 	       intel_execlists_set_default_submission;
6132 }
6133 
6134 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
6135 #include "selftest_lrc.c"
6136 #endif
6137