xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_lrc.c (revision a6c76bb0)
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Ben Widawsky <ben@bwidawsk.net>
25  *    Michel Thierry <michel.thierry@intel.com>
26  *    Thomas Daniel <thomas.daniel@intel.com>
27  *    Oscar Mateo <oscar.mateo@intel.com>
28  *
29  */
30 
31 /**
32  * DOC: Logical Rings, Logical Ring Contexts and Execlists
33  *
34  * Motivation:
35  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36  * These expanded contexts enable a number of new abilities, especially
37  * "Execlists" (also implemented in this file).
38  *
39  * One of the main differences with the legacy HW contexts is that logical
40  * ring contexts incorporate many more things to the context's state, like
41  * PDPs or ringbuffer control registers:
42  *
43  * The reason why PDPs are included in the context is straightforward: as
44  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45  * contained there mean you don't need to do a ppgtt->switch_mm yourself,
46  * instead, the GPU will do it for you on the context switch.
47  *
48  * But, what about the ringbuffer control registers (head, tail, etc..)?
49  * shouldn't we just need a set of those per engine command streamer? This is
50  * where the name "Logical Rings" starts to make sense: by virtualizing the
51  * rings, the engine cs shifts to a new "ring buffer" with every context
52  * switch. When you want to submit a workload to the GPU you: A) choose your
53  * context, B) find its appropriate virtualized ring, C) write commands to it
54  * and then, finally, D) tell the GPU to switch to that context.
55  *
56  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57  * to a contexts is via a context execution list, ergo "Execlists".
58  *
59  * LRC implementation:
60  * Regarding the creation of contexts, we have:
61  *
62  * - One global default context.
63  * - One local default context for each opened fd.
64  * - One local extra context for each context create ioctl call.
65  *
66  * Now that ringbuffers belong per-context (and not per-engine, like before)
67  * and that contexts are uniquely tied to a given engine (and not reusable,
68  * like before) we need:
69  *
70  * - One ringbuffer per-engine inside each context.
71  * - One backing object per-engine inside each context.
72  *
73  * The global default context starts its life with these new objects fully
74  * allocated and populated. The local default context for each opened fd is
75  * more complex, because we don't know at creation time which engine is going
76  * to use them. To handle this, we have implemented a deferred creation of LR
77  * contexts:
78  *
79  * The local context starts its life as a hollow or blank holder, that only
80  * gets populated for a given engine once we receive an execbuffer. If later
81  * on we receive another execbuffer ioctl for the same context but a different
82  * engine, we allocate/populate a new ringbuffer and context backing object and
83  * so on.
84  *
85  * Finally, regarding local contexts created using the ioctl call: as they are
86  * only allowed with the render ring, we can allocate & populate them right
87  * away (no need to defer anything, at least for now).
88  *
89  * Execlists implementation:
90  * Execlists are the new method by which, on gen8+ hardware, workloads are
91  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92  * This method works as follows:
93  *
94  * When a request is committed, its commands (the BB start and any leading or
95  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96  * for the appropriate context. The tail pointer in the hardware context is not
97  * updated at this time, but instead, kept by the driver in the ringbuffer
98  * structure. A structure representing this request is added to a request queue
99  * for the appropriate engine: this structure contains a copy of the context's
100  * tail after the request was written to the ring buffer and a pointer to the
101  * context itself.
102  *
103  * If the engine's request queue was empty before the request was added, the
104  * queue is processed immediately. Otherwise the queue will be processed during
105  * a context switch interrupt. In any case, elements on the queue will get sent
106  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107  * globally unique 20-bits submission ID.
108  *
109  * When execution of a request completes, the GPU updates the context status
110  * buffer with a context complete event and generates a context switch interrupt.
111  * During the interrupt handling, the driver examines the events in the buffer:
112  * for each context complete event, if the announced ID matches that on the head
113  * of the request queue, then that request is retired and removed from the queue.
114  *
115  * After processing, if any requests were retired and the queue is not empty
116  * then a new execution list can be submitted. The two requests at the front of
117  * the queue are next to be submitted but since a context may not occur twice in
118  * an execution list, if subsequent requests have the same ID as the first then
119  * the two requests must be combined. This is done simply by discarding requests
120  * at the head of the queue until either only one requests is left (in which case
121  * we use a NULL second context) or the first two requests have unique IDs.
122  *
123  * By always executing the first two requests in the queue the driver ensures
124  * that the GPU is kept as busy as possible. In the case where a single context
125  * completes but a second context is still executing, the request for this second
126  * context will be at the head of the queue when we remove the first one. This
127  * request will then be resubmitted along with a new request for a different context,
128  * which will cause the hardware to continue executing the second request and queue
129  * the new request (the GPU detects the condition of a context getting preempted
130  * with the same context and optimizes the context switch flow by not doing
131  * preemption, but just sampling the new tail pointer).
132  *
133  */
134 #include <linux/interrupt.h>
135 
136 #include "i915_drv.h"
137 #include "i915_perf.h"
138 #include "i915_trace.h"
139 #include "i915_vgpu.h"
140 #include "intel_breadcrumbs.h"
141 #include "intel_context.h"
142 #include "intel_engine_pm.h"
143 #include "intel_gt.h"
144 #include "intel_gt_pm.h"
145 #include "intel_gt_requests.h"
146 #include "intel_lrc_reg.h"
147 #include "intel_mocs.h"
148 #include "intel_reset.h"
149 #include "intel_ring.h"
150 #include "intel_workarounds.h"
151 #include "shmem_utils.h"
152 
153 #define RING_EXECLIST_QFULL		(1 << 0x2)
154 #define RING_EXECLIST1_VALID		(1 << 0x3)
155 #define RING_EXECLIST0_VALID		(1 << 0x4)
156 #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
157 #define RING_EXECLIST1_ACTIVE		(1 << 0x11)
158 #define RING_EXECLIST0_ACTIVE		(1 << 0x12)
159 
160 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
161 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
162 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
163 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
164 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
165 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
166 
167 #define GEN8_CTX_STATUS_COMPLETED_MASK \
168 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
169 
170 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
171 
172 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
173 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
174 #define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
175 #define GEN12_IDLE_CTX_ID		0x7FF
176 #define GEN12_CSB_CTX_VALID(csb_dw) \
177 	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
178 
179 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
180 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
181 
182 struct virtual_engine {
183 	struct intel_engine_cs base;
184 	struct intel_context context;
185 	struct rcu_work rcu;
186 
187 	/*
188 	 * We allow only a single request through the virtual engine at a time
189 	 * (each request in the timeline waits for the completion fence of
190 	 * the previous before being submitted). By restricting ourselves to
191 	 * only submitting a single request, each request is placed on to a
192 	 * physical to maximise load spreading (by virtue of the late greedy
193 	 * scheduling -- each real engine takes the next available request
194 	 * upon idling).
195 	 */
196 	struct i915_request *request;
197 
198 	/*
199 	 * We keep a rbtree of available virtual engines inside each physical
200 	 * engine, sorted by priority. Here we preallocate the nodes we need
201 	 * for the virtual engine, indexed by physical_engine->id.
202 	 */
203 	struct ve_node {
204 		struct rb_node rb;
205 		int prio;
206 	} nodes[I915_NUM_ENGINES];
207 
208 	/*
209 	 * Keep track of bonded pairs -- restrictions upon on our selection
210 	 * of physical engines any particular request may be submitted to.
211 	 * If we receive a submit-fence from a master engine, we will only
212 	 * use one of sibling_mask physical engines.
213 	 */
214 	struct ve_bond {
215 		const struct intel_engine_cs *master;
216 		intel_engine_mask_t sibling_mask;
217 	} *bonds;
218 	unsigned int num_bonds;
219 
220 	/* And finally, which physical engines this virtual engine maps onto. */
221 	unsigned int num_siblings;
222 	struct intel_engine_cs *siblings[];
223 };
224 
225 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
226 {
227 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
228 	return container_of(engine, struct virtual_engine, base);
229 }
230 
231 static int __execlists_context_alloc(struct intel_context *ce,
232 				     struct intel_engine_cs *engine);
233 
234 static void execlists_init_reg_state(u32 *reg_state,
235 				     const struct intel_context *ce,
236 				     const struct intel_engine_cs *engine,
237 				     const struct intel_ring *ring,
238 				     bool close);
239 static void
240 __execlists_update_reg_state(const struct intel_context *ce,
241 			     const struct intel_engine_cs *engine,
242 			     u32 head);
243 
244 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
245 {
246 	if (INTEL_GEN(engine->i915) >= 12)
247 		return 0x60;
248 	else if (INTEL_GEN(engine->i915) >= 9)
249 		return 0x54;
250 	else if (engine->class == RENDER_CLASS)
251 		return 0x58;
252 	else
253 		return -1;
254 }
255 
256 static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
257 {
258 	if (INTEL_GEN(engine->i915) >= 12)
259 		return 0x74;
260 	else if (INTEL_GEN(engine->i915) >= 9)
261 		return 0x68;
262 	else if (engine->class == RENDER_CLASS)
263 		return 0xd8;
264 	else
265 		return -1;
266 }
267 
268 static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
269 {
270 	if (INTEL_GEN(engine->i915) >= 12)
271 		return 0x12;
272 	else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
273 		return 0x18;
274 	else
275 		return -1;
276 }
277 
278 static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
279 {
280 	int x;
281 
282 	x = lrc_ring_wa_bb_per_ctx(engine);
283 	if (x < 0)
284 		return x;
285 
286 	return x + 2;
287 }
288 
289 static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
290 {
291 	int x;
292 
293 	x = lrc_ring_indirect_ptr(engine);
294 	if (x < 0)
295 		return x;
296 
297 	return x + 2;
298 }
299 
300 static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
301 {
302 	if (engine->class != RENDER_CLASS)
303 		return -1;
304 
305 	if (INTEL_GEN(engine->i915) >= 12)
306 		return 0xb6;
307 	else if (INTEL_GEN(engine->i915) >= 11)
308 		return 0xaa;
309 	else
310 		return -1;
311 }
312 
313 static u32
314 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
315 {
316 	switch (INTEL_GEN(engine->i915)) {
317 	default:
318 		MISSING_CASE(INTEL_GEN(engine->i915));
319 		fallthrough;
320 	case 12:
321 		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
322 	case 11:
323 		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
324 	case 10:
325 		return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
326 	case 9:
327 		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
328 	case 8:
329 		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
330 	}
331 }
332 
333 static void
334 lrc_ring_setup_indirect_ctx(u32 *regs,
335 			    const struct intel_engine_cs *engine,
336 			    u32 ctx_bb_ggtt_addr,
337 			    u32 size)
338 {
339 	GEM_BUG_ON(!size);
340 	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
341 	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
342 	regs[lrc_ring_indirect_ptr(engine) + 1] =
343 		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
344 
345 	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
346 	regs[lrc_ring_indirect_offset(engine) + 1] =
347 		lrc_ring_indirect_offset_default(engine) << 6;
348 }
349 
350 static u32 intel_context_get_runtime(const struct intel_context *ce)
351 {
352 	/*
353 	 * We can use either ppHWSP[16] which is recorded before the context
354 	 * switch (and so excludes the cost of context switches) or use the
355 	 * value from the context image itself, which is saved/restored earlier
356 	 * and so includes the cost of the save.
357 	 */
358 	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
359 }
360 
361 static void mark_eio(struct i915_request *rq)
362 {
363 	if (i915_request_completed(rq))
364 		return;
365 
366 	GEM_BUG_ON(i915_request_signaled(rq));
367 
368 	i915_request_set_error_once(rq, -EIO);
369 	i915_request_mark_complete(rq);
370 }
371 
372 static struct i915_request *
373 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
374 {
375 	struct i915_request *active = rq;
376 
377 	rcu_read_lock();
378 	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
379 		if (i915_request_completed(rq))
380 			break;
381 
382 		active = rq;
383 	}
384 	rcu_read_unlock();
385 
386 	return active;
387 }
388 
389 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
390 {
391 	return (i915_ggtt_offset(engine->status_page.vma) +
392 		I915_GEM_HWS_PREEMPT_ADDR);
393 }
394 
395 static inline void
396 ring_set_paused(const struct intel_engine_cs *engine, int state)
397 {
398 	/*
399 	 * We inspect HWS_PREEMPT with a semaphore inside
400 	 * engine->emit_fini_breadcrumb. If the dword is true,
401 	 * the ring is paused as the semaphore will busywait
402 	 * until the dword is false.
403 	 */
404 	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
405 	if (state)
406 		wmb();
407 }
408 
409 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
410 {
411 	return rb_entry(rb, struct i915_priolist, node);
412 }
413 
414 static inline int rq_prio(const struct i915_request *rq)
415 {
416 	return READ_ONCE(rq->sched.attr.priority);
417 }
418 
419 static int effective_prio(const struct i915_request *rq)
420 {
421 	int prio = rq_prio(rq);
422 
423 	/*
424 	 * If this request is special and must not be interrupted at any
425 	 * cost, so be it. Note we are only checking the most recent request
426 	 * in the context and so may be masking an earlier vip request. It
427 	 * is hoped that under the conditions where nopreempt is used, this
428 	 * will not matter (i.e. all requests to that context will be
429 	 * nopreempt for as long as desired).
430 	 */
431 	if (i915_request_has_nopreempt(rq))
432 		prio = I915_PRIORITY_UNPREEMPTABLE;
433 
434 	return prio;
435 }
436 
437 static int queue_prio(const struct intel_engine_execlists *execlists)
438 {
439 	struct i915_priolist *p;
440 	struct rb_node *rb;
441 
442 	rb = rb_first_cached(&execlists->queue);
443 	if (!rb)
444 		return INT_MIN;
445 
446 	/*
447 	 * As the priolist[] are inverted, with the highest priority in [0],
448 	 * we have to flip the index value to become priority.
449 	 */
450 	p = to_priolist(rb);
451 	if (!I915_USER_PRIORITY_SHIFT)
452 		return p->priority;
453 
454 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
455 }
456 
457 static inline bool need_preempt(const struct intel_engine_cs *engine,
458 				const struct i915_request *rq,
459 				struct rb_node *rb)
460 {
461 	int last_prio;
462 
463 	if (!intel_engine_has_semaphores(engine))
464 		return false;
465 
466 	/*
467 	 * Check if the current priority hint merits a preemption attempt.
468 	 *
469 	 * We record the highest value priority we saw during rescheduling
470 	 * prior to this dequeue, therefore we know that if it is strictly
471 	 * less than the current tail of ESLP[0], we do not need to force
472 	 * a preempt-to-idle cycle.
473 	 *
474 	 * However, the priority hint is a mere hint that we may need to
475 	 * preempt. If that hint is stale or we may be trying to preempt
476 	 * ourselves, ignore the request.
477 	 *
478 	 * More naturally we would write
479 	 *      prio >= max(0, last);
480 	 * except that we wish to prevent triggering preemption at the same
481 	 * priority level: the task that is running should remain running
482 	 * to preserve FIFO ordering of dependencies.
483 	 */
484 	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
485 	if (engine->execlists.queue_priority_hint <= last_prio)
486 		return false;
487 
488 	/*
489 	 * Check against the first request in ELSP[1], it will, thanks to the
490 	 * power of PI, be the highest priority of that context.
491 	 */
492 	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
493 	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
494 		return true;
495 
496 	if (rb) {
497 		struct virtual_engine *ve =
498 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
499 		bool preempt = false;
500 
501 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
502 			struct i915_request *next;
503 
504 			rcu_read_lock();
505 			next = READ_ONCE(ve->request);
506 			if (next)
507 				preempt = rq_prio(next) > last_prio;
508 			rcu_read_unlock();
509 		}
510 
511 		if (preempt)
512 			return preempt;
513 	}
514 
515 	/*
516 	 * If the inflight context did not trigger the preemption, then maybe
517 	 * it was the set of queued requests? Pick the highest priority in
518 	 * the queue (the first active priolist) and see if it deserves to be
519 	 * running instead of ELSP[0].
520 	 *
521 	 * The highest priority request in the queue can not be either
522 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
523 	 * context, it's priority would not exceed ELSP[0] aka last_prio.
524 	 */
525 	return queue_prio(&engine->execlists) > last_prio;
526 }
527 
528 __maybe_unused static inline bool
529 assert_priority_queue(const struct i915_request *prev,
530 		      const struct i915_request *next)
531 {
532 	/*
533 	 * Without preemption, the prev may refer to the still active element
534 	 * which we refuse to let go.
535 	 *
536 	 * Even with preemption, there are times when we think it is better not
537 	 * to preempt and leave an ostensibly lower priority request in flight.
538 	 */
539 	if (i915_request_is_active(prev))
540 		return true;
541 
542 	return rq_prio(prev) >= rq_prio(next);
543 }
544 
545 /*
546  * The context descriptor encodes various attributes of a context,
547  * including its GTT address and some flags. Because it's fairly
548  * expensive to calculate, we'll just do it once and cache the result,
549  * which remains valid until the context is unpinned.
550  *
551  * This is what a descriptor looks like, from LSB to MSB::
552  *
553  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
554  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
555  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
556  *      bits 53-54:    mbz, reserved for use by hardware
557  *      bits 55-63:    group ID, currently unused and set to 0
558  *
559  * Starting from Gen11, the upper dword of the descriptor has a new format:
560  *
561  *      bits 32-36:    reserved
562  *      bits 37-47:    SW context ID
563  *      bits 48:53:    engine instance
564  *      bit 54:        mbz, reserved for use by hardware
565  *      bits 55-60:    SW counter
566  *      bits 61-63:    engine class
567  *
568  * engine info, SW context ID and SW counter need to form a unique number
569  * (Context ID) per lrc.
570  */
571 static u32
572 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
573 {
574 	u32 desc;
575 
576 	desc = INTEL_LEGACY_32B_CONTEXT;
577 	if (i915_vm_is_4lvl(ce->vm))
578 		desc = INTEL_LEGACY_64B_CONTEXT;
579 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
580 
581 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
582 	if (IS_GEN(engine->i915, 8))
583 		desc |= GEN8_CTX_L3LLC_COHERENT;
584 
585 	return i915_ggtt_offset(ce->state) | desc;
586 }
587 
588 static inline unsigned int dword_in_page(void *addr)
589 {
590 	return offset_in_page(addr) / sizeof(u32);
591 }
592 
593 static void set_offsets(u32 *regs,
594 			const u8 *data,
595 			const struct intel_engine_cs *engine,
596 			bool clear)
597 #define NOP(x) (BIT(7) | (x))
598 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
599 #define POSTED BIT(0)
600 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
601 #define REG16(x) \
602 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
603 	(((x) >> 2) & 0x7f)
604 #define END(total_state_size) 0, (total_state_size)
605 {
606 	const u32 base = engine->mmio_base;
607 
608 	while (*data) {
609 		u8 count, flags;
610 
611 		if (*data & BIT(7)) { /* skip */
612 			count = *data++ & ~BIT(7);
613 			if (clear)
614 				memset32(regs, MI_NOOP, count);
615 			regs += count;
616 			continue;
617 		}
618 
619 		count = *data & 0x3f;
620 		flags = *data >> 6;
621 		data++;
622 
623 		*regs = MI_LOAD_REGISTER_IMM(count);
624 		if (flags & POSTED)
625 			*regs |= MI_LRI_FORCE_POSTED;
626 		if (INTEL_GEN(engine->i915) >= 11)
627 			*regs |= MI_LRI_LRM_CS_MMIO;
628 		regs++;
629 
630 		GEM_BUG_ON(!count);
631 		do {
632 			u32 offset = 0;
633 			u8 v;
634 
635 			do {
636 				v = *data++;
637 				offset <<= 7;
638 				offset |= v & ~BIT(7);
639 			} while (v & BIT(7));
640 
641 			regs[0] = base + (offset << 2);
642 			if (clear)
643 				regs[1] = 0;
644 			regs += 2;
645 		} while (--count);
646 	}
647 
648 	if (clear) {
649 		u8 count = *++data;
650 
651 		/* Clear past the tail for HW access */
652 		GEM_BUG_ON(dword_in_page(regs) > count);
653 		memset32(regs, MI_NOOP, count - dword_in_page(regs));
654 
655 		/* Close the batch; used mainly by live_lrc_layout() */
656 		*regs = MI_BATCH_BUFFER_END;
657 		if (INTEL_GEN(engine->i915) >= 10)
658 			*regs |= BIT(0);
659 	}
660 }
661 
662 static const u8 gen8_xcs_offsets[] = {
663 	NOP(1),
664 	LRI(11, 0),
665 	REG16(0x244),
666 	REG(0x034),
667 	REG(0x030),
668 	REG(0x038),
669 	REG(0x03c),
670 	REG(0x168),
671 	REG(0x140),
672 	REG(0x110),
673 	REG(0x11c),
674 	REG(0x114),
675 	REG(0x118),
676 
677 	NOP(9),
678 	LRI(9, 0),
679 	REG16(0x3a8),
680 	REG16(0x28c),
681 	REG16(0x288),
682 	REG16(0x284),
683 	REG16(0x280),
684 	REG16(0x27c),
685 	REG16(0x278),
686 	REG16(0x274),
687 	REG16(0x270),
688 
689 	NOP(13),
690 	LRI(2, 0),
691 	REG16(0x200),
692 	REG(0x028),
693 
694 	END(80)
695 };
696 
697 static const u8 gen9_xcs_offsets[] = {
698 	NOP(1),
699 	LRI(14, POSTED),
700 	REG16(0x244),
701 	REG(0x034),
702 	REG(0x030),
703 	REG(0x038),
704 	REG(0x03c),
705 	REG(0x168),
706 	REG(0x140),
707 	REG(0x110),
708 	REG(0x11c),
709 	REG(0x114),
710 	REG(0x118),
711 	REG(0x1c0),
712 	REG(0x1c4),
713 	REG(0x1c8),
714 
715 	NOP(3),
716 	LRI(9, POSTED),
717 	REG16(0x3a8),
718 	REG16(0x28c),
719 	REG16(0x288),
720 	REG16(0x284),
721 	REG16(0x280),
722 	REG16(0x27c),
723 	REG16(0x278),
724 	REG16(0x274),
725 	REG16(0x270),
726 
727 	NOP(13),
728 	LRI(1, POSTED),
729 	REG16(0x200),
730 
731 	NOP(13),
732 	LRI(44, POSTED),
733 	REG(0x028),
734 	REG(0x09c),
735 	REG(0x0c0),
736 	REG(0x178),
737 	REG(0x17c),
738 	REG16(0x358),
739 	REG(0x170),
740 	REG(0x150),
741 	REG(0x154),
742 	REG(0x158),
743 	REG16(0x41c),
744 	REG16(0x600),
745 	REG16(0x604),
746 	REG16(0x608),
747 	REG16(0x60c),
748 	REG16(0x610),
749 	REG16(0x614),
750 	REG16(0x618),
751 	REG16(0x61c),
752 	REG16(0x620),
753 	REG16(0x624),
754 	REG16(0x628),
755 	REG16(0x62c),
756 	REG16(0x630),
757 	REG16(0x634),
758 	REG16(0x638),
759 	REG16(0x63c),
760 	REG16(0x640),
761 	REG16(0x644),
762 	REG16(0x648),
763 	REG16(0x64c),
764 	REG16(0x650),
765 	REG16(0x654),
766 	REG16(0x658),
767 	REG16(0x65c),
768 	REG16(0x660),
769 	REG16(0x664),
770 	REG16(0x668),
771 	REG16(0x66c),
772 	REG16(0x670),
773 	REG16(0x674),
774 	REG16(0x678),
775 	REG16(0x67c),
776 	REG(0x068),
777 
778 	END(176)
779 };
780 
781 static const u8 gen12_xcs_offsets[] = {
782 	NOP(1),
783 	LRI(13, POSTED),
784 	REG16(0x244),
785 	REG(0x034),
786 	REG(0x030),
787 	REG(0x038),
788 	REG(0x03c),
789 	REG(0x168),
790 	REG(0x140),
791 	REG(0x110),
792 	REG(0x1c0),
793 	REG(0x1c4),
794 	REG(0x1c8),
795 	REG(0x180),
796 	REG16(0x2b4),
797 
798 	NOP(5),
799 	LRI(9, POSTED),
800 	REG16(0x3a8),
801 	REG16(0x28c),
802 	REG16(0x288),
803 	REG16(0x284),
804 	REG16(0x280),
805 	REG16(0x27c),
806 	REG16(0x278),
807 	REG16(0x274),
808 	REG16(0x270),
809 
810 	END(80)
811 };
812 
813 static const u8 gen8_rcs_offsets[] = {
814 	NOP(1),
815 	LRI(14, POSTED),
816 	REG16(0x244),
817 	REG(0x034),
818 	REG(0x030),
819 	REG(0x038),
820 	REG(0x03c),
821 	REG(0x168),
822 	REG(0x140),
823 	REG(0x110),
824 	REG(0x11c),
825 	REG(0x114),
826 	REG(0x118),
827 	REG(0x1c0),
828 	REG(0x1c4),
829 	REG(0x1c8),
830 
831 	NOP(3),
832 	LRI(9, POSTED),
833 	REG16(0x3a8),
834 	REG16(0x28c),
835 	REG16(0x288),
836 	REG16(0x284),
837 	REG16(0x280),
838 	REG16(0x27c),
839 	REG16(0x278),
840 	REG16(0x274),
841 	REG16(0x270),
842 
843 	NOP(13),
844 	LRI(1, 0),
845 	REG(0x0c8),
846 
847 	END(80)
848 };
849 
850 static const u8 gen9_rcs_offsets[] = {
851 	NOP(1),
852 	LRI(14, POSTED),
853 	REG16(0x244),
854 	REG(0x34),
855 	REG(0x30),
856 	REG(0x38),
857 	REG(0x3c),
858 	REG(0x168),
859 	REG(0x140),
860 	REG(0x110),
861 	REG(0x11c),
862 	REG(0x114),
863 	REG(0x118),
864 	REG(0x1c0),
865 	REG(0x1c4),
866 	REG(0x1c8),
867 
868 	NOP(3),
869 	LRI(9, POSTED),
870 	REG16(0x3a8),
871 	REG16(0x28c),
872 	REG16(0x288),
873 	REG16(0x284),
874 	REG16(0x280),
875 	REG16(0x27c),
876 	REG16(0x278),
877 	REG16(0x274),
878 	REG16(0x270),
879 
880 	NOP(13),
881 	LRI(1, 0),
882 	REG(0xc8),
883 
884 	NOP(13),
885 	LRI(44, POSTED),
886 	REG(0x28),
887 	REG(0x9c),
888 	REG(0xc0),
889 	REG(0x178),
890 	REG(0x17c),
891 	REG16(0x358),
892 	REG(0x170),
893 	REG(0x150),
894 	REG(0x154),
895 	REG(0x158),
896 	REG16(0x41c),
897 	REG16(0x600),
898 	REG16(0x604),
899 	REG16(0x608),
900 	REG16(0x60c),
901 	REG16(0x610),
902 	REG16(0x614),
903 	REG16(0x618),
904 	REG16(0x61c),
905 	REG16(0x620),
906 	REG16(0x624),
907 	REG16(0x628),
908 	REG16(0x62c),
909 	REG16(0x630),
910 	REG16(0x634),
911 	REG16(0x638),
912 	REG16(0x63c),
913 	REG16(0x640),
914 	REG16(0x644),
915 	REG16(0x648),
916 	REG16(0x64c),
917 	REG16(0x650),
918 	REG16(0x654),
919 	REG16(0x658),
920 	REG16(0x65c),
921 	REG16(0x660),
922 	REG16(0x664),
923 	REG16(0x668),
924 	REG16(0x66c),
925 	REG16(0x670),
926 	REG16(0x674),
927 	REG16(0x678),
928 	REG16(0x67c),
929 	REG(0x68),
930 
931 	END(176)
932 };
933 
934 static const u8 gen11_rcs_offsets[] = {
935 	NOP(1),
936 	LRI(15, POSTED),
937 	REG16(0x244),
938 	REG(0x034),
939 	REG(0x030),
940 	REG(0x038),
941 	REG(0x03c),
942 	REG(0x168),
943 	REG(0x140),
944 	REG(0x110),
945 	REG(0x11c),
946 	REG(0x114),
947 	REG(0x118),
948 	REG(0x1c0),
949 	REG(0x1c4),
950 	REG(0x1c8),
951 	REG(0x180),
952 
953 	NOP(1),
954 	LRI(9, POSTED),
955 	REG16(0x3a8),
956 	REG16(0x28c),
957 	REG16(0x288),
958 	REG16(0x284),
959 	REG16(0x280),
960 	REG16(0x27c),
961 	REG16(0x278),
962 	REG16(0x274),
963 	REG16(0x270),
964 
965 	LRI(1, POSTED),
966 	REG(0x1b0),
967 
968 	NOP(10),
969 	LRI(1, 0),
970 	REG(0x0c8),
971 
972 	END(80)
973 };
974 
975 static const u8 gen12_rcs_offsets[] = {
976 	NOP(1),
977 	LRI(13, POSTED),
978 	REG16(0x244),
979 	REG(0x034),
980 	REG(0x030),
981 	REG(0x038),
982 	REG(0x03c),
983 	REG(0x168),
984 	REG(0x140),
985 	REG(0x110),
986 	REG(0x1c0),
987 	REG(0x1c4),
988 	REG(0x1c8),
989 	REG(0x180),
990 	REG16(0x2b4),
991 
992 	NOP(5),
993 	LRI(9, POSTED),
994 	REG16(0x3a8),
995 	REG16(0x28c),
996 	REG16(0x288),
997 	REG16(0x284),
998 	REG16(0x280),
999 	REG16(0x27c),
1000 	REG16(0x278),
1001 	REG16(0x274),
1002 	REG16(0x270),
1003 
1004 	LRI(3, POSTED),
1005 	REG(0x1b0),
1006 	REG16(0x5a8),
1007 	REG16(0x5ac),
1008 
1009 	NOP(6),
1010 	LRI(1, 0),
1011 	REG(0x0c8),
1012 	NOP(3 + 9 + 1),
1013 
1014 	LRI(51, POSTED),
1015 	REG16(0x588),
1016 	REG16(0x588),
1017 	REG16(0x588),
1018 	REG16(0x588),
1019 	REG16(0x588),
1020 	REG16(0x588),
1021 	REG(0x028),
1022 	REG(0x09c),
1023 	REG(0x0c0),
1024 	REG(0x178),
1025 	REG(0x17c),
1026 	REG16(0x358),
1027 	REG(0x170),
1028 	REG(0x150),
1029 	REG(0x154),
1030 	REG(0x158),
1031 	REG16(0x41c),
1032 	REG16(0x600),
1033 	REG16(0x604),
1034 	REG16(0x608),
1035 	REG16(0x60c),
1036 	REG16(0x610),
1037 	REG16(0x614),
1038 	REG16(0x618),
1039 	REG16(0x61c),
1040 	REG16(0x620),
1041 	REG16(0x624),
1042 	REG16(0x628),
1043 	REG16(0x62c),
1044 	REG16(0x630),
1045 	REG16(0x634),
1046 	REG16(0x638),
1047 	REG16(0x63c),
1048 	REG16(0x640),
1049 	REG16(0x644),
1050 	REG16(0x648),
1051 	REG16(0x64c),
1052 	REG16(0x650),
1053 	REG16(0x654),
1054 	REG16(0x658),
1055 	REG16(0x65c),
1056 	REG16(0x660),
1057 	REG16(0x664),
1058 	REG16(0x668),
1059 	REG16(0x66c),
1060 	REG16(0x670),
1061 	REG16(0x674),
1062 	REG16(0x678),
1063 	REG16(0x67c),
1064 	REG(0x068),
1065 	REG(0x084),
1066 	NOP(1),
1067 
1068 	END(192)
1069 };
1070 
1071 #undef END
1072 #undef REG16
1073 #undef REG
1074 #undef LRI
1075 #undef NOP
1076 
1077 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1078 {
1079 	/*
1080 	 * The gen12+ lists only have the registers we program in the basic
1081 	 * default state. We rely on the context image using relative
1082 	 * addressing to automatic fixup the register state between the
1083 	 * physical engines for virtual engine.
1084 	 */
1085 	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1086 		   !intel_engine_has_relative_mmio(engine));
1087 
1088 	if (engine->class == RENDER_CLASS) {
1089 		if (INTEL_GEN(engine->i915) >= 12)
1090 			return gen12_rcs_offsets;
1091 		else if (INTEL_GEN(engine->i915) >= 11)
1092 			return gen11_rcs_offsets;
1093 		else if (INTEL_GEN(engine->i915) >= 9)
1094 			return gen9_rcs_offsets;
1095 		else
1096 			return gen8_rcs_offsets;
1097 	} else {
1098 		if (INTEL_GEN(engine->i915) >= 12)
1099 			return gen12_xcs_offsets;
1100 		else if (INTEL_GEN(engine->i915) >= 9)
1101 			return gen9_xcs_offsets;
1102 		else
1103 			return gen8_xcs_offsets;
1104 	}
1105 }
1106 
1107 static struct i915_request *
1108 __unwind_incomplete_requests(struct intel_engine_cs *engine)
1109 {
1110 	struct i915_request *rq, *rn, *active = NULL;
1111 	struct list_head *pl;
1112 	int prio = I915_PRIORITY_INVALID;
1113 
1114 	lockdep_assert_held(&engine->active.lock);
1115 
1116 	list_for_each_entry_safe_reverse(rq, rn,
1117 					 &engine->active.requests,
1118 					 sched.link) {
1119 		if (i915_request_completed(rq))
1120 			continue; /* XXX */
1121 
1122 		__i915_request_unsubmit(rq);
1123 
1124 		/*
1125 		 * Push the request back into the queue for later resubmission.
1126 		 * If this request is not native to this physical engine (i.e.
1127 		 * it came from a virtual source), push it back onto the virtual
1128 		 * engine so that it can be moved across onto another physical
1129 		 * engine as load dictates.
1130 		 */
1131 		if (likely(rq->execution_mask == engine->mask)) {
1132 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1133 			if (rq_prio(rq) != prio) {
1134 				prio = rq_prio(rq);
1135 				pl = i915_sched_lookup_priolist(engine, prio);
1136 			}
1137 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1138 
1139 			list_move(&rq->sched.link, pl);
1140 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1141 
1142 			/* Check in case we rollback so far we wrap [size/2] */
1143 			if (intel_ring_direction(rq->ring,
1144 						 rq->tail,
1145 						 rq->ring->tail + 8) > 0)
1146 				rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1147 
1148 			active = rq;
1149 		} else {
1150 			struct intel_engine_cs *owner = rq->context->engine;
1151 
1152 			WRITE_ONCE(rq->engine, owner);
1153 			owner->submit_request(rq);
1154 			active = NULL;
1155 		}
1156 	}
1157 
1158 	return active;
1159 }
1160 
1161 struct i915_request *
1162 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1163 {
1164 	struct intel_engine_cs *engine =
1165 		container_of(execlists, typeof(*engine), execlists);
1166 
1167 	return __unwind_incomplete_requests(engine);
1168 }
1169 
1170 static inline void
1171 execlists_context_status_change(struct i915_request *rq, unsigned long status)
1172 {
1173 	/*
1174 	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
1175 	 * The compiler should eliminate this function as dead-code.
1176 	 */
1177 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1178 		return;
1179 
1180 	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1181 				   status, rq);
1182 }
1183 
1184 static void intel_engine_context_in(struct intel_engine_cs *engine)
1185 {
1186 	unsigned long flags;
1187 
1188 	if (atomic_add_unless(&engine->stats.active, 1, 0))
1189 		return;
1190 
1191 	write_seqlock_irqsave(&engine->stats.lock, flags);
1192 	if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1193 		engine->stats.start = ktime_get();
1194 		atomic_inc(&engine->stats.active);
1195 	}
1196 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1197 }
1198 
1199 static void intel_engine_context_out(struct intel_engine_cs *engine)
1200 {
1201 	unsigned long flags;
1202 
1203 	GEM_BUG_ON(!atomic_read(&engine->stats.active));
1204 
1205 	if (atomic_add_unless(&engine->stats.active, -1, 1))
1206 		return;
1207 
1208 	write_seqlock_irqsave(&engine->stats.lock, flags);
1209 	if (atomic_dec_and_test(&engine->stats.active)) {
1210 		engine->stats.total =
1211 			ktime_add(engine->stats.total,
1212 				  ktime_sub(ktime_get(), engine->stats.start));
1213 	}
1214 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1215 }
1216 
1217 static void
1218 execlists_check_context(const struct intel_context *ce,
1219 			const struct intel_engine_cs *engine,
1220 			const char *when)
1221 {
1222 	const struct intel_ring *ring = ce->ring;
1223 	u32 *regs = ce->lrc_reg_state;
1224 	bool valid = true;
1225 	int x;
1226 
1227 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1228 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1229 		       engine->name,
1230 		       regs[CTX_RING_START],
1231 		       i915_ggtt_offset(ring->vma));
1232 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1233 		valid = false;
1234 	}
1235 
1236 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1237 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1238 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1239 		       engine->name,
1240 		       regs[CTX_RING_CTL],
1241 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1242 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1243 		valid = false;
1244 	}
1245 
1246 	x = lrc_ring_mi_mode(engine);
1247 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1248 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1249 		       engine->name, regs[x + 1]);
1250 		regs[x + 1] &= ~STOP_RING;
1251 		regs[x + 1] |= STOP_RING << 16;
1252 		valid = false;
1253 	}
1254 
1255 	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1256 }
1257 
1258 static void restore_default_state(struct intel_context *ce,
1259 				  struct intel_engine_cs *engine)
1260 {
1261 	u32 *regs;
1262 
1263 	regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1264 	execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1265 
1266 	ce->runtime.last = intel_context_get_runtime(ce);
1267 }
1268 
1269 static void reset_active(struct i915_request *rq,
1270 			 struct intel_engine_cs *engine)
1271 {
1272 	struct intel_context * const ce = rq->context;
1273 	u32 head;
1274 
1275 	/*
1276 	 * The executing context has been cancelled. We want to prevent
1277 	 * further execution along this context and propagate the error on
1278 	 * to anything depending on its results.
1279 	 *
1280 	 * In __i915_request_submit(), we apply the -EIO and remove the
1281 	 * requests' payloads for any banned requests. But first, we must
1282 	 * rewind the context back to the start of the incomplete request so
1283 	 * that we do not jump back into the middle of the batch.
1284 	 *
1285 	 * We preserve the breadcrumbs and semaphores of the incomplete
1286 	 * requests so that inter-timeline dependencies (i.e other timelines)
1287 	 * remain correctly ordered. And we defer to __i915_request_submit()
1288 	 * so that all asynchronous waits are correctly handled.
1289 	 */
1290 	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1291 		     rq->fence.context, rq->fence.seqno);
1292 
1293 	/* On resubmission of the active request, payload will be scrubbed */
1294 	if (i915_request_completed(rq))
1295 		head = rq->tail;
1296 	else
1297 		head = active_request(ce->timeline, rq)->head;
1298 	head = intel_ring_wrap(ce->ring, head);
1299 
1300 	/* Scrub the context image to prevent replaying the previous batch */
1301 	restore_default_state(ce, engine);
1302 	__execlists_update_reg_state(ce, engine, head);
1303 
1304 	/* We've switched away, so this should be a no-op, but intent matters */
1305 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1306 }
1307 
1308 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1309 {
1310 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1311 	ce->runtime.num_underflow += dt < 0;
1312 	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1313 #endif
1314 }
1315 
1316 static void intel_context_update_runtime(struct intel_context *ce)
1317 {
1318 	u32 old;
1319 	s32 dt;
1320 
1321 	if (intel_context_is_barrier(ce))
1322 		return;
1323 
1324 	old = ce->runtime.last;
1325 	ce->runtime.last = intel_context_get_runtime(ce);
1326 	dt = ce->runtime.last - old;
1327 
1328 	if (unlikely(dt <= 0)) {
1329 		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1330 			 old, ce->runtime.last, dt);
1331 		st_update_runtime_underflow(ce, dt);
1332 		return;
1333 	}
1334 
1335 	ewma_runtime_add(&ce->runtime.avg, dt);
1336 	ce->runtime.total += dt;
1337 }
1338 
1339 static inline struct intel_engine_cs *
1340 __execlists_schedule_in(struct i915_request *rq)
1341 {
1342 	struct intel_engine_cs * const engine = rq->engine;
1343 	struct intel_context * const ce = rq->context;
1344 
1345 	intel_context_get(ce);
1346 
1347 	if (unlikely(intel_context_is_banned(ce)))
1348 		reset_active(rq, engine);
1349 
1350 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1351 		execlists_check_context(ce, engine, "before");
1352 
1353 	if (ce->tag) {
1354 		/* Use a fixed tag for OA and friends */
1355 		GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1356 		ce->lrc.ccid = ce->tag;
1357 	} else {
1358 		/* We don't need a strict matching tag, just different values */
1359 		unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1360 
1361 		GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1362 		clear_bit(tag - 1, &engine->context_tag);
1363 		ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1364 
1365 		BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1366 	}
1367 
1368 	ce->lrc.ccid |= engine->execlists.ccid;
1369 
1370 	__intel_gt_pm_get(engine->gt);
1371 	if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
1372 		intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1373 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1374 	intel_engine_context_in(engine);
1375 
1376 	return engine;
1377 }
1378 
1379 static inline struct i915_request *
1380 execlists_schedule_in(struct i915_request *rq, int idx)
1381 {
1382 	struct intel_context * const ce = rq->context;
1383 	struct intel_engine_cs *old;
1384 
1385 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1386 	trace_i915_request_in(rq, idx);
1387 
1388 	old = READ_ONCE(ce->inflight);
1389 	do {
1390 		if (!old) {
1391 			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1392 			break;
1393 		}
1394 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1395 
1396 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1397 	return i915_request_get(rq);
1398 }
1399 
1400 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1401 {
1402 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1403 	struct i915_request *next = READ_ONCE(ve->request);
1404 
1405 	if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1406 		tasklet_hi_schedule(&ve->base.execlists.tasklet);
1407 }
1408 
1409 static inline void
1410 __execlists_schedule_out(struct i915_request *rq,
1411 			 struct intel_engine_cs * const engine,
1412 			 unsigned int ccid)
1413 {
1414 	struct intel_context * const ce = rq->context;
1415 
1416 	/*
1417 	 * NB process_csb() is not under the engine->active.lock and hence
1418 	 * schedule_out can race with schedule_in meaning that we should
1419 	 * refrain from doing non-trivial work here.
1420 	 */
1421 
1422 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1423 		execlists_check_context(ce, engine, "after");
1424 
1425 	/*
1426 	 * If we have just completed this context, the engine may now be
1427 	 * idle and we want to re-enter powersaving.
1428 	 */
1429 	if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1430 	    i915_request_completed(rq))
1431 		intel_engine_add_retire(engine, ce->timeline);
1432 
1433 	ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1434 	ccid &= GEN12_MAX_CONTEXT_HW_ID;
1435 	if (ccid < BITS_PER_LONG) {
1436 		GEM_BUG_ON(ccid == 0);
1437 		GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1438 		set_bit(ccid - 1, &engine->context_tag);
1439 	}
1440 
1441 	intel_context_update_runtime(ce);
1442 	intel_engine_context_out(engine);
1443 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1444 	if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
1445 		intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1446 	intel_gt_pm_put_async(engine->gt);
1447 
1448 	/*
1449 	 * If this is part of a virtual engine, its next request may
1450 	 * have been blocked waiting for access to the active context.
1451 	 * We have to kick all the siblings again in case we need to
1452 	 * switch (e.g. the next request is not runnable on this
1453 	 * engine). Hopefully, we will already have submitted the next
1454 	 * request before the tasklet runs and do not need to rebuild
1455 	 * each virtual tree and kick everyone again.
1456 	 */
1457 	if (ce->engine != engine)
1458 		kick_siblings(rq, ce);
1459 
1460 	intel_context_put(ce);
1461 }
1462 
1463 static inline void
1464 execlists_schedule_out(struct i915_request *rq)
1465 {
1466 	struct intel_context * const ce = rq->context;
1467 	struct intel_engine_cs *cur, *old;
1468 	u32 ccid;
1469 
1470 	trace_i915_request_out(rq);
1471 
1472 	ccid = rq->context->lrc.ccid;
1473 	old = READ_ONCE(ce->inflight);
1474 	do
1475 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1476 	while (!try_cmpxchg(&ce->inflight, &old, cur));
1477 	if (!cur)
1478 		__execlists_schedule_out(rq, old, ccid);
1479 
1480 	i915_request_put(rq);
1481 }
1482 
1483 static u64 execlists_update_context(struct i915_request *rq)
1484 {
1485 	struct intel_context *ce = rq->context;
1486 	u64 desc = ce->lrc.desc;
1487 	u32 tail, prev;
1488 
1489 	/*
1490 	 * WaIdleLiteRestore:bdw,skl
1491 	 *
1492 	 * We should never submit the context with the same RING_TAIL twice
1493 	 * just in case we submit an empty ring, which confuses the HW.
1494 	 *
1495 	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1496 	 * the normal request to be able to always advance the RING_TAIL on
1497 	 * subsequent resubmissions (for lite restore). Should that fail us,
1498 	 * and we try and submit the same tail again, force the context
1499 	 * reload.
1500 	 *
1501 	 * If we need to return to a preempted context, we need to skip the
1502 	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1503 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
1504 	 * an earlier request.
1505 	 */
1506 	GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1507 	prev = rq->ring->tail;
1508 	tail = intel_ring_set_tail(rq->ring, rq->tail);
1509 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1510 		desc |= CTX_DESC_FORCE_RESTORE;
1511 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1512 	rq->tail = rq->wa_tail;
1513 
1514 	/*
1515 	 * Make sure the context image is complete before we submit it to HW.
1516 	 *
1517 	 * Ostensibly, writes (including the WCB) should be flushed prior to
1518 	 * an uncached write such as our mmio register access, the empirical
1519 	 * evidence (esp. on Braswell) suggests that the WC write into memory
1520 	 * may not be visible to the HW prior to the completion of the UC
1521 	 * register write and that we may begin execution from the context
1522 	 * before its image is complete leading to invalid PD chasing.
1523 	 */
1524 	wmb();
1525 
1526 	ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1527 	return desc;
1528 }
1529 
1530 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1531 {
1532 	if (execlists->ctrl_reg) {
1533 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1534 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1535 	} else {
1536 		writel(upper_32_bits(desc), execlists->submit_reg);
1537 		writel(lower_32_bits(desc), execlists->submit_reg);
1538 	}
1539 }
1540 
1541 static __maybe_unused char *
1542 dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1543 {
1544 	if (!rq)
1545 		return "";
1546 
1547 	snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1548 		 prefix,
1549 		 rq->context->lrc.ccid,
1550 		 rq->fence.context, rq->fence.seqno,
1551 		 i915_request_completed(rq) ? "!" :
1552 		 i915_request_started(rq) ? "*" :
1553 		 "",
1554 		 rq_prio(rq));
1555 
1556 	return buf;
1557 }
1558 
1559 static __maybe_unused void
1560 trace_ports(const struct intel_engine_execlists *execlists,
1561 	    const char *msg,
1562 	    struct i915_request * const *ports)
1563 {
1564 	const struct intel_engine_cs *engine =
1565 		container_of(execlists, typeof(*engine), execlists);
1566 	char __maybe_unused p0[40], p1[40];
1567 
1568 	if (!ports[0])
1569 		return;
1570 
1571 	ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1572 		     dump_port(p0, sizeof(p0), "", ports[0]),
1573 		     dump_port(p1, sizeof(p1), ", ", ports[1]));
1574 }
1575 
1576 static inline bool
1577 reset_in_progress(const struct intel_engine_execlists *execlists)
1578 {
1579 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1580 }
1581 
1582 static __maybe_unused bool
1583 assert_pending_valid(const struct intel_engine_execlists *execlists,
1584 		     const char *msg)
1585 {
1586 	struct intel_engine_cs *engine =
1587 		container_of(execlists, typeof(*engine), execlists);
1588 	struct i915_request * const *port, *rq;
1589 	struct intel_context *ce = NULL;
1590 	bool sentinel = false;
1591 	u32 ccid = -1;
1592 
1593 	trace_ports(execlists, msg, execlists->pending);
1594 
1595 	/* We may be messing around with the lists during reset, lalala */
1596 	if (reset_in_progress(execlists))
1597 		return true;
1598 
1599 	if (!execlists->pending[0]) {
1600 		GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1601 			      engine->name);
1602 		return false;
1603 	}
1604 
1605 	if (execlists->pending[execlists_num_ports(execlists)]) {
1606 		GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1607 			      engine->name, execlists_num_ports(execlists));
1608 		return false;
1609 	}
1610 
1611 	for (port = execlists->pending; (rq = *port); port++) {
1612 		unsigned long flags;
1613 		bool ok = true;
1614 
1615 		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1616 		GEM_BUG_ON(!i915_request_is_active(rq));
1617 
1618 		if (ce == rq->context) {
1619 			GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1620 				      engine->name,
1621 				      ce->timeline->fence_context,
1622 				      port - execlists->pending);
1623 			return false;
1624 		}
1625 		ce = rq->context;
1626 
1627 		if (ccid == ce->lrc.ccid) {
1628 			GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1629 				      engine->name,
1630 				      ccid, ce->timeline->fence_context,
1631 				      port - execlists->pending);
1632 			return false;
1633 		}
1634 		ccid = ce->lrc.ccid;
1635 
1636 		/*
1637 		 * Sentinels are supposed to be the last request so they flush
1638 		 * the current execution off the HW. Check that they are the only
1639 		 * request in the pending submission.
1640 		 */
1641 		if (sentinel) {
1642 			GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1643 				      engine->name,
1644 				      ce->timeline->fence_context,
1645 				      port - execlists->pending);
1646 			return false;
1647 		}
1648 		sentinel = i915_request_has_sentinel(rq);
1649 
1650 		/* Hold tightly onto the lock to prevent concurrent retires! */
1651 		if (!spin_trylock_irqsave(&rq->lock, flags))
1652 			continue;
1653 
1654 		if (i915_request_completed(rq))
1655 			goto unlock;
1656 
1657 		if (i915_active_is_idle(&ce->active) &&
1658 		    !intel_context_is_barrier(ce)) {
1659 			GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1660 				      engine->name,
1661 				      ce->timeline->fence_context,
1662 				      port - execlists->pending);
1663 			ok = false;
1664 			goto unlock;
1665 		}
1666 
1667 		if (!i915_vma_is_pinned(ce->state)) {
1668 			GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1669 				      engine->name,
1670 				      ce->timeline->fence_context,
1671 				      port - execlists->pending);
1672 			ok = false;
1673 			goto unlock;
1674 		}
1675 
1676 		if (!i915_vma_is_pinned(ce->ring->vma)) {
1677 			GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1678 				      engine->name,
1679 				      ce->timeline->fence_context,
1680 				      port - execlists->pending);
1681 			ok = false;
1682 			goto unlock;
1683 		}
1684 
1685 unlock:
1686 		spin_unlock_irqrestore(&rq->lock, flags);
1687 		if (!ok)
1688 			return false;
1689 	}
1690 
1691 	return ce;
1692 }
1693 
1694 static void execlists_submit_ports(struct intel_engine_cs *engine)
1695 {
1696 	struct intel_engine_execlists *execlists = &engine->execlists;
1697 	unsigned int n;
1698 
1699 	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1700 
1701 	/*
1702 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1703 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1704 	 * not be relinquished until the device is idle (see
1705 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
1706 	 * that all ELSP are drained i.e. we have processed the CSB,
1707 	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1708 	 */
1709 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1710 
1711 	/*
1712 	 * ELSQ note: the submit queue is not cleared after being submitted
1713 	 * to the HW so we need to make sure we always clean it up. This is
1714 	 * currently ensured by the fact that we always write the same number
1715 	 * of elsq entries, keep this in mind before changing the loop below.
1716 	 */
1717 	for (n = execlists_num_ports(execlists); n--; ) {
1718 		struct i915_request *rq = execlists->pending[n];
1719 
1720 		write_desc(execlists,
1721 			   rq ? execlists_update_context(rq) : 0,
1722 			   n);
1723 	}
1724 
1725 	/* we need to manually load the submit queue */
1726 	if (execlists->ctrl_reg)
1727 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1728 }
1729 
1730 static bool ctx_single_port_submission(const struct intel_context *ce)
1731 {
1732 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1733 		intel_context_force_single_submission(ce));
1734 }
1735 
1736 static bool can_merge_ctx(const struct intel_context *prev,
1737 			  const struct intel_context *next)
1738 {
1739 	if (prev != next)
1740 		return false;
1741 
1742 	if (ctx_single_port_submission(prev))
1743 		return false;
1744 
1745 	return true;
1746 }
1747 
1748 static unsigned long i915_request_flags(const struct i915_request *rq)
1749 {
1750 	return READ_ONCE(rq->fence.flags);
1751 }
1752 
1753 static bool can_merge_rq(const struct i915_request *prev,
1754 			 const struct i915_request *next)
1755 {
1756 	GEM_BUG_ON(prev == next);
1757 	GEM_BUG_ON(!assert_priority_queue(prev, next));
1758 
1759 	/*
1760 	 * We do not submit known completed requests. Therefore if the next
1761 	 * request is already completed, we can pretend to merge it in
1762 	 * with the previous context (and we will skip updating the ELSP
1763 	 * and tracking). Thus hopefully keeping the ELSP full with active
1764 	 * contexts, despite the best efforts of preempt-to-busy to confuse
1765 	 * us.
1766 	 */
1767 	if (i915_request_completed(next))
1768 		return true;
1769 
1770 	if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1771 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1772 		      BIT(I915_FENCE_FLAG_SENTINEL))))
1773 		return false;
1774 
1775 	if (!can_merge_ctx(prev->context, next->context))
1776 		return false;
1777 
1778 	GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1779 	return true;
1780 }
1781 
1782 static void virtual_update_register_offsets(u32 *regs,
1783 					    struct intel_engine_cs *engine)
1784 {
1785 	set_offsets(regs, reg_offsets(engine), engine, false);
1786 }
1787 
1788 static bool virtual_matches(const struct virtual_engine *ve,
1789 			    const struct i915_request *rq,
1790 			    const struct intel_engine_cs *engine)
1791 {
1792 	const struct intel_engine_cs *inflight;
1793 
1794 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1795 		return false;
1796 
1797 	/*
1798 	 * We track when the HW has completed saving the context image
1799 	 * (i.e. when we have seen the final CS event switching out of
1800 	 * the context) and must not overwrite the context image before
1801 	 * then. This restricts us to only using the active engine
1802 	 * while the previous virtualized request is inflight (so
1803 	 * we reuse the register offsets). This is a very small
1804 	 * hystersis on the greedy seelction algorithm.
1805 	 */
1806 	inflight = intel_context_inflight(&ve->context);
1807 	if (inflight && inflight != engine)
1808 		return false;
1809 
1810 	return true;
1811 }
1812 
1813 static void virtual_xfer_context(struct virtual_engine *ve,
1814 				 struct intel_engine_cs *engine)
1815 {
1816 	unsigned int n;
1817 
1818 	if (likely(engine == ve->siblings[0]))
1819 		return;
1820 
1821 	GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1822 	if (!intel_engine_has_relative_mmio(engine))
1823 		virtual_update_register_offsets(ve->context.lrc_reg_state,
1824 						engine);
1825 
1826 	/*
1827 	 * Move the bound engine to the top of the list for
1828 	 * future execution. We then kick this tasklet first
1829 	 * before checking others, so that we preferentially
1830 	 * reuse this set of bound registers.
1831 	 */
1832 	for (n = 1; n < ve->num_siblings; n++) {
1833 		if (ve->siblings[n] == engine) {
1834 			swap(ve->siblings[n], ve->siblings[0]);
1835 			break;
1836 		}
1837 	}
1838 }
1839 
1840 #define for_each_waiter(p__, rq__) \
1841 	list_for_each_entry_lockless(p__, \
1842 				     &(rq__)->sched.waiters_list, \
1843 				     wait_link)
1844 
1845 #define for_each_signaler(p__, rq__) \
1846 	list_for_each_entry_rcu(p__, \
1847 				&(rq__)->sched.signalers_list, \
1848 				signal_link)
1849 
1850 static void defer_request(struct i915_request *rq, struct list_head * const pl)
1851 {
1852 	LIST_HEAD(list);
1853 
1854 	/*
1855 	 * We want to move the interrupted request to the back of
1856 	 * the round-robin list (i.e. its priority level), but
1857 	 * in doing so, we must then move all requests that were in
1858 	 * flight and were waiting for the interrupted request to
1859 	 * be run after it again.
1860 	 */
1861 	do {
1862 		struct i915_dependency *p;
1863 
1864 		GEM_BUG_ON(i915_request_is_active(rq));
1865 		list_move_tail(&rq->sched.link, pl);
1866 
1867 		for_each_waiter(p, rq) {
1868 			struct i915_request *w =
1869 				container_of(p->waiter, typeof(*w), sched);
1870 
1871 			if (p->flags & I915_DEPENDENCY_WEAK)
1872 				continue;
1873 
1874 			/* Leave semaphores spinning on the other engines */
1875 			if (w->engine != rq->engine)
1876 				continue;
1877 
1878 			/* No waiter should start before its signaler */
1879 			GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1880 				   i915_request_started(w) &&
1881 				   !i915_request_completed(rq));
1882 
1883 			GEM_BUG_ON(i915_request_is_active(w));
1884 			if (!i915_request_is_ready(w))
1885 				continue;
1886 
1887 			if (rq_prio(w) < rq_prio(rq))
1888 				continue;
1889 
1890 			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1891 			list_move_tail(&w->sched.link, &list);
1892 		}
1893 
1894 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1895 	} while (rq);
1896 }
1897 
1898 static void defer_active(struct intel_engine_cs *engine)
1899 {
1900 	struct i915_request *rq;
1901 
1902 	rq = __unwind_incomplete_requests(engine);
1903 	if (!rq)
1904 		return;
1905 
1906 	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1907 }
1908 
1909 static bool
1910 need_timeslice(const struct intel_engine_cs *engine,
1911 	       const struct i915_request *rq,
1912 	       const struct rb_node *rb)
1913 {
1914 	int hint;
1915 
1916 	if (!intel_engine_has_timeslices(engine))
1917 		return false;
1918 
1919 	hint = engine->execlists.queue_priority_hint;
1920 
1921 	if (rb) {
1922 		const struct virtual_engine *ve =
1923 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1924 		const struct intel_engine_cs *inflight =
1925 			intel_context_inflight(&ve->context);
1926 
1927 		if (!inflight || inflight == engine) {
1928 			struct i915_request *next;
1929 
1930 			rcu_read_lock();
1931 			next = READ_ONCE(ve->request);
1932 			if (next)
1933 				hint = max(hint, rq_prio(next));
1934 			rcu_read_unlock();
1935 		}
1936 	}
1937 
1938 	if (!list_is_last(&rq->sched.link, &engine->active.requests))
1939 		hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1940 
1941 	GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1942 	return hint >= effective_prio(rq);
1943 }
1944 
1945 static bool
1946 timeslice_yield(const struct intel_engine_execlists *el,
1947 		const struct i915_request *rq)
1948 {
1949 	/*
1950 	 * Once bitten, forever smitten!
1951 	 *
1952 	 * If the active context ever busy-waited on a semaphore,
1953 	 * it will be treated as a hog until the end of its timeslice (i.e.
1954 	 * until it is scheduled out and replaced by a new submission,
1955 	 * possibly even its own lite-restore). The HW only sends an interrupt
1956 	 * on the first miss, and we do know if that semaphore has been
1957 	 * signaled, or even if it is now stuck on another semaphore. Play
1958 	 * safe, yield if it might be stuck -- it will be given a fresh
1959 	 * timeslice in the near future.
1960 	 */
1961 	return rq->context->lrc.ccid == READ_ONCE(el->yield);
1962 }
1963 
1964 static bool
1965 timeslice_expired(const struct intel_engine_execlists *el,
1966 		  const struct i915_request *rq)
1967 {
1968 	return timer_expired(&el->timer) || timeslice_yield(el, rq);
1969 }
1970 
1971 static int
1972 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1973 {
1974 	if (list_is_last(&rq->sched.link, &engine->active.requests))
1975 		return engine->execlists.queue_priority_hint;
1976 
1977 	return rq_prio(list_next_entry(rq, sched.link));
1978 }
1979 
1980 static inline unsigned long
1981 timeslice(const struct intel_engine_cs *engine)
1982 {
1983 	return READ_ONCE(engine->props.timeslice_duration_ms);
1984 }
1985 
1986 static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1987 {
1988 	const struct intel_engine_execlists *execlists = &engine->execlists;
1989 	const struct i915_request *rq = *execlists->active;
1990 
1991 	if (!rq || i915_request_completed(rq))
1992 		return 0;
1993 
1994 	if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1995 		return 0;
1996 
1997 	return timeslice(engine);
1998 }
1999 
2000 static void set_timeslice(struct intel_engine_cs *engine)
2001 {
2002 	unsigned long duration;
2003 
2004 	if (!intel_engine_has_timeslices(engine))
2005 		return;
2006 
2007 	duration = active_timeslice(engine);
2008 	ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2009 
2010 	set_timer_ms(&engine->execlists.timer, duration);
2011 }
2012 
2013 static void start_timeslice(struct intel_engine_cs *engine, int prio)
2014 {
2015 	struct intel_engine_execlists *execlists = &engine->execlists;
2016 	unsigned long duration;
2017 
2018 	if (!intel_engine_has_timeslices(engine))
2019 		return;
2020 
2021 	WRITE_ONCE(execlists->switch_priority_hint, prio);
2022 	if (prio == INT_MIN)
2023 		return;
2024 
2025 	if (timer_pending(&execlists->timer))
2026 		return;
2027 
2028 	duration = timeslice(engine);
2029 	ENGINE_TRACE(engine,
2030 		     "start timeslicing, prio:%d, interval:%lu",
2031 		     prio, duration);
2032 
2033 	set_timer_ms(&execlists->timer, duration);
2034 }
2035 
2036 static void record_preemption(struct intel_engine_execlists *execlists)
2037 {
2038 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2039 }
2040 
2041 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2042 					    const struct i915_request *rq)
2043 {
2044 	if (!rq)
2045 		return 0;
2046 
2047 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
2048 	if (unlikely(intel_context_is_banned(rq->context)))
2049 		return 1;
2050 
2051 	return READ_ONCE(engine->props.preempt_timeout_ms);
2052 }
2053 
2054 static void set_preempt_timeout(struct intel_engine_cs *engine,
2055 				const struct i915_request *rq)
2056 {
2057 	if (!intel_engine_has_preempt_reset(engine))
2058 		return;
2059 
2060 	set_timer_ms(&engine->execlists.preempt,
2061 		     active_preempt_timeout(engine, rq));
2062 }
2063 
2064 static inline void clear_ports(struct i915_request **ports, int count)
2065 {
2066 	memset_p((void **)ports, NULL, count);
2067 }
2068 
2069 static inline void
2070 copy_ports(struct i915_request **dst, struct i915_request **src, int count)
2071 {
2072 	/* A memcpy_p() would be very useful here! */
2073 	while (count--)
2074 		WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
2075 }
2076 
2077 static void execlists_dequeue(struct intel_engine_cs *engine)
2078 {
2079 	struct intel_engine_execlists * const execlists = &engine->execlists;
2080 	struct i915_request **port = execlists->pending;
2081 	struct i915_request ** const last_port = port + execlists->port_mask;
2082 	struct i915_request * const *active;
2083 	struct i915_request *last;
2084 	struct rb_node *rb;
2085 	bool submit = false;
2086 
2087 	/*
2088 	 * Hardware submission is through 2 ports. Conceptually each port
2089 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2090 	 * static for a context, and unique to each, so we only execute
2091 	 * requests belonging to a single context from each ring. RING_HEAD
2092 	 * is maintained by the CS in the context image, it marks the place
2093 	 * where it got up to last time, and through RING_TAIL we tell the CS
2094 	 * where we want to execute up to this time.
2095 	 *
2096 	 * In this list the requests are in order of execution. Consecutive
2097 	 * requests from the same context are adjacent in the ringbuffer. We
2098 	 * can combine these requests into a single RING_TAIL update:
2099 	 *
2100 	 *              RING_HEAD...req1...req2
2101 	 *                                    ^- RING_TAIL
2102 	 * since to execute req2 the CS must first execute req1.
2103 	 *
2104 	 * Our goal then is to point each port to the end of a consecutive
2105 	 * sequence of requests as being the most optimal (fewest wake ups
2106 	 * and context switches) submission.
2107 	 */
2108 
2109 	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2110 		struct virtual_engine *ve =
2111 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2112 		struct i915_request *rq = READ_ONCE(ve->request);
2113 
2114 		if (!rq) { /* lazily cleanup after another engine handled rq */
2115 			rb_erase_cached(rb, &execlists->virtual);
2116 			RB_CLEAR_NODE(rb);
2117 			rb = rb_first_cached(&execlists->virtual);
2118 			continue;
2119 		}
2120 
2121 		if (!virtual_matches(ve, rq, engine)) {
2122 			rb = rb_next(rb);
2123 			continue;
2124 		}
2125 
2126 		break;
2127 	}
2128 
2129 	/*
2130 	 * If the queue is higher priority than the last
2131 	 * request in the currently active context, submit afresh.
2132 	 * We will resubmit again afterwards in case we need to split
2133 	 * the active context to interject the preemption request,
2134 	 * i.e. we will retrigger preemption following the ack in case
2135 	 * of trouble.
2136 	 */
2137 	active = READ_ONCE(execlists->active);
2138 
2139 	/*
2140 	 * In theory we can skip over completed contexts that have not
2141 	 * yet been processed by events (as those events are in flight):
2142 	 *
2143 	 * while ((last = *active) && i915_request_completed(last))
2144 	 *	active++;
2145 	 *
2146 	 * However, the GPU cannot handle this as it will ultimately
2147 	 * find itself trying to jump back into a context it has just
2148 	 * completed and barf.
2149 	 */
2150 
2151 	if ((last = *active)) {
2152 		if (need_preempt(engine, last, rb)) {
2153 			if (i915_request_completed(last)) {
2154 				tasklet_hi_schedule(&execlists->tasklet);
2155 				return;
2156 			}
2157 
2158 			ENGINE_TRACE(engine,
2159 				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2160 				     last->fence.context,
2161 				     last->fence.seqno,
2162 				     last->sched.attr.priority,
2163 				     execlists->queue_priority_hint);
2164 			record_preemption(execlists);
2165 
2166 			/*
2167 			 * Don't let the RING_HEAD advance past the breadcrumb
2168 			 * as we unwind (and until we resubmit) so that we do
2169 			 * not accidentally tell it to go backwards.
2170 			 */
2171 			ring_set_paused(engine, 1);
2172 
2173 			/*
2174 			 * Note that we have not stopped the GPU at this point,
2175 			 * so we are unwinding the incomplete requests as they
2176 			 * remain inflight and so by the time we do complete
2177 			 * the preemption, some of the unwound requests may
2178 			 * complete!
2179 			 */
2180 			__unwind_incomplete_requests(engine);
2181 
2182 			last = NULL;
2183 		} else if (need_timeslice(engine, last, rb) &&
2184 			   timeslice_expired(execlists, last)) {
2185 			if (i915_request_completed(last)) {
2186 				tasklet_hi_schedule(&execlists->tasklet);
2187 				return;
2188 			}
2189 
2190 			ENGINE_TRACE(engine,
2191 				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2192 				     last->fence.context,
2193 				     last->fence.seqno,
2194 				     last->sched.attr.priority,
2195 				     execlists->queue_priority_hint,
2196 				     yesno(timeslice_yield(execlists, last)));
2197 
2198 			ring_set_paused(engine, 1);
2199 			defer_active(engine);
2200 
2201 			/*
2202 			 * Unlike for preemption, if we rewind and continue
2203 			 * executing the same context as previously active,
2204 			 * the order of execution will remain the same and
2205 			 * the tail will only advance. We do not need to
2206 			 * force a full context restore, as a lite-restore
2207 			 * is sufficient to resample the monotonic TAIL.
2208 			 *
2209 			 * If we switch to any other context, similarly we
2210 			 * will not rewind TAIL of current context, and
2211 			 * normal save/restore will preserve state and allow
2212 			 * us to later continue executing the same request.
2213 			 */
2214 			last = NULL;
2215 		} else {
2216 			/*
2217 			 * Otherwise if we already have a request pending
2218 			 * for execution after the current one, we can
2219 			 * just wait until the next CS event before
2220 			 * queuing more. In either case we will force a
2221 			 * lite-restore preemption event, but if we wait
2222 			 * we hopefully coalesce several updates into a single
2223 			 * submission.
2224 			 */
2225 			if (!list_is_last(&last->sched.link,
2226 					  &engine->active.requests)) {
2227 				/*
2228 				 * Even if ELSP[1] is occupied and not worthy
2229 				 * of timeslices, our queue might be.
2230 				 */
2231 				start_timeslice(engine, queue_prio(execlists));
2232 				return;
2233 			}
2234 		}
2235 	}
2236 
2237 	while (rb) { /* XXX virtual is always taking precedence */
2238 		struct virtual_engine *ve =
2239 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2240 		struct i915_request *rq;
2241 
2242 		spin_lock(&ve->base.active.lock);
2243 
2244 		rq = ve->request;
2245 		if (unlikely(!rq)) { /* lost the race to a sibling */
2246 			spin_unlock(&ve->base.active.lock);
2247 			rb_erase_cached(rb, &execlists->virtual);
2248 			RB_CLEAR_NODE(rb);
2249 			rb = rb_first_cached(&execlists->virtual);
2250 			continue;
2251 		}
2252 
2253 		GEM_BUG_ON(rq != ve->request);
2254 		GEM_BUG_ON(rq->engine != &ve->base);
2255 		GEM_BUG_ON(rq->context != &ve->context);
2256 
2257 		if (rq_prio(rq) >= queue_prio(execlists)) {
2258 			if (!virtual_matches(ve, rq, engine)) {
2259 				spin_unlock(&ve->base.active.lock);
2260 				rb = rb_next(rb);
2261 				continue;
2262 			}
2263 
2264 			if (last && !can_merge_rq(last, rq)) {
2265 				spin_unlock(&ve->base.active.lock);
2266 				start_timeslice(engine, rq_prio(rq));
2267 				return; /* leave this for another sibling */
2268 			}
2269 
2270 			ENGINE_TRACE(engine,
2271 				     "virtual rq=%llx:%lld%s, new engine? %s\n",
2272 				     rq->fence.context,
2273 				     rq->fence.seqno,
2274 				     i915_request_completed(rq) ? "!" :
2275 				     i915_request_started(rq) ? "*" :
2276 				     "",
2277 				     yesno(engine != ve->siblings[0]));
2278 
2279 			WRITE_ONCE(ve->request, NULL);
2280 			WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2281 				   INT_MIN);
2282 			rb_erase_cached(rb, &execlists->virtual);
2283 			RB_CLEAR_NODE(rb);
2284 
2285 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2286 			WRITE_ONCE(rq->engine, engine);
2287 
2288 			if (__i915_request_submit(rq)) {
2289 				/*
2290 				 * Only after we confirm that we will submit
2291 				 * this request (i.e. it has not already
2292 				 * completed), do we want to update the context.
2293 				 *
2294 				 * This serves two purposes. It avoids
2295 				 * unnecessary work if we are resubmitting an
2296 				 * already completed request after timeslicing.
2297 				 * But more importantly, it prevents us altering
2298 				 * ve->siblings[] on an idle context, where
2299 				 * we may be using ve->siblings[] in
2300 				 * virtual_context_enter / virtual_context_exit.
2301 				 */
2302 				virtual_xfer_context(ve, engine);
2303 				GEM_BUG_ON(ve->siblings[0] != engine);
2304 
2305 				submit = true;
2306 				last = rq;
2307 			}
2308 			i915_request_put(rq);
2309 
2310 			/*
2311 			 * Hmm, we have a bunch of virtual engine requests,
2312 			 * but the first one was already completed (thanks
2313 			 * preempt-to-busy!). Keep looking at the veng queue
2314 			 * until we have no more relevant requests (i.e.
2315 			 * the normal submit queue has higher priority).
2316 			 */
2317 			if (!submit) {
2318 				spin_unlock(&ve->base.active.lock);
2319 				rb = rb_first_cached(&execlists->virtual);
2320 				continue;
2321 			}
2322 		}
2323 
2324 		spin_unlock(&ve->base.active.lock);
2325 		break;
2326 	}
2327 
2328 	while ((rb = rb_first_cached(&execlists->queue))) {
2329 		struct i915_priolist *p = to_priolist(rb);
2330 		struct i915_request *rq, *rn;
2331 		int i;
2332 
2333 		priolist_for_each_request_consume(rq, rn, p, i) {
2334 			bool merge = true;
2335 
2336 			/*
2337 			 * Can we combine this request with the current port?
2338 			 * It has to be the same context/ringbuffer and not
2339 			 * have any exceptions (e.g. GVT saying never to
2340 			 * combine contexts).
2341 			 *
2342 			 * If we can combine the requests, we can execute both
2343 			 * by updating the RING_TAIL to point to the end of the
2344 			 * second request, and so we never need to tell the
2345 			 * hardware about the first.
2346 			 */
2347 			if (last && !can_merge_rq(last, rq)) {
2348 				/*
2349 				 * If we are on the second port and cannot
2350 				 * combine this request with the last, then we
2351 				 * are done.
2352 				 */
2353 				if (port == last_port)
2354 					goto done;
2355 
2356 				/*
2357 				 * We must not populate both ELSP[] with the
2358 				 * same LRCA, i.e. we must submit 2 different
2359 				 * contexts if we submit 2 ELSP.
2360 				 */
2361 				if (last->context == rq->context)
2362 					goto done;
2363 
2364 				if (i915_request_has_sentinel(last))
2365 					goto done;
2366 
2367 				/*
2368 				 * If GVT overrides us we only ever submit
2369 				 * port[0], leaving port[1] empty. Note that we
2370 				 * also have to be careful that we don't queue
2371 				 * the same context (even though a different
2372 				 * request) to the second port.
2373 				 */
2374 				if (ctx_single_port_submission(last->context) ||
2375 				    ctx_single_port_submission(rq->context))
2376 					goto done;
2377 
2378 				merge = false;
2379 			}
2380 
2381 			if (__i915_request_submit(rq)) {
2382 				if (!merge) {
2383 					*port = execlists_schedule_in(last, port - execlists->pending);
2384 					port++;
2385 					last = NULL;
2386 				}
2387 
2388 				GEM_BUG_ON(last &&
2389 					   !can_merge_ctx(last->context,
2390 							  rq->context));
2391 				GEM_BUG_ON(last &&
2392 					   i915_seqno_passed(last->fence.seqno,
2393 							     rq->fence.seqno));
2394 
2395 				submit = true;
2396 				last = rq;
2397 			}
2398 		}
2399 
2400 		rb_erase_cached(&p->node, &execlists->queue);
2401 		i915_priolist_free(p);
2402 	}
2403 
2404 done:
2405 	/*
2406 	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2407 	 *
2408 	 * We choose the priority hint such that if we add a request of greater
2409 	 * priority than this, we kick the submission tasklet to decide on
2410 	 * the right order of submitting the requests to hardware. We must
2411 	 * also be prepared to reorder requests as they are in-flight on the
2412 	 * HW. We derive the priority hint then as the first "hole" in
2413 	 * the HW submission ports and if there are no available slots,
2414 	 * the priority of the lowest executing request, i.e. last.
2415 	 *
2416 	 * When we do receive a higher priority request ready to run from the
2417 	 * user, see queue_request(), the priority hint is bumped to that
2418 	 * request triggering preemption on the next dequeue (or subsequent
2419 	 * interrupt for secondary ports).
2420 	 */
2421 	execlists->queue_priority_hint = queue_prio(execlists);
2422 
2423 	if (submit) {
2424 		*port = execlists_schedule_in(last, port - execlists->pending);
2425 		execlists->switch_priority_hint =
2426 			switch_prio(engine, *execlists->pending);
2427 
2428 		/*
2429 		 * Skip if we ended up with exactly the same set of requests,
2430 		 * e.g. trying to timeslice a pair of ordered contexts
2431 		 */
2432 		if (!memcmp(active, execlists->pending,
2433 			    (port - execlists->pending + 1) * sizeof(*port))) {
2434 			do
2435 				execlists_schedule_out(fetch_and_zero(port));
2436 			while (port-- != execlists->pending);
2437 
2438 			goto skip_submit;
2439 		}
2440 		clear_ports(port + 1, last_port - port);
2441 
2442 		WRITE_ONCE(execlists->yield, -1);
2443 		set_preempt_timeout(engine, *active);
2444 		execlists_submit_ports(engine);
2445 	} else {
2446 		start_timeslice(engine, execlists->queue_priority_hint);
2447 skip_submit:
2448 		ring_set_paused(engine, 0);
2449 	}
2450 }
2451 
2452 static void
2453 cancel_port_requests(struct intel_engine_execlists * const execlists)
2454 {
2455 	struct i915_request * const *port;
2456 
2457 	for (port = execlists->pending; *port; port++)
2458 		execlists_schedule_out(*port);
2459 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2460 
2461 	/* Mark the end of active before we overwrite *active */
2462 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2463 		execlists_schedule_out(*port);
2464 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2465 
2466 	smp_wmb(); /* complete the seqlock for execlists_active() */
2467 	WRITE_ONCE(execlists->active, execlists->inflight);
2468 }
2469 
2470 static inline void
2471 invalidate_csb_entries(const u64 *first, const u64 *last)
2472 {
2473 	clflush((void *)first);
2474 	clflush((void *)last);
2475 }
2476 
2477 /*
2478  * Starting with Gen12, the status has a new format:
2479  *
2480  *     bit  0:     switched to new queue
2481  *     bit  1:     reserved
2482  *     bit  2:     semaphore wait mode (poll or signal), only valid when
2483  *                 switch detail is set to "wait on semaphore"
2484  *     bits 3-5:   engine class
2485  *     bits 6-11:  engine instance
2486  *     bits 12-14: reserved
2487  *     bits 15-25: sw context id of the lrc the GT switched to
2488  *     bits 26-31: sw counter of the lrc the GT switched to
2489  *     bits 32-35: context switch detail
2490  *                  - 0: ctx complete
2491  *                  - 1: wait on sync flip
2492  *                  - 2: wait on vblank
2493  *                  - 3: wait on scanline
2494  *                  - 4: wait on semaphore
2495  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2496  *                       WAIT_FOR_EVENT)
2497  *     bit  36:    reserved
2498  *     bits 37-43: wait detail (for switch detail 1 to 4)
2499  *     bits 44-46: reserved
2500  *     bits 47-57: sw context id of the lrc the GT switched away from
2501  *     bits 58-63: sw counter of the lrc the GT switched away from
2502  */
2503 static inline bool gen12_csb_parse(const u64 csb)
2504 {
2505 	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
2506 	bool new_queue =
2507 		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2508 
2509 	/*
2510 	 * The context switch detail is not guaranteed to be 5 when a preemption
2511 	 * occurs, so we can't just check for that. The check below works for
2512 	 * all the cases we care about, including preemptions of WAIT
2513 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2514 	 * would require some extra handling, but we don't support that.
2515 	 */
2516 	if (!ctx_away_valid || new_queue) {
2517 		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
2518 		return true;
2519 	}
2520 
2521 	/*
2522 	 * switch detail = 5 is covered by the case above and we do not expect a
2523 	 * context switch on an unsuccessful wait instruction since we always
2524 	 * use polling mode.
2525 	 */
2526 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
2527 	return false;
2528 }
2529 
2530 static inline bool gen8_csb_parse(const u64 csb)
2531 {
2532 	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2533 }
2534 
2535 static noinline u64
2536 wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
2537 {
2538 	u64 entry;
2539 
2540 	/*
2541 	 * Reading from the HWSP has one particular advantage: we can detect
2542 	 * a stale entry. Since the write into HWSP is broken, we have no reason
2543 	 * to trust the HW at all, the mmio entry may equally be unordered, so
2544 	 * we prefer the path that is self-checking and as a last resort,
2545 	 * return the mmio value.
2546 	 *
2547 	 * tgl,dg1:HSDES#22011327657
2548 	 */
2549 	preempt_disable();
2550 	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) {
2551 		int idx = csb - engine->execlists.csb_status;
2552 		int status;
2553 
2554 		status = GEN8_EXECLISTS_STATUS_BUF;
2555 		if (idx >= 6) {
2556 			status = GEN11_EXECLISTS_STATUS_BUF2;
2557 			idx -= 6;
2558 		}
2559 		status += sizeof(u64) * idx;
2560 
2561 		entry = intel_uncore_read64(engine->uncore,
2562 					    _MMIO(engine->mmio_base + status));
2563 	}
2564 	preempt_enable();
2565 
2566 	return entry;
2567 }
2568 
2569 static inline u64
2570 csb_read(const struct intel_engine_cs *engine, u64 * const csb)
2571 {
2572 	u64 entry = READ_ONCE(*csb);
2573 
2574 	/*
2575 	 * Unfortunately, the GPU does not always serialise its write
2576 	 * of the CSB entries before its write of the CSB pointer, at least
2577 	 * from the perspective of the CPU, using what is known as a Global
2578 	 * Observation Point. We may read a new CSB tail pointer, but then
2579 	 * read the stale CSB entries, causing us to misinterpret the
2580 	 * context-switch events, and eventually declare the GPU hung.
2581 	 *
2582 	 * icl:HSDES#1806554093
2583 	 * tgl:HSDES#22011248461
2584 	 */
2585 	if (unlikely(entry == -1))
2586 		entry = wa_csb_read(engine, csb);
2587 
2588 	/* Consume this entry so that we can spot its future reuse. */
2589 	WRITE_ONCE(*csb, -1);
2590 
2591 	/* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
2592 	return entry;
2593 }
2594 
2595 static void process_csb(struct intel_engine_cs *engine)
2596 {
2597 	struct intel_engine_execlists * const execlists = &engine->execlists;
2598 	u64 * const buf = execlists->csb_status;
2599 	const u8 num_entries = execlists->csb_size;
2600 	u8 head, tail;
2601 
2602 	/*
2603 	 * As we modify our execlists state tracking we require exclusive
2604 	 * access. Either we are inside the tasklet, or the tasklet is disabled
2605 	 * and we assume that is only inside the reset paths and so serialised.
2606 	 */
2607 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2608 		   !reset_in_progress(execlists));
2609 	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2610 
2611 	/*
2612 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
2613 	 * When reading from the csb_write mmio register, we have to be
2614 	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2615 	 * the low 4bits. As it happens we know the next 4bits are always
2616 	 * zero and so we can simply masked off the low u8 of the register
2617 	 * and treat it identically to reading from the HWSP (without having
2618 	 * to use explicit shifting and masking, and probably bifurcating
2619 	 * the code to handle the legacy mmio read).
2620 	 */
2621 	head = execlists->csb_head;
2622 	tail = READ_ONCE(*execlists->csb_write);
2623 	if (unlikely(head == tail))
2624 		return;
2625 
2626 	/*
2627 	 * We will consume all events from HW, or at least pretend to.
2628 	 *
2629 	 * The sequence of events from the HW is deterministic, and derived
2630 	 * from our writes to the ELSP, with a smidgen of variability for
2631 	 * the arrival of the asynchronous requests wrt to the inflight
2632 	 * execution. If the HW sends an event that does not correspond with
2633 	 * the one we are expecting, we have to abandon all hope as we lose
2634 	 * all tracking of what the engine is actually executing. We will
2635 	 * only detect we are out of sequence with the HW when we get an
2636 	 * 'impossible' event because we have already drained our own
2637 	 * preemption/promotion queue. If this occurs, we know that we likely
2638 	 * lost track of execution earlier and must unwind and restart, the
2639 	 * simplest way is by stop processing the event queue and force the
2640 	 * engine to reset.
2641 	 */
2642 	execlists->csb_head = tail;
2643 	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2644 
2645 	/*
2646 	 * Hopefully paired with a wmb() in HW!
2647 	 *
2648 	 * We must complete the read of the write pointer before any reads
2649 	 * from the CSB, so that we do not see stale values. Without an rmb
2650 	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2651 	 * we perform the READ_ONCE(*csb_write).
2652 	 */
2653 	rmb();
2654 	do {
2655 		bool promote;
2656 		u64 csb;
2657 
2658 		if (++head == num_entries)
2659 			head = 0;
2660 
2661 		/*
2662 		 * We are flying near dragons again.
2663 		 *
2664 		 * We hold a reference to the request in execlist_port[]
2665 		 * but no more than that. We are operating in softirq
2666 		 * context and so cannot hold any mutex or sleep. That
2667 		 * prevents us stopping the requests we are processing
2668 		 * in port[] from being retired simultaneously (the
2669 		 * breadcrumb will be complete before we see the
2670 		 * context-switch). As we only hold the reference to the
2671 		 * request, any pointer chasing underneath the request
2672 		 * is subject to a potential use-after-free. Thus we
2673 		 * store all of the bookkeeping within port[] as
2674 		 * required, and avoid using unguarded pointers beneath
2675 		 * request itself. The same applies to the atomic
2676 		 * status notifier.
2677 		 */
2678 
2679 		csb = csb_read(engine, buf + head);
2680 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2681 			     head, upper_32_bits(csb), lower_32_bits(csb));
2682 
2683 		if (INTEL_GEN(engine->i915) >= 12)
2684 			promote = gen12_csb_parse(csb);
2685 		else
2686 			promote = gen8_csb_parse(csb);
2687 		if (promote) {
2688 			struct i915_request * const *old = execlists->active;
2689 
2690 			if (GEM_WARN_ON(!*execlists->pending)) {
2691 				execlists->error_interrupt |= ERROR_CSB;
2692 				break;
2693 			}
2694 
2695 			ring_set_paused(engine, 0);
2696 
2697 			/* Point active to the new ELSP; prevent overwriting */
2698 			WRITE_ONCE(execlists->active, execlists->pending);
2699 			smp_wmb(); /* notify execlists_active() */
2700 
2701 			/* cancel old inflight, prepare for switch */
2702 			trace_ports(execlists, "preempted", old);
2703 			while (*old)
2704 				execlists_schedule_out(*old++);
2705 
2706 			/* switch pending to inflight */
2707 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2708 			copy_ports(execlists->inflight,
2709 				   execlists->pending,
2710 				   execlists_num_ports(execlists));
2711 			smp_wmb(); /* complete the seqlock */
2712 			WRITE_ONCE(execlists->active, execlists->inflight);
2713 
2714 			/* XXX Magic delay for tgl */
2715 			ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2716 
2717 			WRITE_ONCE(execlists->pending[0], NULL);
2718 		} else {
2719 			if (GEM_WARN_ON(!*execlists->active)) {
2720 				execlists->error_interrupt |= ERROR_CSB;
2721 				break;
2722 			}
2723 
2724 			/* port0 completed, advanced to port1 */
2725 			trace_ports(execlists, "completed", execlists->active);
2726 
2727 			/*
2728 			 * We rely on the hardware being strongly
2729 			 * ordered, that the breadcrumb write is
2730 			 * coherent (visible from the CPU) before the
2731 			 * user interrupt is processed. One might assume
2732 			 * that the breadcrumb write being before the
2733 			 * user interrupt and the CS event for the context
2734 			 * switch would therefore be before the CS event
2735 			 * itself...
2736 			 */
2737 			if (GEM_SHOW_DEBUG() &&
2738 			    !i915_request_completed(*execlists->active)) {
2739 				struct i915_request *rq = *execlists->active;
2740 				const u32 *regs __maybe_unused =
2741 					rq->context->lrc_reg_state;
2742 
2743 				ENGINE_TRACE(engine,
2744 					     "context completed before request!\n");
2745 				ENGINE_TRACE(engine,
2746 					     "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2747 					     ENGINE_READ(engine, RING_START),
2748 					     ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2749 					     ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2750 					     ENGINE_READ(engine, RING_CTL),
2751 					     ENGINE_READ(engine, RING_MI_MODE));
2752 				ENGINE_TRACE(engine,
2753 					     "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2754 					     i915_ggtt_offset(rq->ring->vma),
2755 					     rq->head, rq->tail,
2756 					     rq->fence.context,
2757 					     lower_32_bits(rq->fence.seqno),
2758 					     hwsp_seqno(rq));
2759 				ENGINE_TRACE(engine,
2760 					     "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2761 					     regs[CTX_RING_START],
2762 					     regs[CTX_RING_HEAD],
2763 					     regs[CTX_RING_TAIL]);
2764 			}
2765 
2766 			execlists_schedule_out(*execlists->active++);
2767 
2768 			GEM_BUG_ON(execlists->active - execlists->inflight >
2769 				   execlists_num_ports(execlists));
2770 		}
2771 	} while (head != tail);
2772 
2773 	set_timeslice(engine);
2774 
2775 	/*
2776 	 * Gen11 has proven to fail wrt global observation point between
2777 	 * entry and tail update, failing on the ordering and thus
2778 	 * we see an old entry in the context status buffer.
2779 	 *
2780 	 * Forcibly evict out entries for the next gpu csb update,
2781 	 * to increase the odds that we get a fresh entries with non
2782 	 * working hardware. The cost for doing so comes out mostly with
2783 	 * the wash as hardware, working or not, will need to do the
2784 	 * invalidation before.
2785 	 */
2786 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2787 }
2788 
2789 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2790 {
2791 	lockdep_assert_held(&engine->active.lock);
2792 	if (!READ_ONCE(engine->execlists.pending[0])) {
2793 		rcu_read_lock(); /* protect peeking at execlists->active */
2794 		execlists_dequeue(engine);
2795 		rcu_read_unlock();
2796 	}
2797 }
2798 
2799 static void __execlists_hold(struct i915_request *rq)
2800 {
2801 	LIST_HEAD(list);
2802 
2803 	do {
2804 		struct i915_dependency *p;
2805 
2806 		if (i915_request_is_active(rq))
2807 			__i915_request_unsubmit(rq);
2808 
2809 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2810 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2811 		i915_request_set_hold(rq);
2812 		RQ_TRACE(rq, "on hold\n");
2813 
2814 		for_each_waiter(p, rq) {
2815 			struct i915_request *w =
2816 				container_of(p->waiter, typeof(*w), sched);
2817 
2818 			/* Leave semaphores spinning on the other engines */
2819 			if (w->engine != rq->engine)
2820 				continue;
2821 
2822 			if (!i915_request_is_ready(w))
2823 				continue;
2824 
2825 			if (i915_request_completed(w))
2826 				continue;
2827 
2828 			if (i915_request_on_hold(w))
2829 				continue;
2830 
2831 			list_move_tail(&w->sched.link, &list);
2832 		}
2833 
2834 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2835 	} while (rq);
2836 }
2837 
2838 static bool execlists_hold(struct intel_engine_cs *engine,
2839 			   struct i915_request *rq)
2840 {
2841 	if (i915_request_on_hold(rq))
2842 		return false;
2843 
2844 	spin_lock_irq(&engine->active.lock);
2845 
2846 	if (i915_request_completed(rq)) { /* too late! */
2847 		rq = NULL;
2848 		goto unlock;
2849 	}
2850 
2851 	if (rq->engine != engine) { /* preempted virtual engine */
2852 		struct virtual_engine *ve = to_virtual_engine(rq->engine);
2853 
2854 		/*
2855 		 * intel_context_inflight() is only protected by virtue
2856 		 * of process_csb() being called only by the tasklet (or
2857 		 * directly from inside reset while the tasklet is suspended).
2858 		 * Assert that neither of those are allowed to run while we
2859 		 * poke at the request queues.
2860 		 */
2861 		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2862 
2863 		/*
2864 		 * An unsubmitted request along a virtual engine will
2865 		 * remain on the active (this) engine until we are able
2866 		 * to process the context switch away (and so mark the
2867 		 * context as no longer in flight). That cannot have happened
2868 		 * yet, otherwise we would not be hanging!
2869 		 */
2870 		spin_lock(&ve->base.active.lock);
2871 		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2872 		GEM_BUG_ON(ve->request != rq);
2873 		ve->request = NULL;
2874 		spin_unlock(&ve->base.active.lock);
2875 		i915_request_put(rq);
2876 
2877 		rq->engine = engine;
2878 	}
2879 
2880 	/*
2881 	 * Transfer this request onto the hold queue to prevent it
2882 	 * being resumbitted to HW (and potentially completed) before we have
2883 	 * released it. Since we may have already submitted following
2884 	 * requests, we need to remove those as well.
2885 	 */
2886 	GEM_BUG_ON(i915_request_on_hold(rq));
2887 	GEM_BUG_ON(rq->engine != engine);
2888 	__execlists_hold(rq);
2889 	GEM_BUG_ON(list_empty(&engine->active.hold));
2890 
2891 unlock:
2892 	spin_unlock_irq(&engine->active.lock);
2893 	return rq;
2894 }
2895 
2896 static bool hold_request(const struct i915_request *rq)
2897 {
2898 	struct i915_dependency *p;
2899 	bool result = false;
2900 
2901 	/*
2902 	 * If one of our ancestors is on hold, we must also be on hold,
2903 	 * otherwise we will bypass it and execute before it.
2904 	 */
2905 	rcu_read_lock();
2906 	for_each_signaler(p, rq) {
2907 		const struct i915_request *s =
2908 			container_of(p->signaler, typeof(*s), sched);
2909 
2910 		if (s->engine != rq->engine)
2911 			continue;
2912 
2913 		result = i915_request_on_hold(s);
2914 		if (result)
2915 			break;
2916 	}
2917 	rcu_read_unlock();
2918 
2919 	return result;
2920 }
2921 
2922 static void __execlists_unhold(struct i915_request *rq)
2923 {
2924 	LIST_HEAD(list);
2925 
2926 	do {
2927 		struct i915_dependency *p;
2928 
2929 		RQ_TRACE(rq, "hold release\n");
2930 
2931 		GEM_BUG_ON(!i915_request_on_hold(rq));
2932 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2933 
2934 		i915_request_clear_hold(rq);
2935 		list_move_tail(&rq->sched.link,
2936 			       i915_sched_lookup_priolist(rq->engine,
2937 							  rq_prio(rq)));
2938 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2939 
2940 		/* Also release any children on this engine that are ready */
2941 		for_each_waiter(p, rq) {
2942 			struct i915_request *w =
2943 				container_of(p->waiter, typeof(*w), sched);
2944 
2945 			/* Propagate any change in error status */
2946 			if (rq->fence.error)
2947 				i915_request_set_error_once(w, rq->fence.error);
2948 
2949 			if (w->engine != rq->engine)
2950 				continue;
2951 
2952 			if (!i915_request_on_hold(w))
2953 				continue;
2954 
2955 			/* Check that no other parents are also on hold */
2956 			if (hold_request(w))
2957 				continue;
2958 
2959 			list_move_tail(&w->sched.link, &list);
2960 		}
2961 
2962 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2963 	} while (rq);
2964 }
2965 
2966 static void execlists_unhold(struct intel_engine_cs *engine,
2967 			     struct i915_request *rq)
2968 {
2969 	spin_lock_irq(&engine->active.lock);
2970 
2971 	/*
2972 	 * Move this request back to the priority queue, and all of its
2973 	 * children and grandchildren that were suspended along with it.
2974 	 */
2975 	__execlists_unhold(rq);
2976 
2977 	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2978 		engine->execlists.queue_priority_hint = rq_prio(rq);
2979 		tasklet_hi_schedule(&engine->execlists.tasklet);
2980 	}
2981 
2982 	spin_unlock_irq(&engine->active.lock);
2983 }
2984 
2985 struct execlists_capture {
2986 	struct work_struct work;
2987 	struct i915_request *rq;
2988 	struct i915_gpu_coredump *error;
2989 };
2990 
2991 static void execlists_capture_work(struct work_struct *work)
2992 {
2993 	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2994 	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2995 	struct intel_engine_cs *engine = cap->rq->engine;
2996 	struct intel_gt_coredump *gt = cap->error->gt;
2997 	struct intel_engine_capture_vma *vma;
2998 
2999 	/* Compress all the objects attached to the request, slow! */
3000 	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
3001 	if (vma) {
3002 		struct i915_vma_compress *compress =
3003 			i915_vma_capture_prepare(gt);
3004 
3005 		intel_engine_coredump_add_vma(gt->engine, vma, compress);
3006 		i915_vma_capture_finish(gt, compress);
3007 	}
3008 
3009 	gt->simulated = gt->engine->simulated;
3010 	cap->error->simulated = gt->simulated;
3011 
3012 	/* Publish the error state, and announce it to the world */
3013 	i915_error_state_store(cap->error);
3014 	i915_gpu_coredump_put(cap->error);
3015 
3016 	/* Return this request and all that depend upon it for signaling */
3017 	execlists_unhold(engine, cap->rq);
3018 	i915_request_put(cap->rq);
3019 
3020 	kfree(cap);
3021 }
3022 
3023 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
3024 {
3025 	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
3026 	struct execlists_capture *cap;
3027 
3028 	cap = kmalloc(sizeof(*cap), gfp);
3029 	if (!cap)
3030 		return NULL;
3031 
3032 	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
3033 	if (!cap->error)
3034 		goto err_cap;
3035 
3036 	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
3037 	if (!cap->error->gt)
3038 		goto err_gpu;
3039 
3040 	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
3041 	if (!cap->error->gt->engine)
3042 		goto err_gt;
3043 
3044 	cap->error->gt->engine->hung = true;
3045 
3046 	return cap;
3047 
3048 err_gt:
3049 	kfree(cap->error->gt);
3050 err_gpu:
3051 	kfree(cap->error);
3052 err_cap:
3053 	kfree(cap);
3054 	return NULL;
3055 }
3056 
3057 static struct i915_request *
3058 active_context(struct intel_engine_cs *engine, u32 ccid)
3059 {
3060 	const struct intel_engine_execlists * const el = &engine->execlists;
3061 	struct i915_request * const *port, *rq;
3062 
3063 	/*
3064 	 * Use the most recent result from process_csb(), but just in case
3065 	 * we trigger an error (via interrupt) before the first CS event has
3066 	 * been written, peek at the next submission.
3067 	 */
3068 
3069 	for (port = el->active; (rq = *port); port++) {
3070 		if (rq->context->lrc.ccid == ccid) {
3071 			ENGINE_TRACE(engine,
3072 				     "ccid found at active:%zd\n",
3073 				     port - el->active);
3074 			return rq;
3075 		}
3076 	}
3077 
3078 	for (port = el->pending; (rq = *port); port++) {
3079 		if (rq->context->lrc.ccid == ccid) {
3080 			ENGINE_TRACE(engine,
3081 				     "ccid found at pending:%zd\n",
3082 				     port - el->pending);
3083 			return rq;
3084 		}
3085 	}
3086 
3087 	ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3088 	return NULL;
3089 }
3090 
3091 static u32 active_ccid(struct intel_engine_cs *engine)
3092 {
3093 	return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3094 }
3095 
3096 static void execlists_capture(struct intel_engine_cs *engine)
3097 {
3098 	struct execlists_capture *cap;
3099 
3100 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3101 		return;
3102 
3103 	/*
3104 	 * We need to _quickly_ capture the engine state before we reset.
3105 	 * We are inside an atomic section (softirq) here and we are delaying
3106 	 * the forced preemption event.
3107 	 */
3108 	cap = capture_regs(engine);
3109 	if (!cap)
3110 		return;
3111 
3112 	spin_lock_irq(&engine->active.lock);
3113 	cap->rq = active_context(engine, active_ccid(engine));
3114 	if (cap->rq) {
3115 		cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3116 		cap->rq = i915_request_get_rcu(cap->rq);
3117 	}
3118 	spin_unlock_irq(&engine->active.lock);
3119 	if (!cap->rq)
3120 		goto err_free;
3121 
3122 	/*
3123 	 * Remove the request from the execlists queue, and take ownership
3124 	 * of the request. We pass it to our worker who will _slowly_ compress
3125 	 * all the pages the _user_ requested for debugging their batch, after
3126 	 * which we return it to the queue for signaling.
3127 	 *
3128 	 * By removing them from the execlists queue, we also remove the
3129 	 * requests from being processed by __unwind_incomplete_requests()
3130 	 * during the intel_engine_reset(), and so they will *not* be replayed
3131 	 * afterwards.
3132 	 *
3133 	 * Note that because we have not yet reset the engine at this point,
3134 	 * it is possible for the request that we have identified as being
3135 	 * guilty, did in fact complete and we will then hit an arbitration
3136 	 * point allowing the outstanding preemption to succeed. The likelihood
3137 	 * of that is very low (as capturing of the engine registers should be
3138 	 * fast enough to run inside an irq-off atomic section!), so we will
3139 	 * simply hold that request accountable for being non-preemptible
3140 	 * long enough to force the reset.
3141 	 */
3142 	if (!execlists_hold(engine, cap->rq))
3143 		goto err_rq;
3144 
3145 	INIT_WORK(&cap->work, execlists_capture_work);
3146 	schedule_work(&cap->work);
3147 	return;
3148 
3149 err_rq:
3150 	i915_request_put(cap->rq);
3151 err_free:
3152 	i915_gpu_coredump_put(cap->error);
3153 	kfree(cap);
3154 }
3155 
3156 static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3157 {
3158 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
3159 	unsigned long *lock = &engine->gt->reset.flags;
3160 
3161 	if (!intel_has_reset_engine(engine->gt))
3162 		return;
3163 
3164 	if (test_and_set_bit(bit, lock))
3165 		return;
3166 
3167 	ENGINE_TRACE(engine, "reset for %s\n", msg);
3168 
3169 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
3170 	tasklet_disable_nosync(&engine->execlists.tasklet);
3171 
3172 	ring_set_paused(engine, 1); /* Freeze the current request in place */
3173 	execlists_capture(engine);
3174 	intel_engine_reset(engine, msg);
3175 
3176 	tasklet_enable(&engine->execlists.tasklet);
3177 	clear_and_wake_up_bit(bit, lock);
3178 }
3179 
3180 static bool preempt_timeout(const struct intel_engine_cs *const engine)
3181 {
3182 	const struct timer_list *t = &engine->execlists.preempt;
3183 
3184 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3185 		return false;
3186 
3187 	if (!timer_expired(t))
3188 		return false;
3189 
3190 	return READ_ONCE(engine->execlists.pending[0]);
3191 }
3192 
3193 /*
3194  * Check the unread Context Status Buffers and manage the submission of new
3195  * contexts to the ELSP accordingly.
3196  */
3197 static void execlists_submission_tasklet(unsigned long data)
3198 {
3199 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3200 	bool timeout = preempt_timeout(engine);
3201 
3202 	process_csb(engine);
3203 
3204 	if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3205 		const char *msg;
3206 
3207 		/* Generate the error message in priority wrt to the user! */
3208 		if (engine->execlists.error_interrupt & GENMASK(15, 0))
3209 			msg = "CS error"; /* thrown by a user payload */
3210 		else if (engine->execlists.error_interrupt & ERROR_CSB)
3211 			msg = "invalid CSB event";
3212 		else
3213 			msg = "internal error";
3214 
3215 		engine->execlists.error_interrupt = 0;
3216 		execlists_reset(engine, msg);
3217 	}
3218 
3219 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3220 		unsigned long flags;
3221 
3222 		spin_lock_irqsave(&engine->active.lock, flags);
3223 		__execlists_submission_tasklet(engine);
3224 		spin_unlock_irqrestore(&engine->active.lock, flags);
3225 
3226 		/* Recheck after serialising with direct-submission */
3227 		if (unlikely(timeout && preempt_timeout(engine))) {
3228 			cancel_timer(&engine->execlists.preempt);
3229 			execlists_reset(engine, "preemption time out");
3230 		}
3231 	}
3232 }
3233 
3234 static void __execlists_kick(struct intel_engine_execlists *execlists)
3235 {
3236 	/* Kick the tasklet for some interrupt coalescing and reset handling */
3237 	tasklet_hi_schedule(&execlists->tasklet);
3238 }
3239 
3240 #define execlists_kick(t, member) \
3241 	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
3242 
3243 static void execlists_timeslice(struct timer_list *timer)
3244 {
3245 	execlists_kick(timer, timer);
3246 }
3247 
3248 static void execlists_preempt(struct timer_list *timer)
3249 {
3250 	execlists_kick(timer, preempt);
3251 }
3252 
3253 static void queue_request(struct intel_engine_cs *engine,
3254 			  struct i915_request *rq)
3255 {
3256 	GEM_BUG_ON(!list_empty(&rq->sched.link));
3257 	list_add_tail(&rq->sched.link,
3258 		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
3259 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3260 }
3261 
3262 static void __submit_queue_imm(struct intel_engine_cs *engine)
3263 {
3264 	struct intel_engine_execlists * const execlists = &engine->execlists;
3265 
3266 	if (reset_in_progress(execlists))
3267 		return; /* defer until we restart the engine following reset */
3268 
3269 	__execlists_submission_tasklet(engine);
3270 }
3271 
3272 static void submit_queue(struct intel_engine_cs *engine,
3273 			 const struct i915_request *rq)
3274 {
3275 	struct intel_engine_execlists *execlists = &engine->execlists;
3276 
3277 	if (rq_prio(rq) <= execlists->queue_priority_hint)
3278 		return;
3279 
3280 	execlists->queue_priority_hint = rq_prio(rq);
3281 	__submit_queue_imm(engine);
3282 }
3283 
3284 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3285 			     const struct i915_request *rq)
3286 {
3287 	GEM_BUG_ON(i915_request_on_hold(rq));
3288 	return !list_empty(&engine->active.hold) && hold_request(rq);
3289 }
3290 
3291 static void flush_csb(struct intel_engine_cs *engine)
3292 {
3293 	struct intel_engine_execlists *el = &engine->execlists;
3294 
3295 	if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
3296 		if (!reset_in_progress(el))
3297 			process_csb(engine);
3298 		tasklet_unlock(&el->tasklet);
3299 	}
3300 }
3301 
3302 static void execlists_submit_request(struct i915_request *request)
3303 {
3304 	struct intel_engine_cs *engine = request->engine;
3305 	unsigned long flags;
3306 
3307 	/* Hopefully we clear execlists->pending[] to let us through */
3308 	flush_csb(engine);
3309 
3310 	/* Will be called from irq-context when using foreign fences. */
3311 	spin_lock_irqsave(&engine->active.lock, flags);
3312 
3313 	if (unlikely(ancestor_on_hold(engine, request))) {
3314 		RQ_TRACE(request, "ancestor on hold\n");
3315 		list_add_tail(&request->sched.link, &engine->active.hold);
3316 		i915_request_set_hold(request);
3317 	} else {
3318 		queue_request(engine, request);
3319 
3320 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3321 		GEM_BUG_ON(list_empty(&request->sched.link));
3322 
3323 		submit_queue(engine, request);
3324 	}
3325 
3326 	spin_unlock_irqrestore(&engine->active.lock, flags);
3327 }
3328 
3329 static void __execlists_context_fini(struct intel_context *ce)
3330 {
3331 	intel_ring_put(ce->ring);
3332 	i915_vma_put(ce->state);
3333 }
3334 
3335 static void execlists_context_destroy(struct kref *kref)
3336 {
3337 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3338 
3339 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3340 	GEM_BUG_ON(intel_context_is_pinned(ce));
3341 
3342 	if (ce->state)
3343 		__execlists_context_fini(ce);
3344 
3345 	intel_context_fini(ce);
3346 	intel_context_free(ce);
3347 }
3348 
3349 static void
3350 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3351 {
3352 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3353 		return;
3354 
3355 	vaddr += engine->context_size;
3356 
3357 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3358 }
3359 
3360 static void
3361 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3362 {
3363 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3364 		return;
3365 
3366 	vaddr += engine->context_size;
3367 
3368 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3369 		drm_err_once(&engine->i915->drm,
3370 			     "%s context redzone overwritten!\n",
3371 			     engine->name);
3372 }
3373 
3374 static void execlists_context_unpin(struct intel_context *ce)
3375 {
3376 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3377 		      ce->engine);
3378 }
3379 
3380 static void execlists_context_post_unpin(struct intel_context *ce)
3381 {
3382 	i915_gem_object_unpin_map(ce->state->obj);
3383 }
3384 
3385 static u32 *
3386 gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3387 {
3388 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3389 		MI_SRM_LRM_GLOBAL_GTT |
3390 		MI_LRI_LRM_CS_MMIO;
3391 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3392 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3393 		CTX_TIMESTAMP * sizeof(u32);
3394 	*cs++ = 0;
3395 
3396 	*cs++ = MI_LOAD_REGISTER_REG |
3397 		MI_LRR_SOURCE_CS_MMIO |
3398 		MI_LRI_LRM_CS_MMIO;
3399 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3400 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3401 
3402 	*cs++ = MI_LOAD_REGISTER_REG |
3403 		MI_LRR_SOURCE_CS_MMIO |
3404 		MI_LRI_LRM_CS_MMIO;
3405 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3406 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3407 
3408 	return cs;
3409 }
3410 
3411 static u32 *
3412 gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3413 {
3414 	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3415 
3416 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3417 		MI_SRM_LRM_GLOBAL_GTT |
3418 		MI_LRI_LRM_CS_MMIO;
3419 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3420 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3421 		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3422 	*cs++ = 0;
3423 
3424 	return cs;
3425 }
3426 
3427 static u32 *
3428 gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3429 {
3430 	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3431 
3432 	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3433 		MI_SRM_LRM_GLOBAL_GTT |
3434 		MI_LRI_LRM_CS_MMIO;
3435 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3436 	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3437 		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3438 	*cs++ = 0;
3439 
3440 	*cs++ = MI_LOAD_REGISTER_REG |
3441 		MI_LRR_SOURCE_CS_MMIO |
3442 		MI_LRI_LRM_CS_MMIO;
3443 	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3444 	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3445 
3446 	return cs;
3447 }
3448 
3449 static u32 *
3450 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3451 {
3452 	cs = gen12_emit_timestamp_wa(ce, cs);
3453 	cs = gen12_emit_cmd_buf_wa(ce, cs);
3454 	cs = gen12_emit_restore_scratch(ce, cs);
3455 
3456 	return cs;
3457 }
3458 
3459 static u32 *
3460 gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3461 {
3462 	cs = gen12_emit_timestamp_wa(ce, cs);
3463 	cs = gen12_emit_restore_scratch(ce, cs);
3464 
3465 	return cs;
3466 }
3467 
3468 static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3469 {
3470 	return PAGE_SIZE * ce->wa_bb_page;
3471 }
3472 
3473 static u32 *context_indirect_bb(const struct intel_context *ce)
3474 {
3475 	void *ptr;
3476 
3477 	GEM_BUG_ON(!ce->wa_bb_page);
3478 
3479 	ptr = ce->lrc_reg_state;
3480 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3481 	ptr += context_wa_bb_offset(ce);
3482 
3483 	return ptr;
3484 }
3485 
3486 static void
3487 setup_indirect_ctx_bb(const struct intel_context *ce,
3488 		      const struct intel_engine_cs *engine,
3489 		      u32 *(*emit)(const struct intel_context *, u32 *))
3490 {
3491 	u32 * const start = context_indirect_bb(ce);
3492 	u32 *cs;
3493 
3494 	cs = emit(ce, start);
3495 	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3496 	while ((unsigned long)cs % CACHELINE_BYTES)
3497 		*cs++ = MI_NOOP;
3498 
3499 	lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3500 				    i915_ggtt_offset(ce->state) +
3501 				    context_wa_bb_offset(ce),
3502 				    (cs - start) * sizeof(*cs));
3503 }
3504 
3505 static void
3506 __execlists_update_reg_state(const struct intel_context *ce,
3507 			     const struct intel_engine_cs *engine,
3508 			     u32 head)
3509 {
3510 	struct intel_ring *ring = ce->ring;
3511 	u32 *regs = ce->lrc_reg_state;
3512 
3513 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3514 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3515 
3516 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3517 	regs[CTX_RING_HEAD] = head;
3518 	regs[CTX_RING_TAIL] = ring->tail;
3519 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3520 
3521 	/* RPCS */
3522 	if (engine->class == RENDER_CLASS) {
3523 		regs[CTX_R_PWR_CLK_STATE] =
3524 			intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3525 
3526 		i915_oa_init_reg_state(ce, engine);
3527 	}
3528 
3529 	if (ce->wa_bb_page) {
3530 		u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3531 
3532 		fn = gen12_emit_indirect_ctx_xcs;
3533 		if (ce->engine->class == RENDER_CLASS)
3534 			fn = gen12_emit_indirect_ctx_rcs;
3535 
3536 		/* Mutually exclusive wrt to global indirect bb */
3537 		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3538 		setup_indirect_ctx_bb(ce, engine, fn);
3539 	}
3540 }
3541 
3542 static int
3543 execlists_context_pre_pin(struct intel_context *ce,
3544 			  struct i915_gem_ww_ctx *ww, void **vaddr)
3545 {
3546 	GEM_BUG_ON(!ce->state);
3547 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3548 
3549 	*vaddr = i915_gem_object_pin_map(ce->state->obj,
3550 					i915_coherent_map_type(ce->engine->i915) |
3551 					I915_MAP_OVERRIDE);
3552 
3553 	return PTR_ERR_OR_ZERO(*vaddr);
3554 }
3555 
3556 static int
3557 __execlists_context_pin(struct intel_context *ce,
3558 			struct intel_engine_cs *engine,
3559 			void *vaddr)
3560 {
3561 	ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3562 	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3563 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
3564 
3565 	return 0;
3566 }
3567 
3568 static int execlists_context_pin(struct intel_context *ce, void *vaddr)
3569 {
3570 	return __execlists_context_pin(ce, ce->engine, vaddr);
3571 }
3572 
3573 static int execlists_context_alloc(struct intel_context *ce)
3574 {
3575 	return __execlists_context_alloc(ce, ce->engine);
3576 }
3577 
3578 static void execlists_context_reset(struct intel_context *ce)
3579 {
3580 	CE_TRACE(ce, "reset\n");
3581 	GEM_BUG_ON(!intel_context_is_pinned(ce));
3582 
3583 	intel_ring_reset(ce->ring, ce->ring->emit);
3584 
3585 	/* Scrub away the garbage */
3586 	execlists_init_reg_state(ce->lrc_reg_state,
3587 				 ce, ce->engine, ce->ring, true);
3588 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3589 
3590 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3591 }
3592 
3593 static const struct intel_context_ops execlists_context_ops = {
3594 	.alloc = execlists_context_alloc,
3595 
3596 	.pre_pin = execlists_context_pre_pin,
3597 	.pin = execlists_context_pin,
3598 	.unpin = execlists_context_unpin,
3599 	.post_unpin = execlists_context_post_unpin,
3600 
3601 	.enter = intel_context_enter_engine,
3602 	.exit = intel_context_exit_engine,
3603 
3604 	.reset = execlists_context_reset,
3605 	.destroy = execlists_context_destroy,
3606 };
3607 
3608 static u32 hwsp_offset(const struct i915_request *rq)
3609 {
3610 	const struct intel_timeline_cacheline *cl;
3611 
3612 	/* Before the request is executed, the timeline/cachline is fixed */
3613 
3614 	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
3615 	if (cl)
3616 		return cl->ggtt_offset;
3617 
3618 	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
3619 }
3620 
3621 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3622 {
3623 	u32 *cs;
3624 
3625 	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3626 	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3627 		return 0;
3628 
3629 	cs = intel_ring_begin(rq, 6);
3630 	if (IS_ERR(cs))
3631 		return PTR_ERR(cs);
3632 
3633 	/*
3634 	 * Check if we have been preempted before we even get started.
3635 	 *
3636 	 * After this point i915_request_started() reports true, even if
3637 	 * we get preempted and so are no longer running.
3638 	 */
3639 	*cs++ = MI_ARB_CHECK;
3640 	*cs++ = MI_NOOP;
3641 
3642 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3643 	*cs++ = hwsp_offset(rq);
3644 	*cs++ = 0;
3645 	*cs++ = rq->fence.seqno - 1;
3646 
3647 	intel_ring_advance(rq, cs);
3648 
3649 	/* Record the updated position of the request's payload */
3650 	rq->infix = intel_ring_offset(rq, cs);
3651 
3652 	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3653 
3654 	return 0;
3655 }
3656 
3657 static int emit_pdps(struct i915_request *rq)
3658 {
3659 	const struct intel_engine_cs * const engine = rq->engine;
3660 	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3661 	int err, i;
3662 	u32 *cs;
3663 
3664 	GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
3665 
3666 	/*
3667 	 * Beware ye of the dragons, this sequence is magic!
3668 	 *
3669 	 * Small changes to this sequence can cause anything from
3670 	 * GPU hangs to forcewake errors and machine lockups!
3671 	 */
3672 
3673 	/* Flush any residual operations from the context load */
3674 	err = engine->emit_flush(rq, EMIT_FLUSH);
3675 	if (err)
3676 		return err;
3677 
3678 	/* Magic required to prevent forcewake errors! */
3679 	err = engine->emit_flush(rq, EMIT_INVALIDATE);
3680 	if (err)
3681 		return err;
3682 
3683 	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3684 	if (IS_ERR(cs))
3685 		return PTR_ERR(cs);
3686 
3687 	/* Ensure the LRI have landed before we invalidate & continue */
3688 	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3689 	for (i = GEN8_3LVL_PDPES; i--; ) {
3690 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3691 		u32 base = engine->mmio_base;
3692 
3693 		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3694 		*cs++ = upper_32_bits(pd_daddr);
3695 		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3696 		*cs++ = lower_32_bits(pd_daddr);
3697 	}
3698 	*cs++ = MI_NOOP;
3699 
3700 	intel_ring_advance(rq, cs);
3701 
3702 	return 0;
3703 }
3704 
3705 static int execlists_request_alloc(struct i915_request *request)
3706 {
3707 	int ret;
3708 
3709 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
3710 
3711 	/*
3712 	 * Flush enough space to reduce the likelihood of waiting after
3713 	 * we start building the request - in which case we will just
3714 	 * have to repeat work.
3715 	 */
3716 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
3717 
3718 	/*
3719 	 * Note that after this point, we have committed to using
3720 	 * this request as it is being used to both track the
3721 	 * state of engine initialisation and liveness of the
3722 	 * golden renderstate above. Think twice before you try
3723 	 * to cancel/unwind this request now.
3724 	 */
3725 
3726 	if (!i915_vm_is_4lvl(request->context->vm)) {
3727 		ret = emit_pdps(request);
3728 		if (ret)
3729 			return ret;
3730 	}
3731 
3732 	/* Unconditionally invalidate GPU caches and TLBs. */
3733 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3734 	if (ret)
3735 		return ret;
3736 
3737 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3738 	return 0;
3739 }
3740 
3741 /*
3742  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3743  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3744  * but there is a slight complication as this is applied in WA batch where the
3745  * values are only initialized once so we cannot take register value at the
3746  * beginning and reuse it further; hence we save its value to memory, upload a
3747  * constant value with bit21 set and then we restore it back with the saved value.
3748  * To simplify the WA, a constant value is formed by using the default value
3749  * of this register. This shouldn't be a problem because we are only modifying
3750  * it for a short period and this batch in non-premptible. We can ofcourse
3751  * use additional instructions that read the actual value of the register
3752  * at that time and set our bit of interest but it makes the WA complicated.
3753  *
3754  * This WA is also required for Gen9 so extracting as a function avoids
3755  * code duplication.
3756  */
3757 static u32 *
3758 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3759 {
3760 	/* NB no one else is allowed to scribble over scratch + 256! */
3761 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3762 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3763 	*batch++ = intel_gt_scratch_offset(engine->gt,
3764 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3765 	*batch++ = 0;
3766 
3767 	*batch++ = MI_LOAD_REGISTER_IMM(1);
3768 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3769 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3770 
3771 	batch = gen8_emit_pipe_control(batch,
3772 				       PIPE_CONTROL_CS_STALL |
3773 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
3774 				       0);
3775 
3776 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3777 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3778 	*batch++ = intel_gt_scratch_offset(engine->gt,
3779 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3780 	*batch++ = 0;
3781 
3782 	return batch;
3783 }
3784 
3785 /*
3786  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3787  * initialized at the beginning and shared across all contexts but this field
3788  * helps us to have multiple batches at different offsets and select them based
3789  * on a criteria. At the moment this batch always start at the beginning of the page
3790  * and at this point we don't have multiple wa_ctx batch buffers.
3791  *
3792  * The number of WA applied are not known at the beginning; we use this field
3793  * to return the no of DWORDS written.
3794  *
3795  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3796  * so it adds NOOPs as padding to make it cacheline aligned.
3797  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
3798  * makes a complete batch buffer.
3799  */
3800 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3801 {
3802 	/* WaDisableCtxRestoreArbitration:bdw,chv */
3803 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3804 
3805 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3806 	if (IS_BROADWELL(engine->i915))
3807 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3808 
3809 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3810 	/* Actual scratch location is at 128 bytes offset */
3811 	batch = gen8_emit_pipe_control(batch,
3812 				       PIPE_CONTROL_FLUSH_L3 |
3813 				       PIPE_CONTROL_STORE_DATA_INDEX |
3814 				       PIPE_CONTROL_CS_STALL |
3815 				       PIPE_CONTROL_QW_WRITE,
3816 				       LRC_PPHWSP_SCRATCH_ADDR);
3817 
3818 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3819 
3820 	/* Pad to end of cacheline */
3821 	while ((unsigned long)batch % CACHELINE_BYTES)
3822 		*batch++ = MI_NOOP;
3823 
3824 	/*
3825 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3826 	 * execution depends on the length specified in terms of cache lines
3827 	 * in the register CTX_RCS_INDIRECT_CTX
3828 	 */
3829 
3830 	return batch;
3831 }
3832 
3833 struct lri {
3834 	i915_reg_t reg;
3835 	u32 value;
3836 };
3837 
3838 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3839 {
3840 	GEM_BUG_ON(!count || count > 63);
3841 
3842 	*batch++ = MI_LOAD_REGISTER_IMM(count);
3843 	do {
3844 		*batch++ = i915_mmio_reg_offset(lri->reg);
3845 		*batch++ = lri->value;
3846 	} while (lri++, --count);
3847 	*batch++ = MI_NOOP;
3848 
3849 	return batch;
3850 }
3851 
3852 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3853 {
3854 	static const struct lri lri[] = {
3855 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3856 		{
3857 			COMMON_SLICE_CHICKEN2,
3858 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3859 				       0),
3860 		},
3861 
3862 		/* BSpec: 11391 */
3863 		{
3864 			FF_SLICE_CHICKEN,
3865 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3866 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3867 		},
3868 
3869 		/* BSpec: 11299 */
3870 		{
3871 			_3D_CHICKEN3,
3872 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3873 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3874 		}
3875 	};
3876 
3877 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3878 
3879 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3880 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3881 
3882 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3883 	batch = gen8_emit_pipe_control(batch,
3884 				       PIPE_CONTROL_FLUSH_L3 |
3885 				       PIPE_CONTROL_STORE_DATA_INDEX |
3886 				       PIPE_CONTROL_CS_STALL |
3887 				       PIPE_CONTROL_QW_WRITE,
3888 				       LRC_PPHWSP_SCRATCH_ADDR);
3889 
3890 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3891 
3892 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
3893 	if (HAS_POOLED_EU(engine->i915)) {
3894 		/*
3895 		 * EU pool configuration is setup along with golden context
3896 		 * during context initialization. This value depends on
3897 		 * device type (2x6 or 3x6) and needs to be updated based
3898 		 * on which subslice is disabled especially for 2x6
3899 		 * devices, however it is safe to load default
3900 		 * configuration of 3x6 device instead of masking off
3901 		 * corresponding bits because HW ignores bits of a disabled
3902 		 * subslice and drops down to appropriate config. Please
3903 		 * see render_state_setup() in i915_gem_render_state.c for
3904 		 * possible configurations, to avoid duplication they are
3905 		 * not shown here again.
3906 		 */
3907 		*batch++ = GEN9_MEDIA_POOL_STATE;
3908 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
3909 		*batch++ = 0x00777000;
3910 		*batch++ = 0;
3911 		*batch++ = 0;
3912 		*batch++ = 0;
3913 	}
3914 
3915 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3916 
3917 	/* Pad to end of cacheline */
3918 	while ((unsigned long)batch % CACHELINE_BYTES)
3919 		*batch++ = MI_NOOP;
3920 
3921 	return batch;
3922 }
3923 
3924 static u32 *
3925 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3926 {
3927 	int i;
3928 
3929 	/*
3930 	 * WaPipeControlBefore3DStateSamplePattern: cnl
3931 	 *
3932 	 * Ensure the engine is idle prior to programming a
3933 	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3934 	 */
3935 	batch = gen8_emit_pipe_control(batch,
3936 				       PIPE_CONTROL_CS_STALL,
3937 				       0);
3938 	/*
3939 	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3940 	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3941 	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3942 	 * confusing. Since gen8_emit_pipe_control() already advances the
3943 	 * batch by 6 dwords, we advance the other 10 here, completing a
3944 	 * cacheline. It's not clear if the workaround requires this padding
3945 	 * before other commands, or if it's just the regular padding we would
3946 	 * already have for the workaround bb, so leave it here for now.
3947 	 */
3948 	for (i = 0; i < 10; i++)
3949 		*batch++ = MI_NOOP;
3950 
3951 	/* Pad to end of cacheline */
3952 	while ((unsigned long)batch % CACHELINE_BYTES)
3953 		*batch++ = MI_NOOP;
3954 
3955 	return batch;
3956 }
3957 
3958 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3959 
3960 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3961 {
3962 	struct drm_i915_gem_object *obj;
3963 	struct i915_vma *vma;
3964 	int err;
3965 
3966 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3967 	if (IS_ERR(obj))
3968 		return PTR_ERR(obj);
3969 
3970 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3971 	if (IS_ERR(vma)) {
3972 		err = PTR_ERR(vma);
3973 		goto err;
3974 	}
3975 
3976 	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
3977 	if (err)
3978 		goto err;
3979 
3980 	engine->wa_ctx.vma = vma;
3981 	return 0;
3982 
3983 err:
3984 	i915_gem_object_put(obj);
3985 	return err;
3986 }
3987 
3988 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3989 {
3990 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3991 
3992 	/* Called on error unwind, clear all flags to prevent further use */
3993 	memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx));
3994 }
3995 
3996 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3997 
3998 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3999 {
4000 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
4001 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
4002 					    &wa_ctx->per_ctx };
4003 	wa_bb_func_t wa_bb_fn[2];
4004 	void *batch, *batch_ptr;
4005 	unsigned int i;
4006 	int ret;
4007 
4008 	if (engine->class != RENDER_CLASS)
4009 		return 0;
4010 
4011 	switch (INTEL_GEN(engine->i915)) {
4012 	case 12:
4013 	case 11:
4014 		return 0;
4015 	case 10:
4016 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
4017 		wa_bb_fn[1] = NULL;
4018 		break;
4019 	case 9:
4020 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
4021 		wa_bb_fn[1] = NULL;
4022 		break;
4023 	case 8:
4024 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
4025 		wa_bb_fn[1] = NULL;
4026 		break;
4027 	default:
4028 		MISSING_CASE(INTEL_GEN(engine->i915));
4029 		return 0;
4030 	}
4031 
4032 	ret = lrc_setup_wa_ctx(engine);
4033 	if (ret) {
4034 		drm_dbg(&engine->i915->drm,
4035 			"Failed to setup context WA page: %d\n", ret);
4036 		return ret;
4037 	}
4038 
4039 	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
4040 
4041 	/*
4042 	 * Emit the two workaround batch buffers, recording the offset from the
4043 	 * start of the workaround batch buffer object for each and their
4044 	 * respective sizes.
4045 	 */
4046 	batch_ptr = batch;
4047 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
4048 		wa_bb[i]->offset = batch_ptr - batch;
4049 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
4050 						  CACHELINE_BYTES))) {
4051 			ret = -EINVAL;
4052 			break;
4053 		}
4054 		if (wa_bb_fn[i])
4055 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
4056 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
4057 	}
4058 	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
4059 
4060 	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
4061 	__i915_gem_object_release_map(wa_ctx->vma->obj);
4062 	if (ret)
4063 		lrc_destroy_wa_ctx(engine);
4064 
4065 	return ret;
4066 }
4067 
4068 static void reset_csb_pointers(struct intel_engine_cs *engine)
4069 {
4070 	struct intel_engine_execlists * const execlists = &engine->execlists;
4071 	const unsigned int reset_value = execlists->csb_size - 1;
4072 
4073 	ring_set_paused(engine, 0);
4074 
4075 	/*
4076 	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
4077 	 * Bludgeon them with a mmio update to be sure.
4078 	 */
4079 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4080 		     0xffff << 16 | reset_value << 8 | reset_value);
4081 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4082 
4083 	/*
4084 	 * After a reset, the HW starts writing into CSB entry [0]. We
4085 	 * therefore have to set our HEAD pointer back one entry so that
4086 	 * the *first* entry we check is entry 0. To complicate this further,
4087 	 * as we don't wait for the first interrupt after reset, we have to
4088 	 * fake the HW write to point back to the last entry so that our
4089 	 * inline comparison of our cached head position against the last HW
4090 	 * write works even before the first interrupt.
4091 	 */
4092 	execlists->csb_head = reset_value;
4093 	WRITE_ONCE(*execlists->csb_write, reset_value);
4094 	wmb(); /* Make sure this is visible to HW (paranoia?) */
4095 
4096 	/* Check that the GPU does indeed update the CSB entries! */
4097 	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
4098 	invalidate_csb_entries(&execlists->csb_status[0],
4099 			       &execlists->csb_status[reset_value]);
4100 
4101 	/* Once more for luck and our trusty paranoia */
4102 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4103 		     0xffff << 16 | reset_value << 8 | reset_value);
4104 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4105 
4106 	GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
4107 }
4108 
4109 static void execlists_sanitize(struct intel_engine_cs *engine)
4110 {
4111 	GEM_BUG_ON(execlists_active(&engine->execlists));
4112 
4113 	/*
4114 	 * Poison residual state on resume, in case the suspend didn't!
4115 	 *
4116 	 * We have to assume that across suspend/resume (or other loss
4117 	 * of control) that the contents of our pinned buffers has been
4118 	 * lost, replaced by garbage. Since this doesn't always happen,
4119 	 * let's poison such state so that we more quickly spot when
4120 	 * we falsely assume it has been preserved.
4121 	 */
4122 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4123 		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4124 
4125 	reset_csb_pointers(engine);
4126 
4127 	/*
4128 	 * The kernel_context HWSP is stored in the status_page. As above,
4129 	 * that may be lost on resume/initialisation, and so we need to
4130 	 * reset the value in the HWSP.
4131 	 */
4132 	intel_timeline_reset_seqno(engine->kernel_context->timeline);
4133 
4134 	/* And scrub the dirty cachelines for the HWSP */
4135 	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4136 }
4137 
4138 static void enable_error_interrupt(struct intel_engine_cs *engine)
4139 {
4140 	u32 status;
4141 
4142 	engine->execlists.error_interrupt = 0;
4143 	ENGINE_WRITE(engine, RING_EMR, ~0u);
4144 	ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4145 
4146 	status = ENGINE_READ(engine, RING_ESR);
4147 	if (unlikely(status)) {
4148 		drm_err(&engine->i915->drm,
4149 			"engine '%s' resumed still in error: %08x\n",
4150 			engine->name, status);
4151 		__intel_gt_reset(engine->gt, engine->mask);
4152 	}
4153 
4154 	/*
4155 	 * On current gen8+, we have 2 signals to play with
4156 	 *
4157 	 * - I915_ERROR_INSTUCTION (bit 0)
4158 	 *
4159 	 *    Generate an error if the command parser encounters an invalid
4160 	 *    instruction
4161 	 *
4162 	 *    This is a fatal error.
4163 	 *
4164 	 * - CP_PRIV (bit 2)
4165 	 *
4166 	 *    Generate an error on privilege violation (where the CP replaces
4167 	 *    the instruction with a no-op). This also fires for writes into
4168 	 *    read-only scratch pages.
4169 	 *
4170 	 *    This is a non-fatal error, parsing continues.
4171 	 *
4172 	 * * there are a few others defined for odd HW that we do not use
4173 	 *
4174 	 * Since CP_PRIV fires for cases where we have chosen to ignore the
4175 	 * error (as the HW is validating and suppressing the mistakes), we
4176 	 * only unmask the instruction error bit.
4177 	 */
4178 	ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4179 }
4180 
4181 static void enable_execlists(struct intel_engine_cs *engine)
4182 {
4183 	u32 mode;
4184 
4185 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4186 
4187 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4188 
4189 	if (INTEL_GEN(engine->i915) >= 11)
4190 		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4191 	else
4192 		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4193 	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4194 
4195 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4196 
4197 	ENGINE_WRITE_FW(engine,
4198 			RING_HWS_PGA,
4199 			i915_ggtt_offset(engine->status_page.vma));
4200 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4201 
4202 	enable_error_interrupt(engine);
4203 
4204 	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4205 }
4206 
4207 static bool unexpected_starting_state(struct intel_engine_cs *engine)
4208 {
4209 	bool unexpected = false;
4210 
4211 	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4212 		drm_dbg(&engine->i915->drm,
4213 			"STOP_RING still set in RING_MI_MODE\n");
4214 		unexpected = true;
4215 	}
4216 
4217 	return unexpected;
4218 }
4219 
4220 static int execlists_resume(struct intel_engine_cs *engine)
4221 {
4222 	intel_mocs_init_engine(engine);
4223 
4224 	intel_breadcrumbs_reset(engine->breadcrumbs);
4225 
4226 	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4227 		struct drm_printer p = drm_debug_printer(__func__);
4228 
4229 		intel_engine_dump(engine, &p, NULL);
4230 	}
4231 
4232 	enable_execlists(engine);
4233 
4234 	return 0;
4235 }
4236 
4237 static void execlists_reset_prepare(struct intel_engine_cs *engine)
4238 {
4239 	struct intel_engine_execlists * const execlists = &engine->execlists;
4240 	unsigned long flags;
4241 
4242 	ENGINE_TRACE(engine, "depth<-%d\n",
4243 		     atomic_read(&execlists->tasklet.count));
4244 
4245 	/*
4246 	 * Prevent request submission to the hardware until we have
4247 	 * completed the reset in i915_gem_reset_finish(). If a request
4248 	 * is completed by one engine, it may then queue a request
4249 	 * to a second via its execlists->tasklet *just* as we are
4250 	 * calling engine->resume() and also writing the ELSP.
4251 	 * Turning off the execlists->tasklet until the reset is over
4252 	 * prevents the race.
4253 	 */
4254 	__tasklet_disable_sync_once(&execlists->tasklet);
4255 	GEM_BUG_ON(!reset_in_progress(execlists));
4256 
4257 	/* And flush any current direct submission. */
4258 	spin_lock_irqsave(&engine->active.lock, flags);
4259 	spin_unlock_irqrestore(&engine->active.lock, flags);
4260 
4261 	/*
4262 	 * We stop engines, otherwise we might get failed reset and a
4263 	 * dead gpu (on elk). Also as modern gpu as kbl can suffer
4264 	 * from system hang if batchbuffer is progressing when
4265 	 * the reset is issued, regardless of READY_TO_RESET ack.
4266 	 * Thus assume it is best to stop engines on all gens
4267 	 * where we have a gpu reset.
4268 	 *
4269 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4270 	 *
4271 	 * FIXME: Wa for more modern gens needs to be validated
4272 	 */
4273 	ring_set_paused(engine, 1);
4274 	intel_engine_stop_cs(engine);
4275 
4276 	engine->execlists.reset_ccid = active_ccid(engine);
4277 }
4278 
4279 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4280 {
4281 	int x;
4282 
4283 	x = lrc_ring_mi_mode(engine);
4284 	if (x != -1) {
4285 		regs[x + 1] &= ~STOP_RING;
4286 		regs[x + 1] |= STOP_RING << 16;
4287 	}
4288 }
4289 
4290 static void __execlists_reset_reg_state(const struct intel_context *ce,
4291 					const struct intel_engine_cs *engine)
4292 {
4293 	u32 *regs = ce->lrc_reg_state;
4294 
4295 	__reset_stop_ring(regs, engine);
4296 }
4297 
4298 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4299 {
4300 	struct intel_engine_execlists * const execlists = &engine->execlists;
4301 	struct intel_context *ce;
4302 	struct i915_request *rq;
4303 	u32 head;
4304 
4305 	mb(); /* paranoia: read the CSB pointers from after the reset */
4306 	clflush(execlists->csb_write);
4307 	mb();
4308 
4309 	process_csb(engine); /* drain preemption events */
4310 
4311 	/* Following the reset, we need to reload the CSB read/write pointers */
4312 	reset_csb_pointers(engine);
4313 
4314 	/*
4315 	 * Save the currently executing context, even if we completed
4316 	 * its request, it was still running at the time of the
4317 	 * reset and will have been clobbered.
4318 	 */
4319 	rq = active_context(engine, engine->execlists.reset_ccid);
4320 	if (!rq)
4321 		goto unwind;
4322 
4323 	ce = rq->context;
4324 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4325 
4326 	if (i915_request_completed(rq)) {
4327 		/* Idle context; tidy up the ring so we can restart afresh */
4328 		head = intel_ring_wrap(ce->ring, rq->tail);
4329 		goto out_replay;
4330 	}
4331 
4332 	/* We still have requests in-flight; the engine should be active */
4333 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4334 
4335 	/* Context has requests still in-flight; it should not be idle! */
4336 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
4337 
4338 	rq = active_request(ce->timeline, rq);
4339 	head = intel_ring_wrap(ce->ring, rq->head);
4340 	GEM_BUG_ON(head == ce->ring->tail);
4341 
4342 	/*
4343 	 * If this request hasn't started yet, e.g. it is waiting on a
4344 	 * semaphore, we need to avoid skipping the request or else we
4345 	 * break the signaling chain. However, if the context is corrupt
4346 	 * the request will not restart and we will be stuck with a wedged
4347 	 * device. It is quite often the case that if we issue a reset
4348 	 * while the GPU is loading the context image, that the context
4349 	 * image becomes corrupt.
4350 	 *
4351 	 * Otherwise, if we have not started yet, the request should replay
4352 	 * perfectly and we do not need to flag the result as being erroneous.
4353 	 */
4354 	if (!i915_request_started(rq))
4355 		goto out_replay;
4356 
4357 	/*
4358 	 * If the request was innocent, we leave the request in the ELSP
4359 	 * and will try to replay it on restarting. The context image may
4360 	 * have been corrupted by the reset, in which case we may have
4361 	 * to service a new GPU hang, but more likely we can continue on
4362 	 * without impact.
4363 	 *
4364 	 * If the request was guilty, we presume the context is corrupt
4365 	 * and have to at least restore the RING register in the context
4366 	 * image back to the expected values to skip over the guilty request.
4367 	 */
4368 	__i915_request_reset(rq, stalled);
4369 
4370 	/*
4371 	 * We want a simple context + ring to execute the breadcrumb update.
4372 	 * We cannot rely on the context being intact across the GPU hang,
4373 	 * so clear it and rebuild just what we need for the breadcrumb.
4374 	 * All pending requests for this context will be zapped, and any
4375 	 * future request will be after userspace has had the opportunity
4376 	 * to recreate its own state.
4377 	 */
4378 out_replay:
4379 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4380 		     head, ce->ring->tail);
4381 	__execlists_reset_reg_state(ce, engine);
4382 	__execlists_update_reg_state(ce, engine, head);
4383 	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4384 
4385 unwind:
4386 	/* Push back any incomplete requests for replay after the reset. */
4387 	cancel_port_requests(execlists);
4388 	__unwind_incomplete_requests(engine);
4389 }
4390 
4391 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4392 {
4393 	unsigned long flags;
4394 
4395 	ENGINE_TRACE(engine, "\n");
4396 
4397 	spin_lock_irqsave(&engine->active.lock, flags);
4398 
4399 	__execlists_reset(engine, stalled);
4400 
4401 	spin_unlock_irqrestore(&engine->active.lock, flags);
4402 }
4403 
4404 static void nop_submission_tasklet(unsigned long data)
4405 {
4406 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4407 
4408 	/* The driver is wedged; don't process any more events. */
4409 	WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4410 }
4411 
4412 static void execlists_reset_cancel(struct intel_engine_cs *engine)
4413 {
4414 	struct intel_engine_execlists * const execlists = &engine->execlists;
4415 	struct i915_request *rq, *rn;
4416 	struct rb_node *rb;
4417 	unsigned long flags;
4418 
4419 	ENGINE_TRACE(engine, "\n");
4420 
4421 	/*
4422 	 * Before we call engine->cancel_requests(), we should have exclusive
4423 	 * access to the submission state. This is arranged for us by the
4424 	 * caller disabling the interrupt generation, the tasklet and other
4425 	 * threads that may then access the same state, giving us a free hand
4426 	 * to reset state. However, we still need to let lockdep be aware that
4427 	 * we know this state may be accessed in hardirq context, so we
4428 	 * disable the irq around this manipulation and we want to keep
4429 	 * the spinlock focused on its duties and not accidentally conflate
4430 	 * coverage to the submission's irq state. (Similarly, although we
4431 	 * shouldn't need to disable irq around the manipulation of the
4432 	 * submission's irq state, we also wish to remind ourselves that
4433 	 * it is irq state.)
4434 	 */
4435 	spin_lock_irqsave(&engine->active.lock, flags);
4436 
4437 	__execlists_reset(engine, true);
4438 
4439 	/* Mark all executing requests as skipped. */
4440 	list_for_each_entry(rq, &engine->active.requests, sched.link)
4441 		mark_eio(rq);
4442 	intel_engine_signal_breadcrumbs(engine);
4443 
4444 	/* Flush the queued requests to the timeline list (for retiring). */
4445 	while ((rb = rb_first_cached(&execlists->queue))) {
4446 		struct i915_priolist *p = to_priolist(rb);
4447 		int i;
4448 
4449 		priolist_for_each_request_consume(rq, rn, p, i) {
4450 			mark_eio(rq);
4451 			__i915_request_submit(rq);
4452 		}
4453 
4454 		rb_erase_cached(&p->node, &execlists->queue);
4455 		i915_priolist_free(p);
4456 	}
4457 
4458 	/* On-hold requests will be flushed to timeline upon their release */
4459 	list_for_each_entry(rq, &engine->active.hold, sched.link)
4460 		mark_eio(rq);
4461 
4462 	/* Cancel all attached virtual engines */
4463 	while ((rb = rb_first_cached(&execlists->virtual))) {
4464 		struct virtual_engine *ve =
4465 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4466 
4467 		rb_erase_cached(rb, &execlists->virtual);
4468 		RB_CLEAR_NODE(rb);
4469 
4470 		spin_lock(&ve->base.active.lock);
4471 		rq = fetch_and_zero(&ve->request);
4472 		if (rq) {
4473 			mark_eio(rq);
4474 
4475 			rq->engine = engine;
4476 			__i915_request_submit(rq);
4477 			i915_request_put(rq);
4478 
4479 			ve->base.execlists.queue_priority_hint = INT_MIN;
4480 		}
4481 		spin_unlock(&ve->base.active.lock);
4482 	}
4483 
4484 	/* Remaining _unready_ requests will be nop'ed when submitted */
4485 
4486 	execlists->queue_priority_hint = INT_MIN;
4487 	execlists->queue = RB_ROOT_CACHED;
4488 
4489 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4490 	execlists->tasklet.func = nop_submission_tasklet;
4491 
4492 	spin_unlock_irqrestore(&engine->active.lock, flags);
4493 }
4494 
4495 static void execlists_reset_finish(struct intel_engine_cs *engine)
4496 {
4497 	struct intel_engine_execlists * const execlists = &engine->execlists;
4498 
4499 	/*
4500 	 * After a GPU reset, we may have requests to replay. Do so now while
4501 	 * we still have the forcewake to be sure that the GPU is not allowed
4502 	 * to sleep before we restart and reload a context.
4503 	 */
4504 	GEM_BUG_ON(!reset_in_progress(execlists));
4505 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4506 		execlists->tasklet.func(execlists->tasklet.data);
4507 
4508 	if (__tasklet_enable(&execlists->tasklet))
4509 		/* And kick in case we missed a new request submission. */
4510 		tasklet_hi_schedule(&execlists->tasklet);
4511 	ENGINE_TRACE(engine, "depth->%d\n",
4512 		     atomic_read(&execlists->tasklet.count));
4513 }
4514 
4515 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4516 				    u64 offset, u32 len,
4517 				    const unsigned int flags)
4518 {
4519 	u32 *cs;
4520 
4521 	cs = intel_ring_begin(rq, 4);
4522 	if (IS_ERR(cs))
4523 		return PTR_ERR(cs);
4524 
4525 	/*
4526 	 * WaDisableCtxRestoreArbitration:bdw,chv
4527 	 *
4528 	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
4529 	 * particular all the gen that do not need the w/a at all!), if we
4530 	 * took care to make sure that on every switch into this context
4531 	 * (both ordinary and for preemption) that arbitrartion was enabled
4532 	 * we would be fine.  However, for gen8 there is another w/a that
4533 	 * requires us to not preempt inside GPGPU execution, so we keep
4534 	 * arbitration disabled for gen8 batches. Arbitration will be
4535 	 * re-enabled before we close the request
4536 	 * (engine->emit_fini_breadcrumb).
4537 	 */
4538 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4539 
4540 	/* FIXME(BDW+): Address space and security selectors. */
4541 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4542 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4543 	*cs++ = lower_32_bits(offset);
4544 	*cs++ = upper_32_bits(offset);
4545 
4546 	intel_ring_advance(rq, cs);
4547 
4548 	return 0;
4549 }
4550 
4551 static int gen8_emit_bb_start(struct i915_request *rq,
4552 			      u64 offset, u32 len,
4553 			      const unsigned int flags)
4554 {
4555 	u32 *cs;
4556 
4557 	cs = intel_ring_begin(rq, 6);
4558 	if (IS_ERR(cs))
4559 		return PTR_ERR(cs);
4560 
4561 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4562 
4563 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
4564 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4565 	*cs++ = lower_32_bits(offset);
4566 	*cs++ = upper_32_bits(offset);
4567 
4568 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4569 	*cs++ = MI_NOOP;
4570 
4571 	intel_ring_advance(rq, cs);
4572 
4573 	return 0;
4574 }
4575 
4576 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4577 {
4578 	ENGINE_WRITE(engine, RING_IMR,
4579 		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
4580 	ENGINE_POSTING_READ(engine, RING_IMR);
4581 }
4582 
4583 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4584 {
4585 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4586 }
4587 
4588 static int gen8_emit_flush(struct i915_request *request, u32 mode)
4589 {
4590 	u32 cmd, *cs;
4591 
4592 	cs = intel_ring_begin(request, 4);
4593 	if (IS_ERR(cs))
4594 		return PTR_ERR(cs);
4595 
4596 	cmd = MI_FLUSH_DW + 1;
4597 
4598 	/* We always require a command barrier so that subsequent
4599 	 * commands, such as breadcrumb interrupts, are strictly ordered
4600 	 * wrt the contents of the write cache being flushed to memory
4601 	 * (and thus being coherent from the CPU).
4602 	 */
4603 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4604 
4605 	if (mode & EMIT_INVALIDATE) {
4606 		cmd |= MI_INVALIDATE_TLB;
4607 		if (request->engine->class == VIDEO_DECODE_CLASS)
4608 			cmd |= MI_INVALIDATE_BSD;
4609 	}
4610 
4611 	*cs++ = cmd;
4612 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4613 	*cs++ = 0; /* upper addr */
4614 	*cs++ = 0; /* value */
4615 	intel_ring_advance(request, cs);
4616 
4617 	return 0;
4618 }
4619 
4620 static int gen8_emit_flush_render(struct i915_request *request,
4621 				  u32 mode)
4622 {
4623 	bool vf_flush_wa = false, dc_flush_wa = false;
4624 	u32 *cs, flags = 0;
4625 	int len;
4626 
4627 	flags |= PIPE_CONTROL_CS_STALL;
4628 
4629 	if (mode & EMIT_FLUSH) {
4630 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4631 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4632 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4633 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4634 	}
4635 
4636 	if (mode & EMIT_INVALIDATE) {
4637 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4638 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4639 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4640 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4641 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4642 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4643 		flags |= PIPE_CONTROL_QW_WRITE;
4644 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4645 
4646 		/*
4647 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4648 		 * pipe control.
4649 		 */
4650 		if (IS_GEN(request->engine->i915, 9))
4651 			vf_flush_wa = true;
4652 
4653 		/* WaForGAMHang:kbl */
4654 		if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
4655 			dc_flush_wa = true;
4656 	}
4657 
4658 	len = 6;
4659 
4660 	if (vf_flush_wa)
4661 		len += 6;
4662 
4663 	if (dc_flush_wa)
4664 		len += 12;
4665 
4666 	cs = intel_ring_begin(request, len);
4667 	if (IS_ERR(cs))
4668 		return PTR_ERR(cs);
4669 
4670 	if (vf_flush_wa)
4671 		cs = gen8_emit_pipe_control(cs, 0, 0);
4672 
4673 	if (dc_flush_wa)
4674 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4675 					    0);
4676 
4677 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4678 
4679 	if (dc_flush_wa)
4680 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4681 
4682 	intel_ring_advance(request, cs);
4683 
4684 	return 0;
4685 }
4686 
4687 static int gen11_emit_flush_render(struct i915_request *request,
4688 				   u32 mode)
4689 {
4690 	if (mode & EMIT_FLUSH) {
4691 		u32 *cs;
4692 		u32 flags = 0;
4693 
4694 		flags |= PIPE_CONTROL_CS_STALL;
4695 
4696 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4697 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4698 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4699 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4700 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4701 		flags |= PIPE_CONTROL_QW_WRITE;
4702 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4703 
4704 		cs = intel_ring_begin(request, 6);
4705 		if (IS_ERR(cs))
4706 			return PTR_ERR(cs);
4707 
4708 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4709 		intel_ring_advance(request, cs);
4710 	}
4711 
4712 	if (mode & EMIT_INVALIDATE) {
4713 		u32 *cs;
4714 		u32 flags = 0;
4715 
4716 		flags |= PIPE_CONTROL_CS_STALL;
4717 
4718 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4719 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4720 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4721 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4722 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4723 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4724 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4725 		flags |= PIPE_CONTROL_QW_WRITE;
4726 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4727 
4728 		cs = intel_ring_begin(request, 6);
4729 		if (IS_ERR(cs))
4730 			return PTR_ERR(cs);
4731 
4732 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4733 		intel_ring_advance(request, cs);
4734 	}
4735 
4736 	return 0;
4737 }
4738 
4739 static u32 preparser_disable(bool state)
4740 {
4741 	return MI_ARB_CHECK | 1 << 8 | state;
4742 }
4743 
4744 static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4745 {
4746 	static const i915_reg_t vd[] = {
4747 		GEN12_VD0_AUX_NV,
4748 		GEN12_VD1_AUX_NV,
4749 		GEN12_VD2_AUX_NV,
4750 		GEN12_VD3_AUX_NV,
4751 	};
4752 
4753 	static const i915_reg_t ve[] = {
4754 		GEN12_VE0_AUX_NV,
4755 		GEN12_VE1_AUX_NV,
4756 	};
4757 
4758 	if (engine->class == VIDEO_DECODE_CLASS)
4759 		return vd[engine->instance];
4760 
4761 	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4762 		return ve[engine->instance];
4763 
4764 	GEM_BUG_ON("unknown aux_inv_reg\n");
4765 
4766 	return INVALID_MMIO_REG;
4767 }
4768 
4769 static u32 *
4770 gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4771 {
4772 	*cs++ = MI_LOAD_REGISTER_IMM(1);
4773 	*cs++ = i915_mmio_reg_offset(inv_reg);
4774 	*cs++ = AUX_INV;
4775 	*cs++ = MI_NOOP;
4776 
4777 	return cs;
4778 }
4779 
4780 static int gen12_emit_flush_render(struct i915_request *request,
4781 				   u32 mode)
4782 {
4783 	if (mode & EMIT_FLUSH) {
4784 		u32 flags = 0;
4785 		u32 *cs;
4786 
4787 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4788 		flags |= PIPE_CONTROL_FLUSH_L3;
4789 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4790 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4791 		/* Wa_1409600907:tgl */
4792 		flags |= PIPE_CONTROL_DEPTH_STALL;
4793 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4794 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4795 
4796 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4797 		flags |= PIPE_CONTROL_QW_WRITE;
4798 
4799 		flags |= PIPE_CONTROL_CS_STALL;
4800 
4801 		cs = intel_ring_begin(request, 6);
4802 		if (IS_ERR(cs))
4803 			return PTR_ERR(cs);
4804 
4805 		cs = gen12_emit_pipe_control(cs,
4806 					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4807 					     flags, LRC_PPHWSP_SCRATCH_ADDR);
4808 		intel_ring_advance(request, cs);
4809 	}
4810 
4811 	if (mode & EMIT_INVALIDATE) {
4812 		u32 flags = 0;
4813 		u32 *cs;
4814 
4815 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4816 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
4817 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4818 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4819 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4820 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4821 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4822 
4823 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4824 		flags |= PIPE_CONTROL_QW_WRITE;
4825 
4826 		flags |= PIPE_CONTROL_CS_STALL;
4827 
4828 		cs = intel_ring_begin(request, 8 + 4);
4829 		if (IS_ERR(cs))
4830 			return PTR_ERR(cs);
4831 
4832 		/*
4833 		 * Prevent the pre-parser from skipping past the TLB
4834 		 * invalidate and loading a stale page for the batch
4835 		 * buffer / request payload.
4836 		 */
4837 		*cs++ = preparser_disable(true);
4838 
4839 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4840 
4841 		/* hsdes: 1809175790 */
4842 		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4843 
4844 		*cs++ = preparser_disable(false);
4845 		intel_ring_advance(request, cs);
4846 	}
4847 
4848 	return 0;
4849 }
4850 
4851 static int gen12_emit_flush(struct i915_request *request, u32 mode)
4852 {
4853 	intel_engine_mask_t aux_inv = 0;
4854 	u32 cmd, *cs;
4855 
4856 	cmd = 4;
4857 	if (mode & EMIT_INVALIDATE)
4858 		cmd += 2;
4859 	if (mode & EMIT_INVALIDATE)
4860 		aux_inv = request->engine->mask & ~BIT(BCS0);
4861 	if (aux_inv)
4862 		cmd += 2 * hweight8(aux_inv) + 2;
4863 
4864 	cs = intel_ring_begin(request, cmd);
4865 	if (IS_ERR(cs))
4866 		return PTR_ERR(cs);
4867 
4868 	if (mode & EMIT_INVALIDATE)
4869 		*cs++ = preparser_disable(true);
4870 
4871 	cmd = MI_FLUSH_DW + 1;
4872 
4873 	/* We always require a command barrier so that subsequent
4874 	 * commands, such as breadcrumb interrupts, are strictly ordered
4875 	 * wrt the contents of the write cache being flushed to memory
4876 	 * (and thus being coherent from the CPU).
4877 	 */
4878 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4879 
4880 	if (mode & EMIT_INVALIDATE) {
4881 		cmd |= MI_INVALIDATE_TLB;
4882 		if (request->engine->class == VIDEO_DECODE_CLASS)
4883 			cmd |= MI_INVALIDATE_BSD;
4884 	}
4885 
4886 	*cs++ = cmd;
4887 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4888 	*cs++ = 0; /* upper addr */
4889 	*cs++ = 0; /* value */
4890 
4891 	if (aux_inv) { /* hsdes: 1809175790 */
4892 		struct intel_engine_cs *engine;
4893 		unsigned int tmp;
4894 
4895 		*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4896 		for_each_engine_masked(engine, request->engine->gt,
4897 				       aux_inv, tmp) {
4898 			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4899 			*cs++ = AUX_INV;
4900 		}
4901 		*cs++ = MI_NOOP;
4902 	}
4903 
4904 	if (mode & EMIT_INVALIDATE)
4905 		*cs++ = preparser_disable(false);
4906 
4907 	intel_ring_advance(request, cs);
4908 
4909 	return 0;
4910 }
4911 
4912 static void assert_request_valid(struct i915_request *rq)
4913 {
4914 	struct intel_ring *ring __maybe_unused = rq->ring;
4915 
4916 	/* Can we unwind this request without appearing to go forwards? */
4917 	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4918 }
4919 
4920 /*
4921  * Reserve space for 2 NOOPs at the end of each request to be
4922  * used as a workaround for not being allowed to do lite
4923  * restore with HEAD==TAIL (WaIdleLiteRestore).
4924  */
4925 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4926 {
4927 	/* Ensure there's always at least one preemption point per-request. */
4928 	*cs++ = MI_ARB_CHECK;
4929 	*cs++ = MI_NOOP;
4930 	request->wa_tail = intel_ring_offset(request, cs);
4931 
4932 	/* Check that entire request is less than half the ring */
4933 	assert_request_valid(request);
4934 
4935 	return cs;
4936 }
4937 
4938 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4939 {
4940 	*cs++ = MI_SEMAPHORE_WAIT |
4941 		MI_SEMAPHORE_GLOBAL_GTT |
4942 		MI_SEMAPHORE_POLL |
4943 		MI_SEMAPHORE_SAD_EQ_SDD;
4944 	*cs++ = 0;
4945 	*cs++ = intel_hws_preempt_address(request->engine);
4946 	*cs++ = 0;
4947 
4948 	return cs;
4949 }
4950 
4951 static __always_inline u32*
4952 gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4953 {
4954 	*cs++ = MI_USER_INTERRUPT;
4955 
4956 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4957 	if (intel_engine_has_semaphores(request->engine))
4958 		cs = emit_preempt_busywait(request, cs);
4959 
4960 	request->tail = intel_ring_offset(request, cs);
4961 	assert_ring_tail_valid(request->ring, request->tail);
4962 
4963 	return gen8_emit_wa_tail(request, cs);
4964 }
4965 
4966 static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
4967 {
4968 	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
4969 }
4970 
4971 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4972 {
4973 	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4974 }
4975 
4976 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4977 {
4978 	cs = gen8_emit_pipe_control(cs,
4979 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4980 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4981 				    PIPE_CONTROL_DC_FLUSH_ENABLE,
4982 				    0);
4983 
4984 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4985 	cs = gen8_emit_ggtt_write_rcs(cs,
4986 				      request->fence.seqno,
4987 				      hwsp_offset(request),
4988 				      PIPE_CONTROL_FLUSH_ENABLE |
4989 				      PIPE_CONTROL_CS_STALL);
4990 
4991 	return gen8_emit_fini_breadcrumb_tail(request, cs);
4992 }
4993 
4994 static u32 *
4995 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4996 {
4997 	cs = gen8_emit_ggtt_write_rcs(cs,
4998 				      request->fence.seqno,
4999 				      hwsp_offset(request),
5000 				      PIPE_CONTROL_CS_STALL |
5001 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
5002 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
5003 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
5004 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
5005 				      PIPE_CONTROL_FLUSH_ENABLE);
5006 
5007 	return gen8_emit_fini_breadcrumb_tail(request, cs);
5008 }
5009 
5010 /*
5011  * Note that the CS instruction pre-parser will not stall on the breadcrumb
5012  * flush and will continue pre-fetching the instructions after it before the
5013  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
5014  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
5015  * of the next request before the memory has been flushed, we're guaranteed that
5016  * we won't access the batch itself too early.
5017  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
5018  * so, if the current request is modifying an instruction in the next request on
5019  * the same intel_context, we might pre-fetch and then execute the pre-update
5020  * instruction. To avoid this, the users of self-modifying code should either
5021  * disable the parser around the code emitting the memory writes, via a new flag
5022  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
5023  * the in-kernel use-cases we've opted to use a separate context, see
5024  * reloc_gpu() as an example.
5025  * All the above applies only to the instructions themselves. Non-inline data
5026  * used by the instructions is not pre-fetched.
5027  */
5028 
5029 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
5030 {
5031 	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
5032 		MI_SEMAPHORE_GLOBAL_GTT |
5033 		MI_SEMAPHORE_POLL |
5034 		MI_SEMAPHORE_SAD_EQ_SDD;
5035 	*cs++ = 0;
5036 	*cs++ = intel_hws_preempt_address(request->engine);
5037 	*cs++ = 0;
5038 	*cs++ = 0;
5039 	*cs++ = MI_NOOP;
5040 
5041 	return cs;
5042 }
5043 
5044 static __always_inline u32*
5045 gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
5046 {
5047 	*cs++ = MI_USER_INTERRUPT;
5048 
5049 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5050 	if (intel_engine_has_semaphores(request->engine))
5051 		cs = gen12_emit_preempt_busywait(request, cs);
5052 
5053 	request->tail = intel_ring_offset(request, cs);
5054 	assert_ring_tail_valid(request->ring, request->tail);
5055 
5056 	return gen8_emit_wa_tail(request, cs);
5057 }
5058 
5059 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
5060 {
5061 	/* XXX Stalling flush before seqno write; post-sync not */
5062 	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
5063 	return gen12_emit_fini_breadcrumb_tail(rq, cs);
5064 }
5065 
5066 static u32 *
5067 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
5068 {
5069 	cs = gen12_emit_ggtt_write_rcs(cs,
5070 				       request->fence.seqno,
5071 				       hwsp_offset(request),
5072 				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
5073 				       PIPE_CONTROL_CS_STALL |
5074 				       PIPE_CONTROL_TILE_CACHE_FLUSH |
5075 				       PIPE_CONTROL_FLUSH_L3 |
5076 				       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
5077 				       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
5078 				       /* Wa_1409600907:tgl */
5079 				       PIPE_CONTROL_DEPTH_STALL |
5080 				       PIPE_CONTROL_DC_FLUSH_ENABLE |
5081 				       PIPE_CONTROL_FLUSH_ENABLE);
5082 
5083 	return gen12_emit_fini_breadcrumb_tail(request, cs);
5084 }
5085 
5086 static void execlists_park(struct intel_engine_cs *engine)
5087 {
5088 	cancel_timer(&engine->execlists.timer);
5089 	cancel_timer(&engine->execlists.preempt);
5090 }
5091 
5092 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
5093 {
5094 	engine->submit_request = execlists_submit_request;
5095 	engine->schedule = i915_schedule;
5096 	engine->execlists.tasklet.func = execlists_submission_tasklet;
5097 
5098 	engine->reset.prepare = execlists_reset_prepare;
5099 	engine->reset.rewind = execlists_reset_rewind;
5100 	engine->reset.cancel = execlists_reset_cancel;
5101 	engine->reset.finish = execlists_reset_finish;
5102 
5103 	engine->park = execlists_park;
5104 	engine->unpark = NULL;
5105 
5106 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
5107 	if (!intel_vgpu_active(engine->i915)) {
5108 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
5109 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5110 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
5111 			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
5112 				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
5113 		}
5114 	}
5115 
5116 	if (INTEL_GEN(engine->i915) >= 12)
5117 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
5118 
5119 	if (intel_engine_has_preemption(engine))
5120 		engine->emit_bb_start = gen8_emit_bb_start;
5121 	else
5122 		engine->emit_bb_start = gen8_emit_bb_start_noarb;
5123 }
5124 
5125 static void execlists_shutdown(struct intel_engine_cs *engine)
5126 {
5127 	/* Synchronise with residual timers and any softirq they raise */
5128 	del_timer_sync(&engine->execlists.timer);
5129 	del_timer_sync(&engine->execlists.preempt);
5130 	tasklet_kill(&engine->execlists.tasklet);
5131 }
5132 
5133 static void execlists_release(struct intel_engine_cs *engine)
5134 {
5135 	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
5136 
5137 	execlists_shutdown(engine);
5138 
5139 	intel_engine_cleanup_common(engine);
5140 	lrc_destroy_wa_ctx(engine);
5141 }
5142 
5143 static void
5144 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5145 {
5146 	/* Default vfuncs which can be overriden by each engine. */
5147 
5148 	engine->resume = execlists_resume;
5149 
5150 	engine->cops = &execlists_context_ops;
5151 	engine->request_alloc = execlists_request_alloc;
5152 
5153 	engine->emit_flush = gen8_emit_flush;
5154 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5155 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5156 	if (INTEL_GEN(engine->i915) >= 12) {
5157 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5158 		engine->emit_flush = gen12_emit_flush;
5159 	}
5160 	engine->set_default_submission = intel_execlists_set_default_submission;
5161 
5162 	if (INTEL_GEN(engine->i915) < 11) {
5163 		engine->irq_enable = gen8_logical_ring_enable_irq;
5164 		engine->irq_disable = gen8_logical_ring_disable_irq;
5165 	} else {
5166 		/*
5167 		 * TODO: On Gen11 interrupt masks need to be clear
5168 		 * to allow C6 entry. Keep interrupts enabled at
5169 		 * and take the hit of generating extra interrupts
5170 		 * until a more refined solution exists.
5171 		 */
5172 	}
5173 }
5174 
5175 static inline void
5176 logical_ring_default_irqs(struct intel_engine_cs *engine)
5177 {
5178 	unsigned int shift = 0;
5179 
5180 	if (INTEL_GEN(engine->i915) < 11) {
5181 		const u8 irq_shifts[] = {
5182 			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
5183 			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
5184 			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5185 			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5186 			[VECS0] = GEN8_VECS_IRQ_SHIFT,
5187 		};
5188 
5189 		shift = irq_shifts[engine->id];
5190 	}
5191 
5192 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5193 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5194 	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5195 	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5196 }
5197 
5198 static void rcs_submission_override(struct intel_engine_cs *engine)
5199 {
5200 	switch (INTEL_GEN(engine->i915)) {
5201 	case 12:
5202 		engine->emit_flush = gen12_emit_flush_render;
5203 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5204 		break;
5205 	case 11:
5206 		engine->emit_flush = gen11_emit_flush_render;
5207 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5208 		break;
5209 	default:
5210 		engine->emit_flush = gen8_emit_flush_render;
5211 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5212 		break;
5213 	}
5214 }
5215 
5216 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5217 {
5218 	struct intel_engine_execlists * const execlists = &engine->execlists;
5219 	struct drm_i915_private *i915 = engine->i915;
5220 	struct intel_uncore *uncore = engine->uncore;
5221 	u32 base = engine->mmio_base;
5222 
5223 	tasklet_init(&engine->execlists.tasklet,
5224 		     execlists_submission_tasklet, (unsigned long)engine);
5225 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5226 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5227 
5228 	logical_ring_default_vfuncs(engine);
5229 	logical_ring_default_irqs(engine);
5230 
5231 	if (engine->class == RENDER_CLASS)
5232 		rcs_submission_override(engine);
5233 
5234 	if (intel_init_workaround_bb(engine))
5235 		/*
5236 		 * We continue even if we fail to initialize WA batch
5237 		 * because we only expect rare glitches but nothing
5238 		 * critical to prevent us from using GPU
5239 		 */
5240 		drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5241 
5242 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
5243 		execlists->submit_reg = uncore->regs +
5244 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5245 		execlists->ctrl_reg = uncore->regs +
5246 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5247 	} else {
5248 		execlists->submit_reg = uncore->regs +
5249 			i915_mmio_reg_offset(RING_ELSP(base));
5250 	}
5251 
5252 	execlists->csb_status =
5253 		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5254 
5255 	execlists->csb_write =
5256 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
5257 
5258 	if (INTEL_GEN(i915) < 11)
5259 		execlists->csb_size = GEN8_CSB_ENTRIES;
5260 	else
5261 		execlists->csb_size = GEN11_CSB_ENTRIES;
5262 
5263 	if (INTEL_GEN(engine->i915) >= 11) {
5264 		execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5265 		execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5266 	}
5267 
5268 	/* Finally, take ownership and responsibility for cleanup! */
5269 	engine->sanitize = execlists_sanitize;
5270 	engine->release = execlists_release;
5271 
5272 	return 0;
5273 }
5274 
5275 static void init_common_reg_state(u32 * const regs,
5276 				  const struct intel_engine_cs *engine,
5277 				  const struct intel_ring *ring,
5278 				  bool inhibit)
5279 {
5280 	u32 ctl;
5281 
5282 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5283 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5284 	if (inhibit)
5285 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5286 	if (INTEL_GEN(engine->i915) < 11)
5287 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5288 					   CTX_CTRL_RS_CTX_ENABLE);
5289 	regs[CTX_CONTEXT_CONTROL] = ctl;
5290 
5291 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5292 	regs[CTX_TIMESTAMP] = 0;
5293 }
5294 
5295 static void init_wa_bb_reg_state(u32 * const regs,
5296 				 const struct intel_engine_cs *engine)
5297 {
5298 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5299 
5300 	if (wa_ctx->per_ctx.size) {
5301 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5302 
5303 		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5304 		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5305 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5306 	}
5307 
5308 	if (wa_ctx->indirect_ctx.size) {
5309 		lrc_ring_setup_indirect_ctx(regs, engine,
5310 					    i915_ggtt_offset(wa_ctx->vma) +
5311 					    wa_ctx->indirect_ctx.offset,
5312 					    wa_ctx->indirect_ctx.size);
5313 	}
5314 }
5315 
5316 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5317 {
5318 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
5319 		/* 64b PPGTT (48bit canonical)
5320 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
5321 		 * other PDP Descriptors are ignored.
5322 		 */
5323 		ASSIGN_CTX_PML4(ppgtt, regs);
5324 	} else {
5325 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
5326 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
5327 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
5328 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
5329 	}
5330 }
5331 
5332 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5333 {
5334 	if (i915_is_ggtt(vm))
5335 		return i915_vm_to_ggtt(vm)->alias;
5336 	else
5337 		return i915_vm_to_ppgtt(vm);
5338 }
5339 
5340 static void execlists_init_reg_state(u32 *regs,
5341 				     const struct intel_context *ce,
5342 				     const struct intel_engine_cs *engine,
5343 				     const struct intel_ring *ring,
5344 				     bool inhibit)
5345 {
5346 	/*
5347 	 * A context is actually a big batch buffer with several
5348 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5349 	 * values we are setting here are only for the first context restore:
5350 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
5351 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5352 	 * we are not initializing here).
5353 	 *
5354 	 * Must keep consistent with virtual_update_register_offsets().
5355 	 */
5356 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
5357 
5358 	init_common_reg_state(regs, engine, ring, inhibit);
5359 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5360 
5361 	init_wa_bb_reg_state(regs, engine);
5362 
5363 	__reset_stop_ring(regs, engine);
5364 }
5365 
5366 static int
5367 populate_lr_context(struct intel_context *ce,
5368 		    struct drm_i915_gem_object *ctx_obj,
5369 		    struct intel_engine_cs *engine,
5370 		    struct intel_ring *ring)
5371 {
5372 	bool inhibit = true;
5373 	void *vaddr;
5374 
5375 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5376 	if (IS_ERR(vaddr)) {
5377 		drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5378 		return PTR_ERR(vaddr);
5379 	}
5380 
5381 	set_redzone(vaddr, engine);
5382 
5383 	if (engine->default_state) {
5384 		shmem_read(engine->default_state, 0,
5385 			   vaddr, engine->context_size);
5386 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
5387 		inhibit = false;
5388 	}
5389 
5390 	/* Clear the ppHWSP (inc. per-context counters) */
5391 	memset(vaddr, 0, PAGE_SIZE);
5392 
5393 	/*
5394 	 * The second page of the context object contains some registers which
5395 	 * must be set up prior to the first execution.
5396 	 */
5397 	execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5398 				 ce, engine, ring, inhibit);
5399 
5400 	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5401 	i915_gem_object_unpin_map(ctx_obj);
5402 	return 0;
5403 }
5404 
5405 static struct intel_timeline *pinned_timeline(struct intel_context *ce)
5406 {
5407 	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
5408 
5409 	return intel_timeline_create_from_engine(ce->engine,
5410 						 page_unmask_bits(tl));
5411 }
5412 
5413 static int __execlists_context_alloc(struct intel_context *ce,
5414 				     struct intel_engine_cs *engine)
5415 {
5416 	struct drm_i915_gem_object *ctx_obj;
5417 	struct intel_ring *ring;
5418 	struct i915_vma *vma;
5419 	u32 context_size;
5420 	int ret;
5421 
5422 	GEM_BUG_ON(ce->state);
5423 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5424 
5425 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5426 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5427 
5428 	if (INTEL_GEN(engine->i915) == 12) {
5429 		ce->wa_bb_page = context_size / PAGE_SIZE;
5430 		context_size += PAGE_SIZE;
5431 	}
5432 
5433 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5434 	if (IS_ERR(ctx_obj))
5435 		return PTR_ERR(ctx_obj);
5436 
5437 	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5438 	if (IS_ERR(vma)) {
5439 		ret = PTR_ERR(vma);
5440 		goto error_deref_obj;
5441 	}
5442 
5443 	if (!page_mask_bits(ce->timeline)) {
5444 		struct intel_timeline *tl;
5445 
5446 		/*
5447 		 * Use the static global HWSP for the kernel context, and
5448 		 * a dynamically allocated cacheline for everyone else.
5449 		 */
5450 		if (unlikely(ce->timeline))
5451 			tl = pinned_timeline(ce);
5452 		else
5453 			tl = intel_timeline_create(engine->gt);
5454 		if (IS_ERR(tl)) {
5455 			ret = PTR_ERR(tl);
5456 			goto error_deref_obj;
5457 		}
5458 
5459 		ce->timeline = tl;
5460 	}
5461 
5462 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5463 	if (IS_ERR(ring)) {
5464 		ret = PTR_ERR(ring);
5465 		goto error_deref_obj;
5466 	}
5467 
5468 	ret = populate_lr_context(ce, ctx_obj, engine, ring);
5469 	if (ret) {
5470 		drm_dbg(&engine->i915->drm,
5471 			"Failed to populate LRC: %d\n", ret);
5472 		goto error_ring_free;
5473 	}
5474 
5475 	ce->ring = ring;
5476 	ce->state = vma;
5477 
5478 	return 0;
5479 
5480 error_ring_free:
5481 	intel_ring_put(ring);
5482 error_deref_obj:
5483 	i915_gem_object_put(ctx_obj);
5484 	return ret;
5485 }
5486 
5487 static struct list_head *virtual_queue(struct virtual_engine *ve)
5488 {
5489 	return &ve->base.execlists.default_priolist.requests[0];
5490 }
5491 
5492 static void rcu_virtual_context_destroy(struct work_struct *wrk)
5493 {
5494 	struct virtual_engine *ve =
5495 		container_of(wrk, typeof(*ve), rcu.work);
5496 	unsigned int n;
5497 
5498 	GEM_BUG_ON(ve->context.inflight);
5499 
5500 	/* Preempt-to-busy may leave a stale request behind. */
5501 	if (unlikely(ve->request)) {
5502 		struct i915_request *old;
5503 
5504 		spin_lock_irq(&ve->base.active.lock);
5505 
5506 		old = fetch_and_zero(&ve->request);
5507 		if (old) {
5508 			GEM_BUG_ON(!i915_request_completed(old));
5509 			__i915_request_submit(old);
5510 			i915_request_put(old);
5511 		}
5512 
5513 		spin_unlock_irq(&ve->base.active.lock);
5514 	}
5515 
5516 	/*
5517 	 * Flush the tasklet in case it is still running on another core.
5518 	 *
5519 	 * This needs to be done before we remove ourselves from the siblings'
5520 	 * rbtrees as in the case it is running in parallel, it may reinsert
5521 	 * the rb_node into a sibling.
5522 	 */
5523 	tasklet_kill(&ve->base.execlists.tasklet);
5524 
5525 	/* Decouple ourselves from the siblings, no more access allowed. */
5526 	for (n = 0; n < ve->num_siblings; n++) {
5527 		struct intel_engine_cs *sibling = ve->siblings[n];
5528 		struct rb_node *node = &ve->nodes[sibling->id].rb;
5529 
5530 		if (RB_EMPTY_NODE(node))
5531 			continue;
5532 
5533 		spin_lock_irq(&sibling->active.lock);
5534 
5535 		/* Detachment is lazily performed in the execlists tasklet */
5536 		if (!RB_EMPTY_NODE(node))
5537 			rb_erase_cached(node, &sibling->execlists.virtual);
5538 
5539 		spin_unlock_irq(&sibling->active.lock);
5540 	}
5541 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5542 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5543 
5544 	if (ve->context.state)
5545 		__execlists_context_fini(&ve->context);
5546 	intel_context_fini(&ve->context);
5547 
5548 	intel_breadcrumbs_free(ve->base.breadcrumbs);
5549 	intel_engine_free_request_pool(&ve->base);
5550 
5551 	kfree(ve->bonds);
5552 	kfree(ve);
5553 }
5554 
5555 static void virtual_context_destroy(struct kref *kref)
5556 {
5557 	struct virtual_engine *ve =
5558 		container_of(kref, typeof(*ve), context.ref);
5559 
5560 	GEM_BUG_ON(!list_empty(&ve->context.signals));
5561 
5562 	/*
5563 	 * When destroying the virtual engine, we have to be aware that
5564 	 * it may still be in use from an hardirq/softirq context causing
5565 	 * the resubmission of a completed request (background completion
5566 	 * due to preempt-to-busy). Before we can free the engine, we need
5567 	 * to flush the submission code and tasklets that are still potentially
5568 	 * accessing the engine. Flushing the tasklets requires process context,
5569 	 * and since we can guard the resubmit onto the engine with an RCU read
5570 	 * lock, we can delegate the free of the engine to an RCU worker.
5571 	 */
5572 	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
5573 	queue_rcu_work(system_wq, &ve->rcu);
5574 }
5575 
5576 static void virtual_engine_initial_hint(struct virtual_engine *ve)
5577 {
5578 	int swp;
5579 
5580 	/*
5581 	 * Pick a random sibling on starting to help spread the load around.
5582 	 *
5583 	 * New contexts are typically created with exactly the same order
5584 	 * of siblings, and often started in batches. Due to the way we iterate
5585 	 * the array of sibling when submitting requests, sibling[0] is
5586 	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5587 	 * randomised across the system, we also help spread the load by the
5588 	 * first engine we inspect being different each time.
5589 	 *
5590 	 * NB This does not force us to execute on this engine, it will just
5591 	 * typically be the first we inspect for submission.
5592 	 */
5593 	swp = prandom_u32_max(ve->num_siblings);
5594 	if (swp)
5595 		swap(ve->siblings[swp], ve->siblings[0]);
5596 }
5597 
5598 static int virtual_context_alloc(struct intel_context *ce)
5599 {
5600 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5601 
5602 	return __execlists_context_alloc(ce, ve->siblings[0]);
5603 }
5604 
5605 static int virtual_context_pin(struct intel_context *ce, void *vaddr)
5606 {
5607 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5608 
5609 	/* Note: we must use a real engine class for setting up reg state */
5610 	return __execlists_context_pin(ce, ve->siblings[0], vaddr);
5611 }
5612 
5613 static void virtual_context_enter(struct intel_context *ce)
5614 {
5615 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5616 	unsigned int n;
5617 
5618 	for (n = 0; n < ve->num_siblings; n++)
5619 		intel_engine_pm_get(ve->siblings[n]);
5620 
5621 	intel_timeline_enter(ce->timeline);
5622 }
5623 
5624 static void virtual_context_exit(struct intel_context *ce)
5625 {
5626 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5627 	unsigned int n;
5628 
5629 	intel_timeline_exit(ce->timeline);
5630 
5631 	for (n = 0; n < ve->num_siblings; n++)
5632 		intel_engine_pm_put(ve->siblings[n]);
5633 }
5634 
5635 static const struct intel_context_ops virtual_context_ops = {
5636 	.alloc = virtual_context_alloc,
5637 
5638 	.pre_pin = execlists_context_pre_pin,
5639 	.pin = virtual_context_pin,
5640 	.unpin = execlists_context_unpin,
5641 	.post_unpin = execlists_context_post_unpin,
5642 
5643 	.enter = virtual_context_enter,
5644 	.exit = virtual_context_exit,
5645 
5646 	.destroy = virtual_context_destroy,
5647 };
5648 
5649 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5650 {
5651 	struct i915_request *rq;
5652 	intel_engine_mask_t mask;
5653 
5654 	rq = READ_ONCE(ve->request);
5655 	if (!rq)
5656 		return 0;
5657 
5658 	/* The rq is ready for submission; rq->execution_mask is now stable. */
5659 	mask = rq->execution_mask;
5660 	if (unlikely(!mask)) {
5661 		/* Invalid selection, submit to a random engine in error */
5662 		i915_request_set_error_once(rq, -ENODEV);
5663 		mask = ve->siblings[0]->mask;
5664 	}
5665 
5666 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5667 		     rq->fence.context, rq->fence.seqno,
5668 		     mask, ve->base.execlists.queue_priority_hint);
5669 
5670 	return mask;
5671 }
5672 
5673 static void virtual_submission_tasklet(unsigned long data)
5674 {
5675 	struct virtual_engine * const ve = (struct virtual_engine *)data;
5676 	const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5677 	intel_engine_mask_t mask;
5678 	unsigned int n;
5679 
5680 	rcu_read_lock();
5681 	mask = virtual_submission_mask(ve);
5682 	rcu_read_unlock();
5683 	if (unlikely(!mask))
5684 		return;
5685 
5686 	local_irq_disable();
5687 	for (n = 0; n < ve->num_siblings; n++) {
5688 		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5689 		struct ve_node * const node = &ve->nodes[sibling->id];
5690 		struct rb_node **parent, *rb;
5691 		bool first;
5692 
5693 		if (!READ_ONCE(ve->request))
5694 			break; /* already handled by a sibling's tasklet */
5695 
5696 		if (unlikely(!(mask & sibling->mask))) {
5697 			if (!RB_EMPTY_NODE(&node->rb)) {
5698 				spin_lock(&sibling->active.lock);
5699 				rb_erase_cached(&node->rb,
5700 						&sibling->execlists.virtual);
5701 				RB_CLEAR_NODE(&node->rb);
5702 				spin_unlock(&sibling->active.lock);
5703 			}
5704 			continue;
5705 		}
5706 
5707 		spin_lock(&sibling->active.lock);
5708 
5709 		if (!RB_EMPTY_NODE(&node->rb)) {
5710 			/*
5711 			 * Cheat and avoid rebalancing the tree if we can
5712 			 * reuse this node in situ.
5713 			 */
5714 			first = rb_first_cached(&sibling->execlists.virtual) ==
5715 				&node->rb;
5716 			if (prio == node->prio || (prio > node->prio && first))
5717 				goto submit_engine;
5718 
5719 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5720 		}
5721 
5722 		rb = NULL;
5723 		first = true;
5724 		parent = &sibling->execlists.virtual.rb_root.rb_node;
5725 		while (*parent) {
5726 			struct ve_node *other;
5727 
5728 			rb = *parent;
5729 			other = rb_entry(rb, typeof(*other), rb);
5730 			if (prio > other->prio) {
5731 				parent = &rb->rb_left;
5732 			} else {
5733 				parent = &rb->rb_right;
5734 				first = false;
5735 			}
5736 		}
5737 
5738 		rb_link_node(&node->rb, rb, parent);
5739 		rb_insert_color_cached(&node->rb,
5740 				       &sibling->execlists.virtual,
5741 				       first);
5742 
5743 submit_engine:
5744 		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5745 		node->prio = prio;
5746 		if (first && prio > sibling->execlists.queue_priority_hint)
5747 			tasklet_hi_schedule(&sibling->execlists.tasklet);
5748 
5749 		spin_unlock(&sibling->active.lock);
5750 	}
5751 	local_irq_enable();
5752 }
5753 
5754 static void virtual_submit_request(struct i915_request *rq)
5755 {
5756 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5757 	struct i915_request *old;
5758 	unsigned long flags;
5759 
5760 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5761 		     rq->fence.context,
5762 		     rq->fence.seqno);
5763 
5764 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5765 
5766 	spin_lock_irqsave(&ve->base.active.lock, flags);
5767 
5768 	old = ve->request;
5769 	if (old) { /* background completion event from preempt-to-busy */
5770 		GEM_BUG_ON(!i915_request_completed(old));
5771 		__i915_request_submit(old);
5772 		i915_request_put(old);
5773 	}
5774 
5775 	if (i915_request_completed(rq)) {
5776 		__i915_request_submit(rq);
5777 
5778 		ve->base.execlists.queue_priority_hint = INT_MIN;
5779 		ve->request = NULL;
5780 	} else {
5781 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
5782 		ve->request = i915_request_get(rq);
5783 
5784 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5785 		list_move_tail(&rq->sched.link, virtual_queue(ve));
5786 
5787 		tasklet_hi_schedule(&ve->base.execlists.tasklet);
5788 	}
5789 
5790 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
5791 }
5792 
5793 static struct ve_bond *
5794 virtual_find_bond(struct virtual_engine *ve,
5795 		  const struct intel_engine_cs *master)
5796 {
5797 	int i;
5798 
5799 	for (i = 0; i < ve->num_bonds; i++) {
5800 		if (ve->bonds[i].master == master)
5801 			return &ve->bonds[i];
5802 	}
5803 
5804 	return NULL;
5805 }
5806 
5807 static void
5808 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5809 {
5810 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
5811 	intel_engine_mask_t allowed, exec;
5812 	struct ve_bond *bond;
5813 
5814 	allowed = ~to_request(signal)->engine->mask;
5815 
5816 	bond = virtual_find_bond(ve, to_request(signal)->engine);
5817 	if (bond)
5818 		allowed &= bond->sibling_mask;
5819 
5820 	/* Restrict the bonded request to run on only the available engines */
5821 	exec = READ_ONCE(rq->execution_mask);
5822 	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5823 		;
5824 
5825 	/* Prevent the master from being re-run on the bonded engines */
5826 	to_request(signal)->execution_mask &= ~allowed;
5827 }
5828 
5829 struct intel_context *
5830 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5831 			       unsigned int count)
5832 {
5833 	struct virtual_engine *ve;
5834 	unsigned int n;
5835 	int err;
5836 
5837 	if (count == 0)
5838 		return ERR_PTR(-EINVAL);
5839 
5840 	if (count == 1)
5841 		return intel_context_create(siblings[0]);
5842 
5843 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5844 	if (!ve)
5845 		return ERR_PTR(-ENOMEM);
5846 
5847 	ve->base.i915 = siblings[0]->i915;
5848 	ve->base.gt = siblings[0]->gt;
5849 	ve->base.uncore = siblings[0]->uncore;
5850 	ve->base.id = -1;
5851 
5852 	ve->base.class = OTHER_CLASS;
5853 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5854 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5855 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5856 
5857 	/*
5858 	 * The decision on whether to submit a request using semaphores
5859 	 * depends on the saturated state of the engine. We only compute
5860 	 * this during HW submission of the request, and we need for this
5861 	 * state to be globally applied to all requests being submitted
5862 	 * to this engine. Virtual engines encompass more than one physical
5863 	 * engine and so we cannot accurately tell in advance if one of those
5864 	 * engines is already saturated and so cannot afford to use a semaphore
5865 	 * and be pessimized in priority for doing so -- if we are the only
5866 	 * context using semaphores after all other clients have stopped, we
5867 	 * will be starved on the saturated system. Such a global switch for
5868 	 * semaphores is less than ideal, but alas is the current compromise.
5869 	 */
5870 	ve->base.saturated = ALL_ENGINES;
5871 
5872 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5873 
5874 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5875 	intel_engine_init_execlists(&ve->base);
5876 
5877 	ve->base.cops = &virtual_context_ops;
5878 	ve->base.request_alloc = execlists_request_alloc;
5879 
5880 	ve->base.schedule = i915_schedule;
5881 	ve->base.submit_request = virtual_submit_request;
5882 	ve->base.bond_execute = virtual_bond_execute;
5883 
5884 	INIT_LIST_HEAD(virtual_queue(ve));
5885 	ve->base.execlists.queue_priority_hint = INT_MIN;
5886 	tasklet_init(&ve->base.execlists.tasklet,
5887 		     virtual_submission_tasklet,
5888 		     (unsigned long)ve);
5889 
5890 	intel_context_init(&ve->context, &ve->base);
5891 
5892 	ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
5893 	if (!ve->base.breadcrumbs) {
5894 		err = -ENOMEM;
5895 		goto err_put;
5896 	}
5897 
5898 	for (n = 0; n < count; n++) {
5899 		struct intel_engine_cs *sibling = siblings[n];
5900 
5901 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
5902 		if (sibling->mask & ve->base.mask) {
5903 			DRM_DEBUG("duplicate %s entry in load balancer\n",
5904 				  sibling->name);
5905 			err = -EINVAL;
5906 			goto err_put;
5907 		}
5908 
5909 		/*
5910 		 * The virtual engine implementation is tightly coupled to
5911 		 * the execlists backend -- we push out request directly
5912 		 * into a tree inside each physical engine. We could support
5913 		 * layering if we handle cloning of the requests and
5914 		 * submitting a copy into each backend.
5915 		 */
5916 		if (sibling->execlists.tasklet.func !=
5917 		    execlists_submission_tasklet) {
5918 			err = -ENODEV;
5919 			goto err_put;
5920 		}
5921 
5922 		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5923 		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5924 
5925 		ve->siblings[ve->num_siblings++] = sibling;
5926 		ve->base.mask |= sibling->mask;
5927 
5928 		/*
5929 		 * All physical engines must be compatible for their emission
5930 		 * functions (as we build the instructions during request
5931 		 * construction and do not alter them before submission
5932 		 * on the physical engine). We use the engine class as a guide
5933 		 * here, although that could be refined.
5934 		 */
5935 		if (ve->base.class != OTHER_CLASS) {
5936 			if (ve->base.class != sibling->class) {
5937 				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5938 					  sibling->class, ve->base.class);
5939 				err = -EINVAL;
5940 				goto err_put;
5941 			}
5942 			continue;
5943 		}
5944 
5945 		ve->base.class = sibling->class;
5946 		ve->base.uabi_class = sibling->uabi_class;
5947 		snprintf(ve->base.name, sizeof(ve->base.name),
5948 			 "v%dx%d", ve->base.class, count);
5949 		ve->base.context_size = sibling->context_size;
5950 
5951 		ve->base.emit_bb_start = sibling->emit_bb_start;
5952 		ve->base.emit_flush = sibling->emit_flush;
5953 		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5954 		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5955 		ve->base.emit_fini_breadcrumb_dw =
5956 			sibling->emit_fini_breadcrumb_dw;
5957 
5958 		ve->base.flags = sibling->flags;
5959 	}
5960 
5961 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5962 
5963 	virtual_engine_initial_hint(ve);
5964 	return &ve->context;
5965 
5966 err_put:
5967 	intel_context_put(&ve->context);
5968 	return ERR_PTR(err);
5969 }
5970 
5971 struct intel_context *
5972 intel_execlists_clone_virtual(struct intel_engine_cs *src)
5973 {
5974 	struct virtual_engine *se = to_virtual_engine(src);
5975 	struct intel_context *dst;
5976 
5977 	dst = intel_execlists_create_virtual(se->siblings,
5978 					     se->num_siblings);
5979 	if (IS_ERR(dst))
5980 		return dst;
5981 
5982 	if (se->num_bonds) {
5983 		struct virtual_engine *de = to_virtual_engine(dst->engine);
5984 
5985 		de->bonds = kmemdup(se->bonds,
5986 				    sizeof(*se->bonds) * se->num_bonds,
5987 				    GFP_KERNEL);
5988 		if (!de->bonds) {
5989 			intel_context_put(dst);
5990 			return ERR_PTR(-ENOMEM);
5991 		}
5992 
5993 		de->num_bonds = se->num_bonds;
5994 	}
5995 
5996 	return dst;
5997 }
5998 
5999 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
6000 				     const struct intel_engine_cs *master,
6001 				     const struct intel_engine_cs *sibling)
6002 {
6003 	struct virtual_engine *ve = to_virtual_engine(engine);
6004 	struct ve_bond *bond;
6005 	int n;
6006 
6007 	/* Sanity check the sibling is part of the virtual engine */
6008 	for (n = 0; n < ve->num_siblings; n++)
6009 		if (sibling == ve->siblings[n])
6010 			break;
6011 	if (n == ve->num_siblings)
6012 		return -EINVAL;
6013 
6014 	bond = virtual_find_bond(ve, master);
6015 	if (bond) {
6016 		bond->sibling_mask |= sibling->mask;
6017 		return 0;
6018 	}
6019 
6020 	bond = krealloc(ve->bonds,
6021 			sizeof(*bond) * (ve->num_bonds + 1),
6022 			GFP_KERNEL);
6023 	if (!bond)
6024 		return -ENOMEM;
6025 
6026 	bond[ve->num_bonds].master = master;
6027 	bond[ve->num_bonds].sibling_mask = sibling->mask;
6028 
6029 	ve->bonds = bond;
6030 	ve->num_bonds++;
6031 
6032 	return 0;
6033 }
6034 
6035 void intel_execlists_show_requests(struct intel_engine_cs *engine,
6036 				   struct drm_printer *m,
6037 				   void (*show_request)(struct drm_printer *m,
6038 							struct i915_request *rq,
6039 							const char *prefix),
6040 				   unsigned int max)
6041 {
6042 	const struct intel_engine_execlists *execlists = &engine->execlists;
6043 	struct i915_request *rq, *last;
6044 	unsigned long flags;
6045 	unsigned int count;
6046 	struct rb_node *rb;
6047 
6048 	spin_lock_irqsave(&engine->active.lock, flags);
6049 
6050 	last = NULL;
6051 	count = 0;
6052 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
6053 		if (count++ < max - 1)
6054 			show_request(m, rq, "\t\tE ");
6055 		else
6056 			last = rq;
6057 	}
6058 	if (last) {
6059 		if (count > max) {
6060 			drm_printf(m,
6061 				   "\t\t...skipping %d executing requests...\n",
6062 				   count - max);
6063 		}
6064 		show_request(m, last, "\t\tE ");
6065 	}
6066 
6067 	if (execlists->switch_priority_hint != INT_MIN)
6068 		drm_printf(m, "\t\tSwitch priority hint: %d\n",
6069 			   READ_ONCE(execlists->switch_priority_hint));
6070 	if (execlists->queue_priority_hint != INT_MIN)
6071 		drm_printf(m, "\t\tQueue priority hint: %d\n",
6072 			   READ_ONCE(execlists->queue_priority_hint));
6073 
6074 	last = NULL;
6075 	count = 0;
6076 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
6077 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
6078 		int i;
6079 
6080 		priolist_for_each_request(rq, p, i) {
6081 			if (count++ < max - 1)
6082 				show_request(m, rq, "\t\tQ ");
6083 			else
6084 				last = rq;
6085 		}
6086 	}
6087 	if (last) {
6088 		if (count > max) {
6089 			drm_printf(m,
6090 				   "\t\t...skipping %d queued requests...\n",
6091 				   count - max);
6092 		}
6093 		show_request(m, last, "\t\tQ ");
6094 	}
6095 
6096 	last = NULL;
6097 	count = 0;
6098 	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
6099 		struct virtual_engine *ve =
6100 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
6101 		struct i915_request *rq = READ_ONCE(ve->request);
6102 
6103 		if (rq) {
6104 			if (count++ < max - 1)
6105 				show_request(m, rq, "\t\tV ");
6106 			else
6107 				last = rq;
6108 		}
6109 	}
6110 	if (last) {
6111 		if (count > max) {
6112 			drm_printf(m,
6113 				   "\t\t...skipping %d virtual requests...\n",
6114 				   count - max);
6115 		}
6116 		show_request(m, last, "\t\tV ");
6117 	}
6118 
6119 	spin_unlock_irqrestore(&engine->active.lock, flags);
6120 }
6121 
6122 void intel_lr_context_reset(struct intel_engine_cs *engine,
6123 			    struct intel_context *ce,
6124 			    u32 head,
6125 			    bool scrub)
6126 {
6127 	GEM_BUG_ON(!intel_context_is_pinned(ce));
6128 
6129 	/*
6130 	 * We want a simple context + ring to execute the breadcrumb update.
6131 	 * We cannot rely on the context being intact across the GPU hang,
6132 	 * so clear it and rebuild just what we need for the breadcrumb.
6133 	 * All pending requests for this context will be zapped, and any
6134 	 * future request will be after userspace has had the opportunity
6135 	 * to recreate its own state.
6136 	 */
6137 	if (scrub)
6138 		restore_default_state(ce, engine);
6139 
6140 	/* Rerun the request; its payload has been neutered (if guilty). */
6141 	__execlists_update_reg_state(ce, engine, head);
6142 }
6143 
6144 bool
6145 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
6146 {
6147 	return engine->set_default_submission ==
6148 	       intel_execlists_set_default_submission;
6149 }
6150 
6151 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
6152 #include "selftest_lrc.c"
6153 #endif
6154