xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_lrc.c (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
25 
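/*
 * Allocate and pin a single page into the global GTT, used as a scratch
 * buffer for the GPU to write results into during the tests below.
 */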
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
27 {
28 	struct drm_i915_gem_object *obj;
29 	struct i915_vma *vma;
30 	int err;
31 
32 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
33 	if (IS_ERR(obj))
34 		return ERR_CAST(obj);
35 
36 	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
37 
38 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
39 	if (IS_ERR(vma)) {
40 		i915_gem_object_put(obj);
41 		return vma;
42 	}
43 
44 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
45 	if (err) {
46 		i915_gem_object_put(obj);
47 		return ERR_PTR(err);
48 	}
49 
50 	return vma;
51 }
52 
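/*
 * The heartbeat emits background pulses (and may trigger resets) that would
 * interfere with tests which deliberately stall an engine, so park it and
 * hold an engine-pm reference for the duration of the test.
 */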
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
54 				     unsigned long *saved)
55 {
56 	*saved = engine->props.heartbeat_interval_ms;
57 	engine->props.heartbeat_interval_ms = 0;
58 
59 	intel_engine_pm_get(engine);
60 	intel_engine_park_heartbeat(engine);
61 }
62 
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
64 				    unsigned long saved)
65 {
66 	intel_engine_pm_put(engine);
67 
68 	engine->props.heartbeat_interval_ms = saved;
69 }
70 
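/*
 * Poll, flushing the submission tasklet on each iteration, until the request
 * is active on the HW (or has already started executing); returns -ETIME if
 * that does not happen within the given timeout.
 */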
71 static int wait_for_submit(struct intel_engine_cs *engine,
72 			   struct i915_request *rq,
73 			   unsigned long timeout)
74 {
75 	timeout += jiffies;
76 	do {
77 		cond_resched();
78 		intel_engine_flush_submission(engine);
79 
80 		if (READ_ONCE(engine->execlists.pending[0]))
81 			continue;
82 
83 		if (i915_request_is_active(rq))
84 			return 0;
85 
86 		if (i915_request_started(rq)) /* that was quick! */
87 			return 0;
88 	} while (time_before(jiffies, timeout));
89 
90 	return -ETIME;
91 }
92 
93 static int live_sanitycheck(void *arg)
94 {
95 	struct intel_gt *gt = arg;
96 	struct intel_engine_cs *engine;
97 	enum intel_engine_id id;
98 	struct igt_spinner spin;
99 	int err = 0;
100 
101 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
102 		return 0;
103 
104 	if (igt_spinner_init(&spin, gt))
105 		return -ENOMEM;
106 
107 	for_each_engine(engine, gt, id) {
108 		struct intel_context *ce;
109 		struct i915_request *rq;
110 
111 		ce = intel_context_create(engine);
112 		if (IS_ERR(ce)) {
113 			err = PTR_ERR(ce);
114 			break;
115 		}
116 
117 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
118 		if (IS_ERR(rq)) {
119 			err = PTR_ERR(rq);
120 			goto out_ctx;
121 		}
122 
123 		i915_request_add(rq);
124 		if (!igt_wait_for_spinner(&spin, rq)) {
125 			GEM_TRACE("spinner failed to start\n");
126 			GEM_TRACE_DUMP();
127 			intel_gt_set_wedged(gt);
128 			err = -EIO;
129 			goto out_ctx;
130 		}
131 
132 		igt_spinner_end(&spin);
133 		if (igt_flush_test(gt->i915)) {
134 			err = -EIO;
135 			goto out_ctx;
136 		}
137 
138 out_ctx:
139 		intel_context_put(ce);
140 		if (err)
141 			break;
142 	}
143 
144 	igt_spinner_fini(&spin);
145 	return err;
146 }
147 
148 static int live_unlite_restore(struct intel_gt *gt, int prio)
149 {
150 	struct intel_engine_cs *engine;
151 	enum intel_engine_id id;
152 	struct igt_spinner spin;
153 	int err = -ENOMEM;
154 
155 	/*
156 	 * Check that we can correctly context switch between 2 instances
157 	 * on the same engine from the same parent context.
158 	 */
159 
160 	if (igt_spinner_init(&spin, gt))
161 		return err;
162 
163 	err = 0;
164 	for_each_engine(engine, gt, id) {
165 		struct intel_context *ce[2] = {};
166 		struct i915_request *rq[2];
167 		struct igt_live_test t;
168 		unsigned long saved;
169 		int n;
170 
171 		if (prio && !intel_engine_has_preemption(engine))
172 			continue;
173 
174 		if (!intel_engine_can_store_dword(engine))
175 			continue;
176 
177 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
178 			err = -EIO;
179 			break;
180 		}
181 		engine_heartbeat_disable(engine, &saved);
182 
183 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
184 			struct intel_context *tmp;
185 
186 			tmp = intel_context_create(engine);
187 			if (IS_ERR(tmp)) {
188 				err = PTR_ERR(tmp);
189 				goto err_ce;
190 			}
191 
192 			err = intel_context_pin(tmp);
193 			if (err) {
194 				intel_context_put(tmp);
195 				goto err_ce;
196 			}
197 
198 			/*
199 			 * Setup the pair of contexts such that if we
200 			 * lite-restore using the RING_TAIL from ce[1] it
201 			 * will execute garbage from ce[0]->ring.
202 			 */
203 			memset(tmp->ring->vaddr,
204 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
205 			       tmp->ring->vma->size);
206 
207 			ce[n] = tmp;
208 		}
209 		GEM_BUG_ON(!ce[1]->ring->size);
210 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
211 		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
212 
213 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
214 		if (IS_ERR(rq[0])) {
215 			err = PTR_ERR(rq[0]);
216 			goto err_ce;
217 		}
218 
219 		i915_request_get(rq[0]);
220 		i915_request_add(rq[0]);
221 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
222 
223 		if (!igt_wait_for_spinner(&spin, rq[0])) {
224 			i915_request_put(rq[0]);
225 			goto err_ce;
226 		}
227 
228 		rq[1] = i915_request_create(ce[1]);
229 		if (IS_ERR(rq[1])) {
230 			err = PTR_ERR(rq[1]);
231 			i915_request_put(rq[0]);
232 			goto err_ce;
233 		}
234 
235 		if (!prio) {
236 			/*
237 			 * Ensure we do the switch to ce[1] on completion.
238 			 *
239 			 * rq[0] is already submitted, so this should reduce
240 			 * to a no-op (a wait on a request on the same engine
241 			 * uses the submit fence, not the completion fence),
242 			 * but it will install a dependency on rq[1] for rq[0]
243 			 * that will prevent the pair being reordered by
244 			 * timeslicing.
245 			 */
246 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
247 		}
248 
249 		i915_request_get(rq[1]);
250 		i915_request_add(rq[1]);
251 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
252 		i915_request_put(rq[0]);
253 
254 		if (prio) {
255 			struct i915_sched_attr attr = {
256 				.priority = prio,
257 			};
258 
259 			/* Alternatively preempt the spinner with ce[1] */
260 			engine->schedule(rq[1], &attr);
261 		}
262 
263 		/* And switch back to ce[0] for good measure */
264 		rq[0] = i915_request_create(ce[0]);
265 		if (IS_ERR(rq[0])) {
266 			err = PTR_ERR(rq[0]);
267 			i915_request_put(rq[1]);
268 			goto err_ce;
269 		}
270 
271 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
272 		i915_request_get(rq[0]);
273 		i915_request_add(rq[0]);
274 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
275 		i915_request_put(rq[1]);
276 		i915_request_put(rq[0]);
277 
278 err_ce:
279 		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
280 		igt_spinner_end(&spin);
281 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
282 			if (IS_ERR_OR_NULL(ce[n]))
283 				break;
284 
285 			intel_context_unpin(ce[n]);
286 			intel_context_put(ce[n]);
287 		}
288 
289 		engine_heartbeat_enable(engine, saved);
290 		if (igt_live_test_end(&t))
291 			err = -EIO;
292 		if (err)
293 			break;
294 	}
295 
296 	igt_spinner_fini(&spin);
297 	return err;
298 }
299 
300 static int live_unlite_switch(void *arg)
301 {
302 	return live_unlite_restore(arg, 0);
303 }
304 
305 static int live_unlite_preempt(void *arg)
306 {
307 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
308 }
309 
310 static int live_pin_rewind(void *arg)
311 {
312 	struct intel_gt *gt = arg;
313 	struct intel_engine_cs *engine;
314 	enum intel_engine_id id;
315 	int err = 0;
316 
317 	/*
318 	 * We have to be careful not to trust intel_ring too much, for example
319 	 * ring->head is updated upon retire which is out of sync with pinning
320 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
321 	 * or else we risk writing an older, stale value.
322 	 *
323 	 * To simulate this, let's apply a bit of deliberate sabotage.
324 	 */
325 
326 	for_each_engine(engine, gt, id) {
327 		struct intel_context *ce;
328 		struct i915_request *rq;
329 		struct intel_ring *ring;
330 		struct igt_live_test t;
331 
332 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
333 			err = -EIO;
334 			break;
335 		}
336 
337 		ce = intel_context_create(engine);
338 		if (IS_ERR(ce)) {
339 			err = PTR_ERR(ce);
340 			break;
341 		}
342 
343 		err = intel_context_pin(ce);
344 		if (err) {
345 			intel_context_put(ce);
346 			break;
347 		}
348 
349 		/* Keep the context awake while we play games */
350 		err = i915_active_acquire(&ce->active);
351 		if (err) {
352 			intel_context_unpin(ce);
353 			intel_context_put(ce);
354 			break;
355 		}
356 		ring = ce->ring;
357 
358 		/* Poison the ring, and offset the next request from HEAD */
359 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
360 		ring->emit = ring->size / 2;
361 		ring->tail = ring->emit;
362 		GEM_BUG_ON(ring->head);
363 
364 		intel_context_unpin(ce);
365 
366 		/* Submit a simple nop request */
367 		GEM_BUG_ON(intel_context_is_pinned(ce));
368 		rq = intel_context_create_request(ce);
369 		i915_active_release(&ce->active); /* e.g. async retire */
370 		intel_context_put(ce);
371 		if (IS_ERR(rq)) {
372 			err = PTR_ERR(rq);
373 			break;
374 		}
375 		GEM_BUG_ON(!rq->head);
376 		i915_request_add(rq);
377 
378 		/* Expect not to hang! */
379 		if (igt_live_test_end(&t)) {
380 			err = -EIO;
381 			break;
382 		}
383 	}
384 
385 	return err;
386 }
387 
388 static int live_hold_reset(void *arg)
389 {
390 	struct intel_gt *gt = arg;
391 	struct intel_engine_cs *engine;
392 	enum intel_engine_id id;
393 	struct igt_spinner spin;
394 	int err = 0;
395 
396 	/*
397 	 * In order to support offline error capture for fast preempt reset,
398 	 * we need to decouple the guilty request and ensure that it and its
399 	 * descendants are not executed while the capture is in progress.
400 	 */
401 
402 	if (!intel_has_reset_engine(gt))
403 		return 0;
404 
405 	if (igt_spinner_init(&spin, gt))
406 		return -ENOMEM;
407 
408 	for_each_engine(engine, gt, id) {
409 		struct intel_context *ce;
410 		unsigned long heartbeat;
411 		struct i915_request *rq;
412 
413 		ce = intel_context_create(engine);
414 		if (IS_ERR(ce)) {
415 			err = PTR_ERR(ce);
416 			break;
417 		}
418 
419 		engine_heartbeat_disable(engine, &heartbeat);
420 
421 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
422 		if (IS_ERR(rq)) {
423 			err = PTR_ERR(rq);
424 			goto out;
425 		}
426 		i915_request_add(rq);
427 
428 		if (!igt_wait_for_spinner(&spin, rq)) {
429 			intel_gt_set_wedged(gt);
430 			err = -ETIME;
431 			goto out;
432 		}
433 
434 		/* We have our request executing, now remove it and reset */
435 
436 		if (test_and_set_bit(I915_RESET_ENGINE + id,
437 				     &gt->reset.flags)) {
438 			intel_gt_set_wedged(gt);
439 			err = -EBUSY;
440 			goto out;
441 		}
442 		tasklet_disable(&engine->execlists.tasklet);
443 
444 		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
445 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
446 
447 		i915_request_get(rq);
448 		execlists_hold(engine, rq);
449 		GEM_BUG_ON(!i915_request_on_hold(rq));
450 
451 		intel_engine_reset(engine, NULL);
452 		GEM_BUG_ON(rq->fence.error != -EIO);
453 
454 		tasklet_enable(&engine->execlists.tasklet);
455 		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
456 				      &gt->reset.flags);
457 
458 		/* Check that we do not resubmit the held request */
459 		if (!i915_request_wait(rq, 0, HZ / 5)) {
460 			pr_err("%s: on hold request completed!\n",
461 			       engine->name);
462 			i915_request_put(rq);
463 			err = -EIO;
464 			goto out;
465 		}
466 		GEM_BUG_ON(!i915_request_on_hold(rq));
467 
468 		/* But is resubmitted on release */
469 		execlists_unhold(engine, rq);
470 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
471 			pr_err("%s: held request did not complete!\n",
472 			       engine->name);
473 			intel_gt_set_wedged(gt);
474 			err = -ETIME;
475 		}
476 		i915_request_put(rq);
477 
478 out:
479 		engine_heartbeat_enable(engine, heartbeat);
480 		intel_context_put(ce);
481 		if (err)
482 			break;
483 	}
484 
485 	igt_spinner_fini(&spin);
486 	return err;
487 }
488 
489 static const char *error_repr(int err)
490 {
491 	return err ? "bad" : "good";
492 }
493 
494 static int live_error_interrupt(void *arg)
495 {
496 	static const struct error_phase {
497 		enum { GOOD = 0, BAD = -EIO } error[2];
498 	} phases[] = {
499 		{ { BAD,  GOOD } },
500 		{ { BAD,  BAD  } },
501 		{ { BAD,  GOOD } },
502 		{ { GOOD, GOOD } }, /* sentinel */
503 	};
504 	struct intel_gt *gt = arg;
505 	struct intel_engine_cs *engine;
506 	enum intel_engine_id id;
507 
508 	/*
509 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
510 	 * of invalid commands in user batches that will cause a GPU hang.
511 	 * This is a faster mechanism than using hangcheck/heartbeats, but
512 	 * only detects problems the HW knows about -- it will not warn when
513 	 * we kill the HW!
514 	 *
515 	 * To verify our detection and reset, we throw some invalid commands
516 	 * at the HW and wait for the interrupt.
517 	 */
518 
519 	if (!intel_has_reset_engine(gt))
520 		return 0;
521 
522 	for_each_engine(engine, gt, id) {
523 		const struct error_phase *p;
524 		unsigned long heartbeat;
525 		int err = 0;
526 
527 		engine_heartbeat_disable(engine, &heartbeat);
528 
529 		for (p = phases; p->error[0] != GOOD; p++) {
530 			struct i915_request *client[ARRAY_SIZE(phases->error)];
531 			u32 *cs;
532 			int i;
533 
534 			memset(client, 0, sizeof(client)); /* clear every slot for the error path */
535 			for (i = 0; i < ARRAY_SIZE(client); i++) {
536 				struct intel_context *ce;
537 				struct i915_request *rq;
538 
539 				ce = intel_context_create(engine);
540 				if (IS_ERR(ce)) {
541 					err = PTR_ERR(ce);
542 					goto out;
543 				}
544 
545 				rq = intel_context_create_request(ce);
546 				intel_context_put(ce);
547 				if (IS_ERR(rq)) {
548 					err = PTR_ERR(rq);
549 					goto out;
550 				}
551 
552 				if (rq->engine->emit_init_breadcrumb) {
553 					err = rq->engine->emit_init_breadcrumb(rq);
554 					if (err) {
555 						i915_request_add(rq);
556 						goto out;
557 					}
558 				}
559 
560 				cs = intel_ring_begin(rq, 2);
561 				if (IS_ERR(cs)) {
562 					i915_request_add(rq);
563 					err = PTR_ERR(cs);
564 					goto out;
565 				}
566 
567 				if (p->error[i]) {
568 					*cs++ = 0xdeadbeef;
569 					*cs++ = 0xdeadbeef;
570 				} else {
571 					*cs++ = MI_NOOP;
572 					*cs++ = MI_NOOP;
573 				}
574 
575 				client[i] = i915_request_get(rq);
576 				i915_request_add(rq);
577 			}
578 
579 			err = wait_for_submit(engine, client[0], HZ / 2);
580 			if (err) {
581 				pr_err("%s: first request did not start within time!\n",
582 				       engine->name);
583 				err = -ETIME;
584 				goto out;
585 			}
586 
587 			for (i = 0; i < ARRAY_SIZE(client); i++) {
588 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
589 					pr_debug("%s: %s request incomplete!\n",
590 						 engine->name,
591 						 error_repr(p->error[i]));
592 
593 				if (!i915_request_started(client[i])) {
594 					pr_debug("%s: %s request not started!\n",
595 						 engine->name,
596 						 error_repr(p->error[i]));
597 					err = -ETIME;
598 					goto out;
599 				}
600 
601 				/* Kick the tasklet to process the error */
602 				intel_engine_flush_submission(engine);
603 				if (client[i]->fence.error != p->error[i]) {
604 					pr_err("%s: %s request completed with wrong error code: %d\n",
605 					       engine->name,
606 					       error_repr(p->error[i]),
607 					       client[i]->fence.error);
608 					err = -EINVAL;
609 					goto out;
610 				}
611 			}
612 
613 out:
614 			for (i = 0; i < ARRAY_SIZE(client); i++)
615 				if (client[i])
616 					i915_request_put(client[i]);
617 			if (err) {
618 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
619 				       engine->name, p - phases,
620 				       p->error[0], p->error[1]);
621 				break;
622 			}
623 		}
624 
625 		engine_heartbeat_enable(engine, heartbeat);
626 		if (err) {
627 			intel_gt_set_wedged(gt);
628 			return err;
629 		}
630 	}
631 
632 	return 0;
633 }
634 
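/*
 * Emit a busywait on semaphore slot[idx], followed by a release of
 * slot[idx - 1], so that each link in the chain unblocks its predecessor
 * once its own semaphore has been signalled.
 */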
635 static int
636 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
637 {
638 	u32 *cs;
639 
640 	cs = intel_ring_begin(rq, 10);
641 	if (IS_ERR(cs))
642 		return PTR_ERR(cs);
643 
644 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
645 
646 	*cs++ = MI_SEMAPHORE_WAIT |
647 		MI_SEMAPHORE_GLOBAL_GTT |
648 		MI_SEMAPHORE_POLL |
649 		MI_SEMAPHORE_SAD_NEQ_SDD;
650 	*cs++ = 0;
651 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
652 	*cs++ = 0;
653 
654 	if (idx > 0) {
655 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
656 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
657 		*cs++ = 0;
658 		*cs++ = 1;
659 	} else {
660 		*cs++ = MI_NOOP;
661 		*cs++ = MI_NOOP;
662 		*cs++ = MI_NOOP;
663 		*cs++ = MI_NOOP;
664 	}
665 
666 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
667 
668 	intel_ring_advance(rq, cs);
669 	return 0;
670 }
671 
672 static struct i915_request *
673 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
674 {
675 	struct intel_context *ce;
676 	struct i915_request *rq;
677 	int err;
678 
679 	ce = intel_context_create(engine);
680 	if (IS_ERR(ce))
681 		return ERR_CAST(ce);
682 
683 	rq = intel_context_create_request(ce);
684 	if (IS_ERR(rq))
685 		goto out_ce;
686 
687 	err = 0;
688 	if (rq->engine->emit_init_breadcrumb)
689 		err = rq->engine->emit_init_breadcrumb(rq);
690 	if (err == 0)
691 		err = emit_semaphore_chain(rq, vma, idx);
692 	if (err == 0)
693 		i915_request_get(rq);
694 	i915_request_add(rq);
695 	if (err)
696 		rq = ERR_PTR(err);
697 
698 out_ce:
699 	intel_context_put(ce);
700 	return rq;
701 }
702 
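/*
 * Submit a kernel request that writes 1 into slot[idx - 1], releasing the
 * last link of a semaphore chain, and bump it to the given priority to kick
 * the scheduler.
 */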
703 static int
704 release_queue(struct intel_engine_cs *engine,
705 	      struct i915_vma *vma,
706 	      int idx, int prio)
707 {
708 	struct i915_sched_attr attr = {
709 		.priority = prio,
710 	};
711 	struct i915_request *rq;
712 	u32 *cs;
713 
714 	rq = intel_engine_create_kernel_request(engine);
715 	if (IS_ERR(rq))
716 		return PTR_ERR(rq);
717 
718 	cs = intel_ring_begin(rq, 4);
719 	if (IS_ERR(cs)) {
720 		i915_request_add(rq);
721 		return PTR_ERR(cs);
722 	}
723 
724 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
725 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
726 	*cs++ = 0;
727 	*cs++ = 1;
728 
729 	intel_ring_advance(rq, cs);
730 
731 	i915_request_get(rq);
732 	i915_request_add(rq);
733 
734 	local_bh_disable();
735 	engine->schedule(rq, &attr);
736 	local_bh_enable(); /* kick tasklet */
737 
738 	i915_request_put(rq);
739 
740 	return 0;
741 }
742 
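/*
 * Build a chain of semaphore waiters across every engine, with the head
 * queued on @outer, then release the final link at maximum priority. The
 * head can only complete if timeslicing rotates through each waiter in turn
 * so that the chain unwinds all the way back to slot 0.
 */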
743 static int
744 slice_semaphore_queue(struct intel_engine_cs *outer,
745 		      struct i915_vma *vma,
746 		      int count)
747 {
748 	struct intel_engine_cs *engine;
749 	struct i915_request *head;
750 	enum intel_engine_id id;
751 	int err, i, n = 0;
752 
753 	head = semaphore_queue(outer, vma, n++);
754 	if (IS_ERR(head))
755 		return PTR_ERR(head);
756 
757 	for_each_engine(engine, outer->gt, id) {
758 		for (i = 0; i < count; i++) {
759 			struct i915_request *rq;
760 
761 			rq = semaphore_queue(engine, vma, n++);
762 			if (IS_ERR(rq)) {
763 				err = PTR_ERR(rq);
764 				goto out;
765 			}
766 
767 			i915_request_put(rq);
768 		}
769 	}
770 
771 	err = release_queue(outer, vma, n, INT_MAX);
772 	if (err)
773 		goto out;
774 
775 	if (i915_request_wait(head, 0,
776 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
777 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
778 		       count, n);
779 		GEM_TRACE_DUMP();
780 		intel_gt_set_wedged(outer->gt);
781 		err = -EIO;
782 	}
783 
784 out:
785 	i915_request_put(head);
786 	return err;
787 }
788 
789 static int live_timeslice_preempt(void *arg)
790 {
791 	struct intel_gt *gt = arg;
792 	struct drm_i915_gem_object *obj;
793 	struct i915_vma *vma;
794 	void *vaddr;
795 	int err = 0;
796 	int count;
797 
798 	/*
799 	 * If a request takes too long, we would like to give other users
800 	 * a fair go on the GPU. In particular, users may create batches
801 	 * that wait upon external input, where that input may even be
802 	 * supplied by another GPU job. To avoid blocking forever, we
803 	 * need to preempt the current task and replace it with another
804 	 * ready task.
805 	 */
806 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
807 		return 0;
808 
809 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
810 	if (IS_ERR(obj))
811 		return PTR_ERR(obj);
812 
813 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
814 	if (IS_ERR(vma)) {
815 		err = PTR_ERR(vma);
816 		goto err_obj;
817 	}
818 
819 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
820 	if (IS_ERR(vaddr)) {
821 		err = PTR_ERR(vaddr);
822 		goto err_obj;
823 	}
824 
825 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
826 	if (err)
827 		goto err_map;
828 
829 	err = i915_vma_sync(vma);
830 	if (err)
831 		goto err_pin;
832 
833 	for_each_prime_number_from(count, 1, 16) {
834 		struct intel_engine_cs *engine;
835 		enum intel_engine_id id;
836 
837 		for_each_engine(engine, gt, id) {
838 			unsigned long saved;
839 
840 			if (!intel_engine_has_preemption(engine))
841 				continue;
842 
843 			memset(vaddr, 0, PAGE_SIZE);
844 
845 			engine_heartbeat_disable(engine, &saved);
846 			err = slice_semaphore_queue(engine, vma, count);
847 			engine_heartbeat_enable(engine, saved);
848 			if (err)
849 				goto err_pin;
850 
851 			if (igt_flush_test(gt->i915)) {
852 				err = -EIO;
853 				goto err_pin;
854 			}
855 		}
856 	}
857 
858 err_pin:
859 	i915_vma_unpin(vma);
860 err_map:
861 	i915_gem_object_unpin_map(obj);
862 err_obj:
863 	i915_gem_object_put(obj);
864 	return err;
865 }
866 
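/*
 * Emit a request that (optionally after awaiting @wait) spins on slot[0] of
 * the status page and then records RING_TIMESTAMP into slot[idx], so the
 * test can compare the order in which the requests actually executed.
 */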
867 static struct i915_request *
868 create_rewinder(struct intel_context *ce,
869 		struct i915_request *wait,
870 		void *slot, int idx)
871 {
872 	const u32 offset =
873 		i915_ggtt_offset(ce->engine->status_page.vma) +
874 		offset_in_page(slot);
875 	struct i915_request *rq;
876 	u32 *cs;
877 	int err;
878 
879 	rq = intel_context_create_request(ce);
880 	if (IS_ERR(rq))
881 		return rq;
882 
883 	if (wait) {
884 		err = i915_request_await_dma_fence(rq, &wait->fence);
885 		if (err)
886 			goto err;
887 	}
888 
889 	cs = intel_ring_begin(rq, 10);
890 	if (IS_ERR(cs)) {
891 		err = PTR_ERR(cs);
892 		goto err;
893 	}
894 
895 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
896 	*cs++ = MI_NOOP;
897 
898 	*cs++ = MI_SEMAPHORE_WAIT |
899 		MI_SEMAPHORE_GLOBAL_GTT |
900 		MI_SEMAPHORE_POLL |
901 		MI_SEMAPHORE_SAD_NEQ_SDD;
902 	*cs++ = 0;
903 	*cs++ = offset;
904 	*cs++ = 0;
905 
906 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
907 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
908 	*cs++ = offset + idx * sizeof(u32);
909 	*cs++ = 0;
910 
911 	intel_ring_advance(rq, cs);
912 
913 	rq->sched.attr.priority = I915_PRIORITY_MASK;
914 	err = 0;
915 err:
916 	i915_request_get(rq);
917 	i915_request_add(rq);
918 	if (err) {
919 		i915_request_put(rq);
920 		return ERR_PTR(err);
921 	}
922 
923 	return rq;
924 }
925 
926 static int live_timeslice_rewind(void *arg)
927 {
928 	struct intel_gt *gt = arg;
929 	struct intel_engine_cs *engine;
930 	enum intel_engine_id id;
931 
932 	/*
933 	 * The usual presumption on timeslice expiration is that we replace
934 	 * the active context with another. However, given a chain of
935 	 * dependencies we may end up replacing the context with itself, but
936 	 * resubmitting only a few of its requests, forcing us to rewind the
937 	 * RING_TAIL of the original request.
938 	 */
939 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
940 		return 0;
941 
942 	for_each_engine(engine, gt, id) {
943 		enum { A1, A2, B1 };
944 		enum { X = 1, Y, Z };
945 		struct i915_request *rq[3] = {};
946 		struct intel_context *ce;
947 		unsigned long heartbeat;
948 		unsigned long timeslice;
949 		int i, err = 0;
950 		u32 *slot;
951 
952 		if (!intel_engine_has_timeslices(engine))
953 			continue;
954 
955 		/*
956 		 * A:rq1 -- semaphore wait, timestamp X
957 		 * A:rq2 -- write timestamp Y
958 		 *
959 		 * B:rq1 [await A:rq1] -- write timestamp Z
960 		 *
961 		 * Force timeslice, release semaphore.
962 		 *
963 		 * Expect execution/evaluation order XZY
964 		 */
965 
966 		engine_heartbeat_disable(engine, &heartbeat);
967 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
968 
969 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
970 
971 		ce = intel_context_create(engine);
972 		if (IS_ERR(ce)) {
973 			err = PTR_ERR(ce);
974 			goto err;
975 		}
976 
977 		rq[0] = create_rewinder(ce, NULL, slot, 1);
978 		if (IS_ERR(rq[0])) {
979 			intel_context_put(ce);
980 			goto err;
981 		}
982 
983 		rq[1] = create_rewinder(ce, NULL, slot, 2);
984 		intel_context_put(ce);
985 		if (IS_ERR(rq[1]))
986 			goto err;
987 
988 		err = wait_for_submit(engine, rq[1], HZ / 2);
989 		if (err) {
990 			pr_err("%s: failed to submit first context\n",
991 			       engine->name);
992 			goto err;
993 		}
994 
995 		ce = intel_context_create(engine);
996 		if (IS_ERR(ce)) {
997 			err = PTR_ERR(ce);
998 			goto err;
999 		}
1000 
1001 		rq[2] = create_rewinder(ce, rq[0], slot, 3);
1002 		intel_context_put(ce);
1003 		if (IS_ERR(rq[2]))
1004 			goto err;
1005 
1006 		err = wait_for_submit(engine, rq[2], HZ / 2);
1007 		if (err) {
1008 			pr_err("%s: failed to submit second context\n",
1009 			       engine->name);
1010 			goto err;
1011 		}
1012 		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
1013 
1014 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1015 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1016 		GEM_BUG_ON(!i915_request_is_active(rq[A2]));
1017 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1018 
1019 		/* Force the pending timeslice to expire immediately */
1020 		del_timer(&engine->execlists.timer);
1021 		tasklet_hi_schedule(&engine->execlists.tasklet);
1022 		intel_engine_flush_submission(engine);
1023 
1024 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1025 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1026 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1027 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1028 
1029 		/* Release the hounds! */
1030 		slot[0] = 1;
1031 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1032 
1033 		for (i = 1; i <= 3; i++) {
1034 			unsigned long timeout = jiffies + HZ / 2;
1035 
1036 			while (!READ_ONCE(slot[i]) &&
1037 			       time_before(jiffies, timeout))
1038 				;
1039 
1040 			if (!time_before(jiffies, timeout)) {
1041 				pr_err("%s: rq[%d] timed out\n",
1042 				       engine->name, i - 1);
1043 				err = -ETIME;
1044 				goto err;
1045 			}
1046 
1047 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1048 		}
1049 
1050 		/* XZY: XZ < XY */
1051 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1052 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1053 			       engine->name,
1054 			       slot[Z] - slot[X],
1055 			       slot[Y] - slot[X]);
1056 			err = -EINVAL;
1057 		}
1058 
1059 err:
1060 		memset32(&slot[0], -1, 4);
1061 		wmb();
1062 
1063 		engine->props.timeslice_duration_ms = timeslice;
1064 		engine_heartbeat_enable(engine, heartbeat);
1065 		for (i = 0; i < 3; i++)
1066 			i915_request_put(rq[i]);
1067 		if (igt_flush_test(gt->i915))
1068 			err = -EIO;
1069 		if (err)
1070 			return err;
1071 	}
1072 
1073 	return 0;
1074 }
1075 
1076 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1077 {
1078 	struct i915_request *rq;
1079 
1080 	rq = intel_engine_create_kernel_request(engine);
1081 	if (IS_ERR(rq))
1082 		return rq;
1083 
1084 	i915_request_get(rq);
1085 	i915_request_add(rq);
1086 
1087 	return rq;
1088 }
1089 
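/* Allow up to two full timeslices, plus a jiffy of slack, for the switch. */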
1090 static long timeslice_threshold(const struct intel_engine_cs *engine)
1091 {
1092 	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1093 }
1094 
1095 static int live_timeslice_queue(void *arg)
1096 {
1097 	struct intel_gt *gt = arg;
1098 	struct drm_i915_gem_object *obj;
1099 	struct intel_engine_cs *engine;
1100 	enum intel_engine_id id;
1101 	struct i915_vma *vma;
1102 	void *vaddr;
1103 	int err = 0;
1104 
1105 	/*
1106 	 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1107 	 * timeslicing between them disabled, we *do* enable timeslicing
1108 	 * if the queue demands it. (Normally, we do not submit if
1109 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1110 	 * eject ELSP[0] in favour of the queue.)
1111 	 */
1112 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1113 		return 0;
1114 
1115 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1116 	if (IS_ERR(obj))
1117 		return PTR_ERR(obj);
1118 
1119 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1120 	if (IS_ERR(vma)) {
1121 		err = PTR_ERR(vma);
1122 		goto err_obj;
1123 	}
1124 
1125 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1126 	if (IS_ERR(vaddr)) {
1127 		err = PTR_ERR(vaddr);
1128 		goto err_obj;
1129 	}
1130 
1131 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1132 	if (err)
1133 		goto err_map;
1134 
1135 	err = i915_vma_sync(vma);
1136 	if (err)
1137 		goto err_pin;
1138 
1139 	for_each_engine(engine, gt, id) {
1140 		struct i915_sched_attr attr = {
1141 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1142 		};
1143 		struct i915_request *rq, *nop;
1144 		unsigned long saved;
1145 
1146 		if (!intel_engine_has_preemption(engine))
1147 			continue;
1148 
1149 		engine_heartbeat_disable(engine, &saved);
1150 		memset(vaddr, 0, PAGE_SIZE);
1151 
1152 		/* ELSP[0]: semaphore wait */
1153 		rq = semaphore_queue(engine, vma, 0);
1154 		if (IS_ERR(rq)) {
1155 			err = PTR_ERR(rq);
1156 			goto err_heartbeat;
1157 		}
1158 		engine->schedule(rq, &attr);
1159 		err = wait_for_submit(engine, rq, HZ / 2);
1160 		if (err) {
1161 			pr_err("%s: Timed out trying to submit semaphores\n",
1162 			       engine->name);
1163 			goto err_rq;
1164 		}
1165 
1166 		/* ELSP[1]: nop request */
1167 		nop = nop_request(engine);
1168 		if (IS_ERR(nop)) {
1169 			err = PTR_ERR(nop);
1170 			goto err_rq;
1171 		}
1172 		err = wait_for_submit(engine, nop, HZ / 2);
1173 		i915_request_put(nop);
1174 		if (err) {
1175 			pr_err("%s: Timed out trying to submit nop\n",
1176 			       engine->name);
1177 			goto err_rq;
1178 		}
1179 
1180 		GEM_BUG_ON(i915_request_completed(rq));
1181 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1182 
1183 		/* Queue: semaphore signal, at the same priority as the semaphore */
1184 		err = release_queue(engine, vma, 1, effective_prio(rq));
1185 		if (err)
1186 			goto err_rq;
1187 
1188 		intel_engine_flush_submission(engine);
1189 		if (!READ_ONCE(engine->execlists.timer.expires) &&
1190 		    !i915_request_completed(rq)) {
1191 			struct drm_printer p =
1192 				drm_info_printer(gt->i915->drm.dev);
1193 
1194 			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1195 				      engine->name);
1196 			intel_engine_dump(engine, &p,
1197 					  "%s\n", engine->name);
1198 			GEM_TRACE_DUMP();
1199 
1200 			memset(vaddr, 0xff, PAGE_SIZE);
1201 			err = -EINVAL;
1202 		}
1203 
1204 		/* Timeslice every jiffy, so within 2 we should signal */
1205 		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1206 			struct drm_printer p =
1207 				drm_info_printer(gt->i915->drm.dev);
1208 
1209 			pr_err("%s: Failed to timeslice into queue\n",
1210 			       engine->name);
1211 			intel_engine_dump(engine, &p,
1212 					  "%s\n", engine->name);
1213 
1214 			memset(vaddr, 0xff, PAGE_SIZE);
1215 			err = -EIO;
1216 		}
1217 err_rq:
1218 		i915_request_put(rq);
1219 err_heartbeat:
1220 		engine_heartbeat_enable(engine, saved);
1221 		if (err)
1222 			break;
1223 	}
1224 
1225 err_pin:
1226 	i915_vma_unpin(vma);
1227 err_map:
1228 	i915_gem_object_unpin_map(obj);
1229 err_obj:
1230 	i915_gem_object_put(obj);
1231 	return err;
1232 }
1233 
1234 static int live_busywait_preempt(void *arg)
1235 {
1236 	struct intel_gt *gt = arg;
1237 	struct i915_gem_context *ctx_hi, *ctx_lo;
1238 	struct intel_engine_cs *engine;
1239 	struct drm_i915_gem_object *obj;
1240 	struct i915_vma *vma;
1241 	enum intel_engine_id id;
1242 	int err = -ENOMEM;
1243 	u32 *map;
1244 
1245 	/*
1246 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1247 	 * preempt the busywaits used to synchronise between rings.
1248 	 */
1249 
1250 	ctx_hi = kernel_context(gt->i915);
1251 	if (!ctx_hi)
1252 		return -ENOMEM;
1253 	ctx_hi->sched.priority =
1254 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1255 
1256 	ctx_lo = kernel_context(gt->i915);
1257 	if (!ctx_lo)
1258 		goto err_ctx_hi;
1259 	ctx_lo->sched.priority =
1260 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1261 
1262 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1263 	if (IS_ERR(obj)) {
1264 		err = PTR_ERR(obj);
1265 		goto err_ctx_lo;
1266 	}
1267 
1268 	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1269 	if (IS_ERR(map)) {
1270 		err = PTR_ERR(map);
1271 		goto err_obj;
1272 	}
1273 
1274 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1275 	if (IS_ERR(vma)) {
1276 		err = PTR_ERR(vma);
1277 		goto err_map;
1278 	}
1279 
1280 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1281 	if (err)
1282 		goto err_map;
1283 
1284 	err = i915_vma_sync(vma);
1285 	if (err)
1286 		goto err_vma;
1287 
1288 	for_each_engine(engine, gt, id) {
1289 		struct i915_request *lo, *hi;
1290 		struct igt_live_test t;
1291 		u32 *cs;
1292 
1293 		if (!intel_engine_has_preemption(engine))
1294 			continue;
1295 
1296 		if (!intel_engine_can_store_dword(engine))
1297 			continue;
1298 
1299 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1300 			err = -EIO;
1301 			goto err_vma;
1302 		}
1303 
1304 		/*
1305 		 * We create two requests. The low priority request
1306 		 * busywaits on a semaphore (inside the ringbuffer where
1307 		 * is should be preemptible) and the high priority requests
1308 		 * it should be preemptible) and the high priority request
1309 		 * allowing the first request to complete. If preemption
1310 		 * fails, we hang instead.
1311 		 */
1312 
1313 		lo = igt_request_alloc(ctx_lo, engine);
1314 		if (IS_ERR(lo)) {
1315 			err = PTR_ERR(lo);
1316 			goto err_vma;
1317 		}
1318 
1319 		cs = intel_ring_begin(lo, 8);
1320 		if (IS_ERR(cs)) {
1321 			err = PTR_ERR(cs);
1322 			i915_request_add(lo);
1323 			goto err_vma;
1324 		}
1325 
1326 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1327 		*cs++ = i915_ggtt_offset(vma);
1328 		*cs++ = 0;
1329 		*cs++ = 1;
1330 
1331 		/* XXX Do we need a flush + invalidate here? */
1332 
1333 		*cs++ = MI_SEMAPHORE_WAIT |
1334 			MI_SEMAPHORE_GLOBAL_GTT |
1335 			MI_SEMAPHORE_POLL |
1336 			MI_SEMAPHORE_SAD_EQ_SDD;
1337 		*cs++ = 0;
1338 		*cs++ = i915_ggtt_offset(vma);
1339 		*cs++ = 0;
1340 
1341 		intel_ring_advance(lo, cs);
1342 
1343 		i915_request_get(lo);
1344 		i915_request_add(lo);
1345 
1346 		if (wait_for(READ_ONCE(*map), 10)) {
1347 			i915_request_put(lo);
1348 			err = -ETIMEDOUT;
1349 			goto err_vma;
1350 		}
1351 
1352 		/* Low priority request should be busywaiting now */
1353 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1354 			i915_request_put(lo);
1355 			pr_err("%s: Busywaiting request did not busywait!\n",
1356 			       engine->name);
1357 			err = -EIO;
1358 			goto err_vma;
1359 		}
1360 
1361 		hi = igt_request_alloc(ctx_hi, engine);
1362 		if (IS_ERR(hi)) {
1363 			err = PTR_ERR(hi);
1364 			i915_request_put(lo);
1365 			goto err_vma;
1366 		}
1367 
1368 		cs = intel_ring_begin(hi, 4);
1369 		if (IS_ERR(cs)) {
1370 			err = PTR_ERR(cs);
1371 			i915_request_add(hi);
1372 			i915_request_put(lo);
1373 			goto err_vma;
1374 		}
1375 
1376 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1377 		*cs++ = i915_ggtt_offset(vma);
1378 		*cs++ = 0;
1379 		*cs++ = 0;
1380 
1381 		intel_ring_advance(hi, cs);
1382 		i915_request_add(hi);
1383 
1384 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1385 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1386 
1387 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1388 			       engine->name);
1389 
1390 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1391 			GEM_TRACE_DUMP();
1392 
1393 			i915_request_put(lo);
1394 			intel_gt_set_wedged(gt);
1395 			err = -EIO;
1396 			goto err_vma;
1397 		}
1398 		GEM_BUG_ON(READ_ONCE(*map));
1399 		i915_request_put(lo);
1400 
1401 		if (igt_live_test_end(&t)) {
1402 			err = -EIO;
1403 			goto err_vma;
1404 		}
1405 	}
1406 
1407 	err = 0;
1408 err_vma:
1409 	i915_vma_unpin(vma);
1410 err_map:
1411 	i915_gem_object_unpin_map(obj);
1412 err_obj:
1413 	i915_gem_object_put(obj);
1414 err_ctx_lo:
1415 	kernel_context_close(ctx_lo);
1416 err_ctx_hi:
1417 	kernel_context_close(ctx_hi);
1418 	return err;
1419 }
1420 
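/*
 * Convenience wrapper: create a spinner request on the intel_context bound
 * to @engine (via its legacy index) within @ctx.
 */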
1421 static struct i915_request *
1422 spinner_create_request(struct igt_spinner *spin,
1423 		       struct i915_gem_context *ctx,
1424 		       struct intel_engine_cs *engine,
1425 		       u32 arb)
1426 {
1427 	struct intel_context *ce;
1428 	struct i915_request *rq;
1429 
1430 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1431 	if (IS_ERR(ce))
1432 		return ERR_CAST(ce);
1433 
1434 	rq = igt_spinner_create_request(spin, ce, arb);
1435 	intel_context_put(ce);
1436 	return rq;
1437 }
1438 
1439 static int live_preempt(void *arg)
1440 {
1441 	struct intel_gt *gt = arg;
1442 	struct i915_gem_context *ctx_hi, *ctx_lo;
1443 	struct igt_spinner spin_hi, spin_lo;
1444 	struct intel_engine_cs *engine;
1445 	enum intel_engine_id id;
1446 	int err = -ENOMEM;
1447 
1448 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1449 		return 0;
1450 
1451 	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1452 		pr_err("Logical preemption supported, but not exposed\n");
1453 
1454 	if (igt_spinner_init(&spin_hi, gt))
1455 		return -ENOMEM;
1456 
1457 	if (igt_spinner_init(&spin_lo, gt))
1458 		goto err_spin_hi;
1459 
1460 	ctx_hi = kernel_context(gt->i915);
1461 	if (!ctx_hi)
1462 		goto err_spin_lo;
1463 	ctx_hi->sched.priority =
1464 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1465 
1466 	ctx_lo = kernel_context(gt->i915);
1467 	if (!ctx_lo)
1468 		goto err_ctx_hi;
1469 	ctx_lo->sched.priority =
1470 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1471 
1472 	for_each_engine(engine, gt, id) {
1473 		struct igt_live_test t;
1474 		struct i915_request *rq;
1475 
1476 		if (!intel_engine_has_preemption(engine))
1477 			continue;
1478 
1479 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1480 			err = -EIO;
1481 			goto err_ctx_lo;
1482 		}
1483 
1484 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1485 					    MI_ARB_CHECK);
1486 		if (IS_ERR(rq)) {
1487 			err = PTR_ERR(rq);
1488 			goto err_ctx_lo;
1489 		}
1490 
1491 		i915_request_add(rq);
1492 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1493 			GEM_TRACE("lo spinner failed to start\n");
1494 			GEM_TRACE_DUMP();
1495 			intel_gt_set_wedged(gt);
1496 			err = -EIO;
1497 			goto err_ctx_lo;
1498 		}
1499 
1500 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1501 					    MI_ARB_CHECK);
1502 		if (IS_ERR(rq)) {
1503 			igt_spinner_end(&spin_lo);
1504 			err = PTR_ERR(rq);
1505 			goto err_ctx_lo;
1506 		}
1507 
1508 		i915_request_add(rq);
1509 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1510 			GEM_TRACE("hi spinner failed to start\n");
1511 			GEM_TRACE_DUMP();
1512 			intel_gt_set_wedged(gt);
1513 			err = -EIO;
1514 			goto err_ctx_lo;
1515 		}
1516 
1517 		igt_spinner_end(&spin_hi);
1518 		igt_spinner_end(&spin_lo);
1519 
1520 		if (igt_live_test_end(&t)) {
1521 			err = -EIO;
1522 			goto err_ctx_lo;
1523 		}
1524 	}
1525 
1526 	err = 0;
1527 err_ctx_lo:
1528 	kernel_context_close(ctx_lo);
1529 err_ctx_hi:
1530 	kernel_context_close(ctx_hi);
1531 err_spin_lo:
1532 	igt_spinner_fini(&spin_lo);
1533 err_spin_hi:
1534 	igt_spinner_fini(&spin_hi);
1535 	return err;
1536 }
1537 
1538 static int live_late_preempt(void *arg)
1539 {
1540 	struct intel_gt *gt = arg;
1541 	struct i915_gem_context *ctx_hi, *ctx_lo;
1542 	struct igt_spinner spin_hi, spin_lo;
1543 	struct intel_engine_cs *engine;
1544 	struct i915_sched_attr attr = {};
1545 	enum intel_engine_id id;
1546 	int err = -ENOMEM;
1547 
1548 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1549 		return 0;
1550 
1551 	if (igt_spinner_init(&spin_hi, gt))
1552 		return -ENOMEM;
1553 
1554 	if (igt_spinner_init(&spin_lo, gt))
1555 		goto err_spin_hi;
1556 
1557 	ctx_hi = kernel_context(gt->i915);
1558 	if (!ctx_hi)
1559 		goto err_spin_lo;
1560 
1561 	ctx_lo = kernel_context(gt->i915);
1562 	if (!ctx_lo)
1563 		goto err_ctx_hi;
1564 
1565 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1566 	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1567 
1568 	for_each_engine(engine, gt, id) {
1569 		struct igt_live_test t;
1570 		struct i915_request *rq;
1571 
1572 		if (!intel_engine_has_preemption(engine))
1573 			continue;
1574 
1575 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1576 			err = -EIO;
1577 			goto err_ctx_lo;
1578 		}
1579 
1580 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1581 					    MI_ARB_CHECK);
1582 		if (IS_ERR(rq)) {
1583 			err = PTR_ERR(rq);
1584 			goto err_ctx_lo;
1585 		}
1586 
1587 		i915_request_add(rq);
1588 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1589 			pr_err("First context failed to start\n");
1590 			goto err_wedged;
1591 		}
1592 
1593 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1594 					    MI_NOOP);
1595 		if (IS_ERR(rq)) {
1596 			igt_spinner_end(&spin_lo);
1597 			err = PTR_ERR(rq);
1598 			goto err_ctx_lo;
1599 		}
1600 
1601 		i915_request_add(rq);
1602 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1603 			pr_err("Second context overtook first?\n");
1604 			goto err_wedged;
1605 		}
1606 
1607 		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1608 		engine->schedule(rq, &attr);
1609 
1610 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1611 			pr_err("High priority context failed to preempt the low priority context\n");
1612 			GEM_TRACE_DUMP();
1613 			goto err_wedged;
1614 		}
1615 
1616 		igt_spinner_end(&spin_hi);
1617 		igt_spinner_end(&spin_lo);
1618 
1619 		if (igt_live_test_end(&t)) {
1620 			err = -EIO;
1621 			goto err_ctx_lo;
1622 		}
1623 	}
1624 
1625 	err = 0;
1626 err_ctx_lo:
1627 	kernel_context_close(ctx_lo);
1628 err_ctx_hi:
1629 	kernel_context_close(ctx_hi);
1630 err_spin_lo:
1631 	igt_spinner_fini(&spin_lo);
1632 err_spin_hi:
1633 	igt_spinner_fini(&spin_hi);
1634 	return err;
1635 
1636 err_wedged:
1637 	igt_spinner_end(&spin_hi);
1638 	igt_spinner_end(&spin_lo);
1639 	intel_gt_set_wedged(gt);
1640 	err = -EIO;
1641 	goto err_ctx_lo;
1642 }
1643 
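/*
 * A preempt_client pairs a spinner with its own kernel context, so the tests
 * can submit long-running requests at independently controlled priorities.
 */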
1644 struct preempt_client {
1645 	struct igt_spinner spin;
1646 	struct i915_gem_context *ctx;
1647 };
1648 
1649 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1650 {
1651 	c->ctx = kernel_context(gt->i915);
1652 	if (!c->ctx)
1653 		return -ENOMEM;
1654 
1655 	if (igt_spinner_init(&c->spin, gt))
1656 		goto err_ctx;
1657 
1658 	return 0;
1659 
1660 err_ctx:
1661 	kernel_context_close(c->ctx);
1662 	return -ENOMEM;
1663 }
1664 
1665 static void preempt_client_fini(struct preempt_client *c)
1666 {
1667 	igt_spinner_fini(&c->spin);
1668 	kernel_context_close(c->ctx);
1669 }
1670 
1671 static int live_nopreempt(void *arg)
1672 {
1673 	struct intel_gt *gt = arg;
1674 	struct intel_engine_cs *engine;
1675 	struct preempt_client a, b;
1676 	enum intel_engine_id id;
1677 	int err = -ENOMEM;
1678 
1679 	/*
1680 	 * Verify that we can disable preemption for an individual request
1681 	 * that may be being observed and does not want to be interrupted.
1682 	 */
1683 
1684 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1685 		return 0;
1686 
1687 	if (preempt_client_init(gt, &a))
1688 		return -ENOMEM;
1689 	if (preempt_client_init(gt, &b))
1690 		goto err_client_a;
1691 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1692 
1693 	for_each_engine(engine, gt, id) {
1694 		struct i915_request *rq_a, *rq_b;
1695 
1696 		if (!intel_engine_has_preemption(engine))
1697 			continue;
1698 
1699 		engine->execlists.preempt_hang.count = 0;
1700 
1701 		rq_a = spinner_create_request(&a.spin,
1702 					      a.ctx, engine,
1703 					      MI_ARB_CHECK);
1704 		if (IS_ERR(rq_a)) {
1705 			err = PTR_ERR(rq_a);
1706 			goto err_client_b;
1707 		}
1708 
1709 		/* Low priority client, but unpreemptable! */
1710 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1711 
1712 		i915_request_add(rq_a);
1713 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1714 			pr_err("First client failed to start\n");
1715 			goto err_wedged;
1716 		}
1717 
1718 		rq_b = spinner_create_request(&b.spin,
1719 					      b.ctx, engine,
1720 					      MI_ARB_CHECK);
1721 		if (IS_ERR(rq_b)) {
1722 			err = PTR_ERR(rq_b);
1723 			goto err_client_b;
1724 		}
1725 
1726 		i915_request_add(rq_b);
1727 
1728 		/* B is much more important than A! (But A is unpreemptable.) */
1729 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1730 
1731 		/* Wait long enough for preemption and timeslicing */
1732 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
1733 			pr_err("Second client started too early!\n");
1734 			goto err_wedged;
1735 		}
1736 
1737 		igt_spinner_end(&a.spin);
1738 
1739 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1740 			pr_err("Second client failed to start\n");
1741 			goto err_wedged;
1742 		}
1743 
1744 		igt_spinner_end(&b.spin);
1745 
1746 		if (engine->execlists.preempt_hang.count) {
1747 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
1748 			       engine->execlists.preempt_hang.count);
1749 			err = -EINVAL;
1750 			goto err_wedged;
1751 		}
1752 
1753 		if (igt_flush_test(gt->i915))
1754 			goto err_wedged;
1755 	}
1756 
1757 	err = 0;
1758 err_client_b:
1759 	preempt_client_fini(&b);
1760 err_client_a:
1761 	preempt_client_fini(&a);
1762 	return err;
1763 
1764 err_wedged:
1765 	igt_spinner_end(&b.spin);
1766 	igt_spinner_end(&a.spin);
1767 	intel_gt_set_wedged(gt);
1768 	err = -EIO;
1769 	goto err_client_b;
1770 }
1771 
1772 struct live_preempt_cancel {
1773 	struct intel_engine_cs *engine;
1774 	struct preempt_client a, b;
1775 };
1776 
1777 static int __cancel_active0(struct live_preempt_cancel *arg)
1778 {
1779 	struct i915_request *rq;
1780 	struct igt_live_test t;
1781 	int err;
1782 
1783 	/* Preempt cancel of ELSP0 */
1784 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1785 	if (igt_live_test_begin(&t, arg->engine->i915,
1786 				__func__, arg->engine->name))
1787 		return -EIO;
1788 
1789 	rq = spinner_create_request(&arg->a.spin,
1790 				    arg->a.ctx, arg->engine,
1791 				    MI_ARB_CHECK);
1792 	if (IS_ERR(rq))
1793 		return PTR_ERR(rq);
1794 
1795 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
1796 	i915_request_get(rq);
1797 	i915_request_add(rq);
1798 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1799 		err = -EIO;
1800 		goto out;
1801 	}
1802 
1803 	intel_context_set_banned(rq->context);
1804 	err = intel_engine_pulse(arg->engine);
1805 	if (err)
1806 		goto out;
1807 
1808 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1809 		err = -EIO;
1810 		goto out;
1811 	}
1812 
1813 	if (rq->fence.error != -EIO) {
1814 		pr_err("Cancelled inflight0 request did not report -EIO\n");
1815 		err = -EINVAL;
1816 		goto out;
1817 	}
1818 
1819 out:
1820 	i915_request_put(rq);
1821 	if (igt_live_test_end(&t))
1822 		err = -EIO;
1823 	return err;
1824 }
1825 
1826 static int __cancel_active1(struct live_preempt_cancel *arg)
1827 {
1828 	struct i915_request *rq[2] = {};
1829 	struct igt_live_test t;
1830 	int err;
1831 
1832 	/* Preempt cancel of ELSP1 */
1833 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1834 	if (igt_live_test_begin(&t, arg->engine->i915,
1835 				__func__, arg->engine->name))
1836 		return -EIO;
1837 
1838 	rq[0] = spinner_create_request(&arg->a.spin,
1839 				       arg->a.ctx, arg->engine,
1840 				       MI_NOOP); /* no preemption */
1841 	if (IS_ERR(rq[0]))
1842 		return PTR_ERR(rq[0]);
1843 
1844 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1845 	i915_request_get(rq[0]);
1846 	i915_request_add(rq[0]);
1847 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1848 		err = -EIO;
1849 		goto out;
1850 	}
1851 
1852 	rq[1] = spinner_create_request(&arg->b.spin,
1853 				       arg->b.ctx, arg->engine,
1854 				       MI_ARB_CHECK);
1855 	if (IS_ERR(rq[1])) {
1856 		err = PTR_ERR(rq[1]);
1857 		goto out;
1858 	}
1859 
1860 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1861 	i915_request_get(rq[1]);
1862 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1863 	i915_request_add(rq[1]);
1864 	if (err)
1865 		goto out;
1866 
1867 	intel_context_set_banned(rq[1]->context);
1868 	err = intel_engine_pulse(arg->engine);
1869 	if (err)
1870 		goto out;
1871 
1872 	igt_spinner_end(&arg->a.spin);
1873 	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1874 		err = -EIO;
1875 		goto out;
1876 	}
1877 
1878 	if (rq[0]->fence.error != 0) {
1879 		pr_err("Normal inflight0 request did not complete\n");
1880 		err = -EINVAL;
1881 		goto out;
1882 	}
1883 
1884 	if (rq[1]->fence.error != -EIO) {
1885 		pr_err("Cancelled inflight1 request did not report -EIO\n");
1886 		err = -EINVAL;
1887 		goto out;
1888 	}
1889 
1890 out:
1891 	i915_request_put(rq[1]);
1892 	i915_request_put(rq[0]);
1893 	if (igt_live_test_end(&t))
1894 		err = -EIO;
1895 	return err;
1896 }
1897 
1898 static int __cancel_queued(struct live_preempt_cancel *arg)
1899 {
1900 	struct i915_request *rq[3] = {};
1901 	struct igt_live_test t;
1902 	int err;
1903 
1904 	/* Full ELSP and one in the wings */
1905 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1906 	if (igt_live_test_begin(&t, arg->engine->i915,
1907 				__func__, arg->engine->name))
1908 		return -EIO;
1909 
1910 	rq[0] = spinner_create_request(&arg->a.spin,
1911 				       arg->a.ctx, arg->engine,
1912 				       MI_ARB_CHECK);
1913 	if (IS_ERR(rq[0]))
1914 		return PTR_ERR(rq[0]);
1915 
1916 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1917 	i915_request_get(rq[0]);
1918 	i915_request_add(rq[0]);
1919 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1920 		err = -EIO;
1921 		goto out;
1922 	}
1923 
1924 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1925 	if (IS_ERR(rq[1])) {
1926 		err = PTR_ERR(rq[1]);
1927 		goto out;
1928 	}
1929 
1930 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1931 	i915_request_get(rq[1]);
1932 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1933 	i915_request_add(rq[1]);
1934 	if (err)
1935 		goto out;
1936 
1937 	rq[2] = spinner_create_request(&arg->b.spin,
1938 				       arg->a.ctx, arg->engine,
1939 				       MI_ARB_CHECK);
1940 	if (IS_ERR(rq[2])) {
1941 		err = PTR_ERR(rq[2]);
1942 		goto out;
1943 	}
1944 
1945 	i915_request_get(rq[2]);
1946 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1947 	i915_request_add(rq[2]);
1948 	if (err)
1949 		goto out;
1950 
1951 	intel_context_set_banned(rq[2]->context);
1952 	err = intel_engine_pulse(arg->engine);
1953 	if (err)
1954 		goto out;
1955 
1956 	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1957 		err = -EIO;
1958 		goto out;
1959 	}
1960 
1961 	if (rq[0]->fence.error != -EIO) {
1962 		pr_err("Cancelled inflight0 request did not report -EIO\n");
1963 		err = -EINVAL;
1964 		goto out;
1965 	}
1966 
1967 	if (rq[1]->fence.error != 0) {
1968 		pr_err("Normal inflight1 request did not complete\n");
1969 		err = -EINVAL;
1970 		goto out;
1971 	}
1972 
1973 	if (rq[2]->fence.error != -EIO) {
1974 		pr_err("Cancelled queued request did not report -EIO\n");
1975 		err = -EINVAL;
1976 		goto out;
1977 	}
1978 
1979 out:
1980 	i915_request_put(rq[2]);
1981 	i915_request_put(rq[1]);
1982 	i915_request_put(rq[0]);
1983 	if (igt_live_test_end(&t))
1984 		err = -EIO;
1985 	return err;
1986 }
1987 
1988 static int __cancel_hostile(struct live_preempt_cancel *arg)
1989 {
1990 	struct i915_request *rq;
1991 	int err;
1992 
1993 	/* Preempt cancel non-preemptible spinner in ELSP0 */
1994 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1995 		return 0;
1996 
1997 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1998 	rq = spinner_create_request(&arg->a.spin,
1999 				    arg->a.ctx, arg->engine,
2000 				    MI_NOOP); /* preemption disabled */
2001 	if (IS_ERR(rq))
2002 		return PTR_ERR(rq);
2003 
2004 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2005 	i915_request_get(rq);
2006 	i915_request_add(rq);
2007 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2008 		err = -EIO;
2009 		goto out;
2010 	}
2011 
2012 	intel_context_set_banned(rq->context);
2013 	err = intel_engine_pulse(arg->engine); /* force reset */
2014 	if (err)
2015 		goto out;
2016 
2017 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2018 		err = -EIO;
2019 		goto out;
2020 	}
2021 
2022 	if (rq->fence.error != -EIO) {
2023 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2024 		err = -EINVAL;
2025 		goto out;
2026 	}
2027 
2028 out:
2029 	i915_request_put(rq);
2030 	if (igt_flush_test(arg->engine->i915))
2031 		err = -EIO;
2032 	return err;
2033 }
2034 
2035 static int live_preempt_cancel(void *arg)
2036 {
2037 	struct intel_gt *gt = arg;
2038 	struct live_preempt_cancel data;
2039 	enum intel_engine_id id;
2040 	int err = -ENOMEM;
2041 
2042 	/*
2043 	 * To cancel an inflight context, we need to first remove it from the
2044 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2045 	 */
2046 
2047 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2048 		return 0;
2049 
2050 	if (preempt_client_init(gt, &data.a))
2051 		return -ENOMEM;
2052 	if (preempt_client_init(gt, &data.b))
2053 		goto err_client_a;
2054 
2055 	for_each_engine(data.engine, gt, id) {
2056 		if (!intel_engine_has_preemption(data.engine))
2057 			continue;
2058 
2059 		err = __cancel_active0(&data);
2060 		if (err)
2061 			goto err_wedged;
2062 
2063 		err = __cancel_active1(&data);
2064 		if (err)
2065 			goto err_wedged;
2066 
2067 		err = __cancel_queued(&data);
2068 		if (err)
2069 			goto err_wedged;
2070 
2071 		err = __cancel_hostile(&data);
2072 		if (err)
2073 			goto err_wedged;
2074 	}
2075 
2076 	err = 0;
2077 err_client_b:
2078 	preempt_client_fini(&data.b);
2079 err_client_a:
2080 	preempt_client_fini(&data.a);
2081 	return err;
2082 
2083 err_wedged:
2084 	GEM_TRACE_DUMP();
2085 	igt_spinner_end(&data.b.spin);
2086 	igt_spinner_end(&data.a.spin);
2087 	intel_gt_set_wedged(gt);
2088 	goto err_client_b;
2089 }
2090 
2091 static int live_suppress_self_preempt(void *arg)
2092 {
2093 	struct intel_gt *gt = arg;
2094 	struct intel_engine_cs *engine;
2095 	struct i915_sched_attr attr = {
2096 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2097 	};
2098 	struct preempt_client a, b;
2099 	enum intel_engine_id id;
2100 	int err = -ENOMEM;
2101 
2102 	/*
2103 	 * Verify that if a preemption request does not cause a change in
2104 	 * the current execution order, the preempt-to-idle injection is
2105 	 * skipped and that we do not accidentally apply it after the CS
2106 	 * completion event.
2107 	 */
2108 
2109 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2110 		return 0;
2111 
2112 	if (intel_uc_uses_guc_submission(&gt->uc))
2113 		return 0; /* presume black box */
2114 
2115 	if (intel_vgpu_active(gt->i915))
2116 		return 0; /* GVT forces single port & request submission */
2117 
2118 	if (preempt_client_init(gt, &a))
2119 		return -ENOMEM;
2120 	if (preempt_client_init(gt, &b))
2121 		goto err_client_a;
2122 
2123 	for_each_engine(engine, gt, id) {
2124 		struct i915_request *rq_a, *rq_b;
2125 		int depth;
2126 
2127 		if (!intel_engine_has_preemption(engine))
2128 			continue;
2129 
2130 		if (igt_flush_test(gt->i915))
2131 			goto err_wedged;
2132 
2133 		intel_engine_pm_get(engine);
2134 		engine->execlists.preempt_hang.count = 0;
2135 
2136 		rq_a = spinner_create_request(&a.spin,
2137 					      a.ctx, engine,
2138 					      MI_NOOP);
2139 		if (IS_ERR(rq_a)) {
2140 			err = PTR_ERR(rq_a);
2141 			intel_engine_pm_put(engine);
2142 			goto err_client_b;
2143 		}
2144 
2145 		i915_request_add(rq_a);
2146 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2147 			pr_err("First client failed to start\n");
2148 			intel_engine_pm_put(engine);
2149 			goto err_wedged;
2150 		}
2151 
2152 		/* Keep postponing the timer to avoid premature slicing */
2153 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2154 		for (depth = 0; depth < 8; depth++) {
2155 			rq_b = spinner_create_request(&b.spin,
2156 						      b.ctx, engine,
2157 						      MI_NOOP);
2158 			if (IS_ERR(rq_b)) {
2159 				err = PTR_ERR(rq_b);
2160 				intel_engine_pm_put(engine);
2161 				goto err_client_b;
2162 			}
2163 			i915_request_add(rq_b);
2164 
2165 			GEM_BUG_ON(i915_request_completed(rq_a));
2166 			engine->schedule(rq_a, &attr);
2167 			igt_spinner_end(&a.spin);
2168 
2169 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2170 				pr_err("Second client failed to start\n");
2171 				intel_engine_pm_put(engine);
2172 				goto err_wedged;
2173 			}
2174 
2175 			swap(a, b);
2176 			rq_a = rq_b;
2177 		}
2178 		igt_spinner_end(&a.spin);
2179 
2180 		if (engine->execlists.preempt_hang.count) {
2181 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2182 			       engine->name,
2183 			       engine->execlists.preempt_hang.count,
2184 			       depth);
2185 			intel_engine_pm_put(engine);
2186 			err = -EINVAL;
2187 			goto err_client_b;
2188 		}
2189 
2190 		intel_engine_pm_put(engine);
2191 		if (igt_flush_test(gt->i915))
2192 			goto err_wedged;
2193 	}
2194 
2195 	err = 0;
2196 err_client_b:
2197 	preempt_client_fini(&b);
2198 err_client_a:
2199 	preempt_client_fini(&a);
2200 	return err;
2201 
2202 err_wedged:
2203 	igt_spinner_end(&b.spin);
2204 	igt_spinner_end(&a.spin);
2205 	intel_gt_set_wedged(gt);
2206 	err = -EIO;
2207 	goto err_client_b;
2208 }
2209 
2210 static int __i915_sw_fence_call
2211 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
2212 {
2213 	return NOTIFY_DONE;
2214 }
2215 
2216 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2217 {
2218 	struct i915_request *rq;
2219 
2220 	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2221 	if (!rq)
2222 		return NULL;
2223 
2224 	rq->engine = engine;
2225 
2226 	spin_lock_init(&rq->lock);
2227 	INIT_LIST_HEAD(&rq->fence.cb_list);
2228 	rq->fence.lock = &rq->lock;
2229 	rq->fence.ops = &i915_fence_ops;
2230 
2231 	i915_sched_node_init(&rq->sched);
2232 
2233 	/* mark this request as permanently incomplete */
2234 	rq->fence.seqno = 1;
2235 	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2236 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
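	/*
	 * The HWSP slot is pointed at the always-zero upper dword of the
	 * seqno, so it can never catch up with fence.seqno (1) and the
	 * request never reads back as completed.
	 */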
2237 	GEM_BUG_ON(i915_request_completed(rq));
2238 
2239 	i915_sw_fence_init(&rq->submit, dummy_notify);
2240 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2245 
2246 	return rq;
2247 }
2248 
2249 static void dummy_request_free(struct i915_request *dummy)
2250 {
2251 	/* We have to fake the CS interrupt to kick the next request */
2252 	i915_sw_fence_commit(&dummy->submit);
2253 
2254 	i915_request_mark_complete(dummy);
2255 	dma_fence_signal(&dummy->fence);
2256 
2257 	i915_sched_node_fini(&dummy->sched);
2258 	i915_sw_fence_fini(&dummy->submit);
2259 
2260 	dma_fence_free(&dummy->fence);
2261 }
2262 
2263 static int live_suppress_wait_preempt(void *arg)
2264 {
2265 	struct intel_gt *gt = arg;
2266 	struct preempt_client client[4];
2267 	struct i915_request *rq[ARRAY_SIZE(client)] = {};
2268 	struct intel_engine_cs *engine;
2269 	enum intel_engine_id id;
2270 	int err = -ENOMEM;
2271 	int i;
2272 
2273 	/*
2274 	 * Waiters are given a little priority nudge, but not enough
2275 	 * to actually cause any preemption. Double check that we do
2276 	 * not needlessly generate preempt-to-idle cycles.
2277 	 */
2278 
2279 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2280 		return 0;
2281 
2282 	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2283 		return -ENOMEM;
2284 	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2285 		goto err_client_0;
2286 	if (preempt_client_init(gt, &client[2])) /* head of queue */
2287 		goto err_client_1;
2288 	if (preempt_client_init(gt, &client[3])) /* bystander */
2289 		goto err_client_2;
2290 
2291 	for_each_engine(engine, gt, id) {
2292 		int depth;
2293 
2294 		if (!intel_engine_has_preemption(engine))
2295 			continue;
2296 
2297 		if (!engine->emit_init_breadcrumb)
2298 			continue;
2299 
2300 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2301 			struct i915_request *dummy;
2302 
2303 			engine->execlists.preempt_hang.count = 0;
2304 
2305 			dummy = dummy_request(engine);
2306 			if (!dummy)
2307 				goto err_client_3;
2308 
2309 			for (i = 0; i < ARRAY_SIZE(client); i++) {
2310 				struct i915_request *this;
2311 
2312 				this = spinner_create_request(&client[i].spin,
2313 							      client[i].ctx, engine,
2314 							      MI_NOOP);
2315 				if (IS_ERR(this)) {
2316 					err = PTR_ERR(this);
2317 					goto err_wedged;
2318 				}
2319 
2320 				/* Disable NEWCLIENT promotion */
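				/*
				 * i.e. by making the dummy fence appear as
				 * the previous request on each timeline, the
				 * spinners look like waiters rather than
				 * fresh clients and should only receive the
				 * small wait-priority nudge.
				 */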
2321 				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
2322 							&dummy->fence);
2323 
2324 				rq[i] = i915_request_get(this);
2325 				i915_request_add(this);
2326 			}
2327 
2328 			dummy_request_free(dummy);
2329 
2330 			GEM_BUG_ON(i915_request_completed(rq[0]));
2331 			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2332 				pr_err("%s: First client failed to start\n",
2333 				       engine->name);
2334 				goto err_wedged;
2335 			}
2336 			GEM_BUG_ON(!i915_request_started(rq[0]));
2337 
2338 			if (i915_request_wait(rq[depth],
2339 					      I915_WAIT_PRIORITY,
2340 					      1) != -ETIME) {
2341 				pr_err("%s: Waiter depth:%d completed!\n",
2342 				       engine->name, depth);
2343 				goto err_wedged;
2344 			}
2345 
2346 			for (i = 0; i < ARRAY_SIZE(client); i++) {
2347 				igt_spinner_end(&client[i].spin);
2348 				i915_request_put(rq[i]);
2349 				rq[i] = NULL;
2350 			}
2351 
2352 			if (igt_flush_test(gt->i915))
2353 				goto err_wedged;
2354 
2355 			if (engine->execlists.preempt_hang.count) {
2356 				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2357 				       engine->name,
2358 				       engine->execlists.preempt_hang.count,
2359 				       depth);
2360 				err = -EINVAL;
2361 				goto err_client_3;
2362 			}
2363 		}
2364 	}
2365 
2366 	err = 0;
2367 err_client_3:
2368 	preempt_client_fini(&client[3]);
2369 err_client_2:
2370 	preempt_client_fini(&client[2]);
2371 err_client_1:
2372 	preempt_client_fini(&client[1]);
2373 err_client_0:
2374 	preempt_client_fini(&client[0]);
2375 	return err;
2376 
2377 err_wedged:
2378 	for (i = 0; i < ARRAY_SIZE(client); i++) {
2379 		igt_spinner_end(&client[i].spin);
2380 		i915_request_put(rq[i]);
2381 	}
2382 	intel_gt_set_wedged(gt);
2383 	err = -EIO;
2384 	goto err_client_3;
2385 }
2386 
2387 static int live_chain_preempt(void *arg)
2388 {
2389 	struct intel_gt *gt = arg;
2390 	struct intel_engine_cs *engine;
2391 	struct preempt_client hi, lo;
2392 	enum intel_engine_id id;
2393 	int err = -ENOMEM;
2394 
2395 	/*
2396 	 * Build a chain AB...BA between two contexts (A, B) and request
2397 	 * preemption of the last request. It should then complete before
2398 	 * the previously submitted spinner in B.
2399 	 */
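	/*
	 * Roughly, each pass of the loop below submits, oldest first:
	 *
	 *	hi: spinner
	 *	lo: spinner + count nops
	 *	hi: nop (bumped to I915_PRIORITY_MAX via engine->schedule())
	 *
	 * and then expects the final hi request to complete over the still
	 * spinning lo chain within a fraction of a second.
	 */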
2400 
2401 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2402 		return 0;
2403 
2404 	if (preempt_client_init(gt, &hi))
2405 		return -ENOMEM;
2406 
2407 	if (preempt_client_init(gt, &lo))
2408 		goto err_client_hi;
2409 
2410 	for_each_engine(engine, gt, id) {
2411 		struct i915_sched_attr attr = {
2412 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2413 		};
2414 		struct igt_live_test t;
2415 		struct i915_request *rq;
2416 		int ring_size, count, i;
2417 
2418 		if (!intel_engine_has_preemption(engine))
2419 			continue;
2420 
2421 		rq = spinner_create_request(&lo.spin,
2422 					    lo.ctx, engine,
2423 					    MI_ARB_CHECK);
2424 		if (IS_ERR(rq))
2425 			goto err_wedged;
2426 
2427 		i915_request_get(rq);
2428 		i915_request_add(rq);
2429 
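		/*
		 * Estimate how many requests of this size fit in the ring,
		 * which roughly bounds how long a low priority chain we can
		 * build below without blocking on ring space.
		 */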
2430 		ring_size = rq->wa_tail - rq->head;
2431 		if (ring_size < 0)
2432 			ring_size += rq->ring->size;
2433 		ring_size = rq->ring->size / ring_size;
2434 		pr_debug("%s(%s): Using maximum of %d requests\n",
2435 			 __func__, engine->name, ring_size);
2436 
2437 		igt_spinner_end(&lo.spin);
2438 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2439 			pr_err("Timed out waiting to flush %s\n", engine->name);
2440 			i915_request_put(rq);
2441 			goto err_wedged;
2442 		}
2443 		i915_request_put(rq);
2444 
2445 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2446 			err = -EIO;
2447 			goto err_wedged;
2448 		}
2449 
2450 		for_each_prime_number_from(count, 1, ring_size) {
2451 			rq = spinner_create_request(&hi.spin,
2452 						    hi.ctx, engine,
2453 						    MI_ARB_CHECK);
2454 			if (IS_ERR(rq))
2455 				goto err_wedged;
2456 			i915_request_add(rq);
2457 			if (!igt_wait_for_spinner(&hi.spin, rq))
2458 				goto err_wedged;
2459 
2460 			rq = spinner_create_request(&lo.spin,
2461 						    lo.ctx, engine,
2462 						    MI_ARB_CHECK);
2463 			if (IS_ERR(rq))
2464 				goto err_wedged;
2465 			i915_request_add(rq);
2466 
2467 			for (i = 0; i < count; i++) {
2468 				rq = igt_request_alloc(lo.ctx, engine);
2469 				if (IS_ERR(rq))
2470 					goto err_wedged;
2471 				i915_request_add(rq);
2472 			}
2473 
2474 			rq = igt_request_alloc(hi.ctx, engine);
2475 			if (IS_ERR(rq))
2476 				goto err_wedged;
2477 
2478 			i915_request_get(rq);
2479 			i915_request_add(rq);
2480 			engine->schedule(rq, &attr);
2481 
2482 			igt_spinner_end(&hi.spin);
2483 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2484 				struct drm_printer p =
2485 					drm_info_printer(gt->i915->drm.dev);
2486 
2487 				pr_err("Failed to preempt over chain of %d\n",
2488 				       count);
2489 				intel_engine_dump(engine, &p,
2490 						  "%s\n", engine->name);
2491 				i915_request_put(rq);
2492 				goto err_wedged;
2493 			}
2494 			igt_spinner_end(&lo.spin);
2495 			i915_request_put(rq);
2496 
2497 			rq = igt_request_alloc(lo.ctx, engine);
2498 			if (IS_ERR(rq))
2499 				goto err_wedged;
2500 
2501 			i915_request_get(rq);
2502 			i915_request_add(rq);
2503 
2504 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2505 				struct drm_printer p =
2506 					drm_info_printer(gt->i915->drm.dev);
2507 
2508 				pr_err("Failed to flush low priority chain of %d requests\n",
2509 				       count);
2510 				intel_engine_dump(engine, &p,
2511 						  "%s\n", engine->name);
2512 
2513 				i915_request_put(rq);
2514 				goto err_wedged;
2515 			}
2516 			i915_request_put(rq);
2517 		}
2518 
2519 		if (igt_live_test_end(&t)) {
2520 			err = -EIO;
2521 			goto err_wedged;
2522 		}
2523 	}
2524 
2525 	err = 0;
2526 err_client_lo:
2527 	preempt_client_fini(&lo);
2528 err_client_hi:
2529 	preempt_client_fini(&hi);
2530 	return err;
2531 
2532 err_wedged:
2533 	igt_spinner_end(&hi.spin);
2534 	igt_spinner_end(&lo.spin);
2535 	intel_gt_set_wedged(gt);
2536 	err = -EIO;
2537 	goto err_client_lo;
2538 }
2539 
2540 static int create_gang(struct intel_engine_cs *engine,
2541 		       struct i915_request **prev)
2542 {
2543 	struct drm_i915_gem_object *obj;
2544 	struct intel_context *ce;
2545 	struct i915_request *rq;
2546 	struct i915_vma *vma;
2547 	u32 *cs;
2548 	int err;
2549 
2550 	ce = intel_context_create(engine);
2551 	if (IS_ERR(ce))
2552 		return PTR_ERR(ce);
2553 
2554 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2555 	if (IS_ERR(obj)) {
2556 		err = PTR_ERR(obj);
2557 		goto err_ce;
2558 	}
2559 
2560 	vma = i915_vma_instance(obj, ce->vm, NULL);
2561 	if (IS_ERR(vma)) {
2562 		err = PTR_ERR(vma);
2563 		goto err_obj;
2564 	}
2565 
2566 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2567 	if (err)
2568 		goto err_obj;
2569 
2570 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2571 	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
2572 		goto err_obj;
	}
2573 
2574 	/* Semaphore target: spin until zero */
2575 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2576 
2577 	*cs++ = MI_SEMAPHORE_WAIT |
2578 		MI_SEMAPHORE_POLL |
2579 		MI_SEMAPHORE_SAD_EQ_SDD;
2580 	*cs++ = 0;
2581 	*cs++ = lower_32_bits(vma->node.start);
2582 	*cs++ = upper_32_bits(vma->node.start);
2583 
2584 	if (*prev) {
2585 		u64 offset = (*prev)->batch->node.start;
2586 
2587 		/* Terminate the spinner in the next lower priority batch. */
2588 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2589 		*cs++ = lower_32_bits(offset);
2590 		*cs++ = upper_32_bits(offset);
2591 		*cs++ = 0;
2592 	}
2593 
2594 	*cs++ = MI_BATCH_BUFFER_END;
2595 	i915_gem_object_flush_map(obj);
2596 	i915_gem_object_unpin_map(obj);
2597 
2598 	rq = intel_context_create_request(ce);
2599 	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
2600 		goto err_obj;
	}
2601 
2602 	rq->batch = vma;
2603 	i915_request_get(rq);
2604 
2605 	i915_vma_lock(vma);
2606 	err = i915_request_await_object(rq, vma->obj, false);
2607 	if (!err)
2608 		err = i915_vma_move_to_active(vma, rq, 0);
2609 	if (!err)
2610 		err = rq->engine->emit_bb_start(rq,
2611 						vma->node.start,
2612 						PAGE_SIZE, 0);
2613 	i915_vma_unlock(vma);
2614 	i915_request_add(rq);
2615 	if (err)
2616 		goto err_rq;
2617 
2618 	i915_gem_object_put(obj);
2619 	intel_context_put(ce);
2620 
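	/*
	 * Chain the requests together via client_link so the caller can
	 * walk them newest first. For the very first request *prev is NULL,
	 * so its link resolves back to NULL via list_next_entry() and
	 * terminates the walk in live_preempt_gang().
	 */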
2621 	rq->client_link.next = &(*prev)->client_link;
2622 	*prev = rq;
2623 	return 0;
2624 
2625 err_rq:
2626 	i915_request_put(rq);
2627 err_obj:
2628 	i915_gem_object_put(obj);
2629 err_ce:
2630 	intel_context_put(ce);
2631 	return err;
2632 }
2633 
2634 static int live_preempt_gang(void *arg)
2635 {
2636 	struct intel_gt *gt = arg;
2637 	struct intel_engine_cs *engine;
2638 	enum intel_engine_id id;
2639 
2640 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2641 		return 0;
2642 
2643 	/*
2644 	 * Build as long a chain of preempters as we can, with each
2645 	 * request higher priority than the last. Once we are ready, we release
2646 	 * the last batch which then percolates down the chain, each releasing
2647 	 * the next oldest in turn. The intent is to simply push as hard as we
2648 	 * can with the number of preemptions, trying to exceed narrow HW
2649 	 * limits. At a minimum, we insist that we can sort all the user
2650 	 * high priority levels into execution order.
2651 	 */
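	/*
	 * In outline: each batch emitted by create_gang() busywaits on a
	 * semaphore at the start of its own buffer, and each new (higher
	 * priority) batch writes zero into its predecessor's semaphore.
	 * Below we release the final, highest priority batch by hand, which
	 * then unwinds the whole gang from newest to oldest.
	 */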
2652 
2653 	for_each_engine(engine, gt, id) {
2654 		struct i915_request *rq = NULL;
2655 		struct igt_live_test t;
2656 		IGT_TIMEOUT(end_time);
2657 		int prio = 0;
2658 		int err = 0;
2659 		u32 *cs;
2660 
2661 		if (!intel_engine_has_preemption(engine))
2662 			continue;
2663 
2664 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2665 			return -EIO;
2666 
2667 		do {
2668 			struct i915_sched_attr attr = {
2669 				.priority = I915_USER_PRIORITY(prio++),
2670 			};
2671 
2672 			err = create_gang(engine, &rq);
2673 			if (err)
2674 				break;
2675 
2676 			/* Submit each spinner at increasing priority */
2677 			engine->schedule(rq, &attr);
2678 
2679 			if (prio <= I915_PRIORITY_MAX)
2680 				continue;
2681 
2682 			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2683 				break;
2684 
2685 			if (__igt_timeout(end_time, NULL))
2686 				break;
2687 		} while (1);
2688 		pr_debug("%s: Preempt chain of %d requests\n",
2689 			 engine->name, prio);
2690 
		if (!rq) /* create_gang() failed on the very first request */
			return err;

2691 		/*
2692 		 * The last spinner has the highest priority and should
2693 		 * execute first. When that spinner completes, it terminates
2694 		 * the next lowest spinner, and so on until there are no more
2695 		 * spinners and the gang is complete.
2696 		 */
2697 		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2698 		if (!IS_ERR(cs)) {
2699 			*cs = 0;
2700 			i915_gem_object_unpin_map(rq->batch->obj);
2701 		} else {
2702 			err = PTR_ERR(cs);
2703 			intel_gt_set_wedged(gt);
2704 		}
2705 
2706 		while (rq) { /* wait for each rq from highest to lowest prio */
2707 			struct i915_request *n =
2708 				list_next_entry(rq, client_link);
2709 
2710 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2711 				struct drm_printer p =
2712 					drm_info_printer(engine->i915->drm.dev);
2713 
2714 				pr_err("Failed to flush chain of %d requests, at %d\n",
2715 				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2716 				intel_engine_dump(engine, &p,
2717 						  "%s\n", engine->name);
2718 
2719 				err = -ETIME;
2720 			}
2721 
2722 			i915_request_put(rq);
2723 			rq = n;
2724 		}
2725 
2726 		if (igt_live_test_end(&t))
2727 			err = -EIO;
2728 		if (err)
2729 			return err;
2730 	}
2731 
2732 	return 0;
2733 }
2734 
2735 static int live_preempt_timeout(void *arg)
2736 {
2737 	struct intel_gt *gt = arg;
2738 	struct i915_gem_context *ctx_hi, *ctx_lo;
2739 	struct igt_spinner spin_lo;
2740 	struct intel_engine_cs *engine;
2741 	enum intel_engine_id id;
2742 	int err = -ENOMEM;
2743 
2744 	/*
2745 	 * Check that we force preemption to occur by cancelling the previous
2746 	 * context if it refuses to yield the GPU.
2747 	 */
2748 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2749 		return 0;
2750 
2751 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2752 		return 0;
2753 
2754 	if (!intel_has_reset_engine(gt))
2755 		return 0;
2756 
2757 	if (igt_spinner_init(&spin_lo, gt))
2758 		return -ENOMEM;
2759 
2760 	ctx_hi = kernel_context(gt->i915);
2761 	if (!ctx_hi)
2762 		goto err_spin_lo;
2763 	ctx_hi->sched.priority =
2764 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2765 
2766 	ctx_lo = kernel_context(gt->i915);
2767 	if (!ctx_lo)
2768 		goto err_ctx_hi;
2769 	ctx_lo->sched.priority =
2770 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2771 
2772 	for_each_engine(engine, gt, id) {
2773 		unsigned long saved_timeout;
2774 		struct i915_request *rq;
2775 
2776 		if (!intel_engine_has_preemption(engine))
2777 			continue;
2778 
2779 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2780 					    MI_NOOP); /* preemption disabled */
2781 		if (IS_ERR(rq)) {
2782 			err = PTR_ERR(rq);
2783 			goto err_ctx_lo;
2784 		}
2785 
2786 		i915_request_add(rq);
2787 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
2788 			intel_gt_set_wedged(gt);
2789 			err = -EIO;
2790 			goto err_ctx_lo;
2791 		}
2792 
2793 		rq = igt_request_alloc(ctx_hi, engine);
2794 		if (IS_ERR(rq)) {
2795 			igt_spinner_end(&spin_lo);
2796 			err = PTR_ERR(rq);
2797 			goto err_ctx_lo;
2798 		}
2799 
2800 		/* Flush the previous CS ack before changing timeouts */
2801 		while (READ_ONCE(engine->execlists.pending[0]))
2802 			cpu_relax();
2803 
2804 		saved_timeout = engine->props.preempt_timeout_ms;
2805 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
2806 
2807 		i915_request_get(rq);
2808 		i915_request_add(rq);
2809 
2810 		intel_engine_flush_submission(engine);
2811 		engine->props.preempt_timeout_ms = saved_timeout;
2812 
2813 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2814 			intel_gt_set_wedged(gt);
2815 			i915_request_put(rq);
2816 			err = -ETIME;
2817 			goto err_ctx_lo;
2818 		}
2819 
2820 		igt_spinner_end(&spin_lo);
2821 		i915_request_put(rq);
2822 	}
2823 
2824 	err = 0;
2825 err_ctx_lo:
2826 	kernel_context_close(ctx_lo);
2827 err_ctx_hi:
2828 	kernel_context_close(ctx_hi);
2829 err_spin_lo:
2830 	igt_spinner_fini(&spin_lo);
2831 	return err;
2832 }
2833 
2834 static int random_range(struct rnd_state *rnd, int min, int max)
2835 {
2836 	return i915_prandom_u32_max_state(max - min, rnd) + min;
2837 }
2838 
2839 static int random_priority(struct rnd_state *rnd)
2840 {
2841 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2842 }
2843 
2844 struct preempt_smoke {
2845 	struct intel_gt *gt;
2846 	struct i915_gem_context **contexts;
2847 	struct intel_engine_cs *engine;
2848 	struct drm_i915_gem_object *batch;
2849 	unsigned int ncontext;
2850 	struct rnd_state prng;
2851 	unsigned long count;
2852 };
2853 
2854 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2855 {
2856 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2857 							  &smoke->prng)];
2858 }
2859 
2860 static int smoke_submit(struct preempt_smoke *smoke,
2861 			struct i915_gem_context *ctx, int prio,
2862 			struct drm_i915_gem_object *batch)
2863 {
2864 	struct i915_request *rq;
2865 	struct i915_vma *vma = NULL;
2866 	int err = 0;
2867 
2868 	if (batch) {
2869 		struct i915_address_space *vm;
2870 
2871 		vm = i915_gem_context_get_vm_rcu(ctx);
2872 		vma = i915_vma_instance(batch, vm, NULL);
2873 		i915_vm_put(vm);
2874 		if (IS_ERR(vma))
2875 			return PTR_ERR(vma);
2876 
2877 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
2878 		if (err)
2879 			return err;
2880 	}
2881 
2882 	ctx->sched.priority = prio;
2883 
2884 	rq = igt_request_alloc(ctx, smoke->engine);
2885 	if (IS_ERR(rq)) {
2886 		err = PTR_ERR(rq);
2887 		goto unpin;
2888 	}
2889 
2890 	if (vma) {
2891 		i915_vma_lock(vma);
2892 		err = i915_request_await_object(rq, vma->obj, false);
2893 		if (!err)
2894 			err = i915_vma_move_to_active(vma, rq, 0);
2895 		if (!err)
2896 			err = rq->engine->emit_bb_start(rq,
2897 							vma->node.start,
2898 							PAGE_SIZE, 0);
2899 		i915_vma_unlock(vma);
2900 	}
2901 
2902 	i915_request_add(rq);
2903 
2904 unpin:
2905 	if (vma)
2906 		i915_vma_unpin(vma);
2907 
2908 	return err;
2909 }
2910 
2911 static int smoke_crescendo_thread(void *arg)
2912 {
2913 	struct preempt_smoke *smoke = arg;
2914 	IGT_TIMEOUT(end_time);
2915 	unsigned long count;
2916 
2917 	count = 0;
2918 	do {
2919 		struct i915_gem_context *ctx = smoke_context(smoke);
2920 		int err;
2921 
2922 		err = smoke_submit(smoke,
2923 				   ctx, count % I915_PRIORITY_MAX,
2924 				   smoke->batch);
2925 		if (err)
2926 			return err;
2927 
2928 		count++;
2929 	} while (!__igt_timeout(end_time, NULL));
2930 
2931 	smoke->count = count;
2932 	return 0;
2933 }
2934 
2935 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2936 #define BATCH BIT(0)
2937 {
2938 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
2939 	struct preempt_smoke arg[I915_NUM_ENGINES];
2940 	struct intel_engine_cs *engine;
2941 	enum intel_engine_id id;
2942 	unsigned long count;
2943 	int err = 0;
2944 
2945 	for_each_engine(engine, smoke->gt, id) {
2946 		arg[id] = *smoke;
2947 		arg[id].engine = engine;
2948 		if (!(flags & BATCH))
2949 			arg[id].batch = NULL;
2950 		arg[id].count = 0;
2951 
2952 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2953 				      "igt/smoke:%d", id);
2954 		if (IS_ERR(tsk[id])) {
2955 			err = PTR_ERR(tsk[id]);
2956 			break;
2957 		}
2958 		get_task_struct(tsk[id]);
2959 	}
2960 
2961 	yield(); /* start all threads before we kthread_stop() */
2962 
2963 	count = 0;
2964 	for_each_engine(engine, smoke->gt, id) {
2965 		int status;
2966 
2967 		if (IS_ERR_OR_NULL(tsk[id]))
2968 			continue;
2969 
2970 		status = kthread_stop(tsk[id]);
2971 		if (status && !err)
2972 			err = status;
2973 
2974 		count += arg[id].count;
2975 
2976 		put_task_struct(tsk[id]);
2977 	}
2978 
2979 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2980 		count, flags,
2981 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2982 	return err;
2983 }
2984 
2985 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2986 {
2987 	enum intel_engine_id id;
2988 	IGT_TIMEOUT(end_time);
2989 	unsigned long count;
2990 
2991 	count = 0;
2992 	do {
2993 		for_each_engine(smoke->engine, smoke->gt, id) {
2994 			struct i915_gem_context *ctx = smoke_context(smoke);
2995 			int err;
2996 
2997 			err = smoke_submit(smoke,
2998 					   ctx, random_priority(&smoke->prng),
2999 					   flags & BATCH ? smoke->batch : NULL);
3000 			if (err)
3001 				return err;
3002 
3003 			count++;
3004 		}
3005 	} while (!__igt_timeout(end_time, NULL));
3006 
3007 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3008 		count, flags,
3009 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3010 	return 0;
3011 }
3012 
3013 static int live_preempt_smoke(void *arg)
3014 {
3015 	struct preempt_smoke smoke = {
3016 		.gt = arg,
3017 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3018 		.ncontext = 1024,
3019 	};
3020 	const unsigned int phase[] = { 0, BATCH };
3021 	struct igt_live_test t;
3022 	int err = -ENOMEM;
3023 	u32 *cs;
3024 	int n;
3025 
3026 	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3027 		return 0;
3028 
3029 	smoke.contexts = kmalloc_array(smoke.ncontext,
3030 				       sizeof(*smoke.contexts),
3031 				       GFP_KERNEL);
3032 	if (!smoke.contexts)
3033 		return -ENOMEM;
3034 
3035 	smoke.batch =
3036 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3037 	if (IS_ERR(smoke.batch)) {
3038 		err = PTR_ERR(smoke.batch);
3039 		goto err_free;
3040 	}
3041 
3042 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3043 	if (IS_ERR(cs)) {
3044 		err = PTR_ERR(cs);
3045 		goto err_batch;
3046 	}
3047 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3048 		cs[n] = MI_ARB_CHECK;
3049 	cs[n] = MI_BATCH_BUFFER_END;
3050 	i915_gem_object_flush_map(smoke.batch);
3051 	i915_gem_object_unpin_map(smoke.batch);
3052 
3053 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3054 		err = -EIO;
3055 		goto err_batch;
3056 	}
3057 
3058 	for (n = 0; n < smoke.ncontext; n++) {
3059 		smoke.contexts[n] = kernel_context(smoke.gt->i915);
3060 		if (!smoke.contexts[n])
3061 			goto err_ctx;
3062 	}
3063 
3064 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3065 		err = smoke_crescendo(&smoke, phase[n]);
3066 		if (err)
3067 			goto err_ctx;
3068 
3069 		err = smoke_random(&smoke, phase[n]);
3070 		if (err)
3071 			goto err_ctx;
3072 	}
3073 
3074 err_ctx:
3075 	if (igt_live_test_end(&t))
3076 		err = -EIO;
3077 
3078 	for (n = 0; n < smoke.ncontext; n++) {
3079 		if (!smoke.contexts[n])
3080 			break;
3081 		kernel_context_close(smoke.contexts[n]);
3082 	}
3083 
3084 err_batch:
3085 	i915_gem_object_put(smoke.batch);
3086 err_free:
3087 	kfree(smoke.contexts);
3088 
3089 	return err;
3090 }
3091 
3092 static int nop_virtual_engine(struct intel_gt *gt,
3093 			      struct intel_engine_cs **siblings,
3094 			      unsigned int nsibling,
3095 			      unsigned int nctx,
3096 			      unsigned int flags)
3097 #define CHAIN BIT(0)
3098 {
3099 	IGT_TIMEOUT(end_time);
3100 	struct i915_request *request[16] = {};
3101 	struct intel_context *ve[16];
3102 	unsigned long n, prime, nc;
3103 	struct igt_live_test t;
3104 	ktime_t times[2] = {};
3105 	int err;
3106 
3107 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3108 
3109 	for (n = 0; n < nctx; n++) {
3110 		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3111 		if (IS_ERR(ve[n])) {
3112 			err = PTR_ERR(ve[n]);
3113 			nctx = n;
3114 			goto out;
3115 		}
3116 
3117 		err = intel_context_pin(ve[n]);
3118 		if (err) {
3119 			intel_context_put(ve[n]);
3120 			nctx = n;
3121 			goto out;
3122 		}
3123 	}
3124 
3125 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3126 	if (err)
3127 		goto out;
3128 
3129 	for_each_prime_number_from(prime, 1, 8192) {
3130 		times[1] = ktime_get_raw();
3131 
3132 		if (flags & CHAIN) {
3133 			for (nc = 0; nc < nctx; nc++) {
3134 				for (n = 0; n < prime; n++) {
3135 					struct i915_request *rq;
3136 
3137 					rq = i915_request_create(ve[nc]);
3138 					if (IS_ERR(rq)) {
3139 						err = PTR_ERR(rq);
3140 						goto out;
3141 					}
3142 
3143 					if (request[nc])
3144 						i915_request_put(request[nc]);
3145 					request[nc] = i915_request_get(rq);
3146 					i915_request_add(rq);
3147 				}
3148 			}
3149 		} else {
3150 			for (n = 0; n < prime; n++) {
3151 				for (nc = 0; nc < nctx; nc++) {
3152 					struct i915_request *rq;
3153 
3154 					rq = i915_request_create(ve[nc]);
3155 					if (IS_ERR(rq)) {
3156 						err = PTR_ERR(rq);
3157 						goto out;
3158 					}
3159 
3160 					if (request[nc])
3161 						i915_request_put(request[nc]);
3162 					request[nc] = i915_request_get(rq);
3163 					i915_request_add(rq);
3164 				}
3165 			}
3166 		}
3167 
3168 		for (nc = 0; nc < nctx; nc++) {
3169 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3170 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3171 				       __func__, ve[0]->engine->name,
3172 				       request[nc]->fence.context,
3173 				       request[nc]->fence.seqno);
3174 
3175 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3176 					  __func__, ve[0]->engine->name,
3177 					  request[nc]->fence.context,
3178 					  request[nc]->fence.seqno);
3179 				GEM_TRACE_DUMP();
3180 				intel_gt_set_wedged(gt);
3181 				break;
3182 			}
3183 		}
3184 
3185 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3186 		if (prime == 1)
3187 			times[0] = times[1];
3188 
3189 		for (nc = 0; nc < nctx; nc++) {
3190 			i915_request_put(request[nc]);
3191 			request[nc] = NULL;
3192 		}
3193 
3194 		if (__igt_timeout(end_time, NULL))
3195 			break;
3196 	}
3197 
3198 	err = igt_live_test_end(&t);
3199 	if (err)
3200 		goto out;
3201 
3202 	pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3203 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3204 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3205 
3206 out:
3207 	if (igt_flush_test(gt->i915))
3208 		err = -EIO;
3209 
3210 	for (nc = 0; nc < nctx; nc++) {
3211 		i915_request_put(request[nc]);
3212 		intel_context_unpin(ve[nc]);
3213 		intel_context_put(ve[nc]);
3214 	}
3215 	return err;
3216 }
3217 
3218 static int live_virtual_engine(void *arg)
3219 {
3220 	struct intel_gt *gt = arg;
3221 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3222 	struct intel_engine_cs *engine;
3223 	enum intel_engine_id id;
3224 	unsigned int class, inst;
3225 	int err;
3226 
3227 	if (intel_uc_uses_guc_submission(&gt->uc))
3228 		return 0;
3229 
3230 	for_each_engine(engine, gt, id) {
3231 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3232 		if (err) {
3233 			pr_err("Failed to wrap engine %s: err=%d\n",
3234 			       engine->name, err);
3235 			return err;
3236 		}
3237 	}
3238 
3239 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3240 		int nsibling, n;
3241 
3242 		nsibling = 0;
3243 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3244 			if (!gt->engine_class[class][inst])
3245 				continue;
3246 
3247 			siblings[nsibling++] = gt->engine_class[class][inst];
3248 		}
3249 		if (nsibling < 2)
3250 			continue;
3251 
3252 		for (n = 1; n <= nsibling + 1; n++) {
3253 			err = nop_virtual_engine(gt, siblings, nsibling,
3254 						 n, 0);
3255 			if (err)
3256 				return err;
3257 		}
3258 
3259 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3260 		if (err)
3261 			return err;
3262 	}
3263 
3264 	return 0;
3265 }
3266 
3267 static int mask_virtual_engine(struct intel_gt *gt,
3268 			       struct intel_engine_cs **siblings,
3269 			       unsigned int nsibling)
3270 {
3271 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3272 	struct intel_context *ve;
3273 	struct igt_live_test t;
3274 	unsigned int n;
3275 	int err;
3276 
3277 	/*
3278 	 * Check that by setting the execution mask on a request, we can
3279 	 * restrict it to our desired engine within the virtual engine.
3280 	 */
3281 
3282 	ve = intel_execlists_create_virtual(siblings, nsibling);
3283 	if (IS_ERR(ve)) {
3284 		err = PTR_ERR(ve);
3285 		goto out_close;
3286 	}
3287 
3288 	err = intel_context_pin(ve);
3289 	if (err)
3290 		goto out_put;
3291 
3292 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3293 	if (err)
3294 		goto out_unpin;
3295 
3296 	for (n = 0; n < nsibling; n++) {
3297 		request[n] = i915_request_create(ve);
3298 		if (IS_ERR(request[n])) {
3299 			err = PTR_ERR(request[n]);
3300 			nsibling = n;
3301 			goto out;
3302 		}
3303 
3304 		/* Reverse order as it's more likely to be unnatural */
3305 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3306 
3307 		i915_request_get(request[n]);
3308 		i915_request_add(request[n]);
3309 	}
3310 
3311 	for (n = 0; n < nsibling; n++) {
3312 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3313 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3314 			       __func__, ve->engine->name,
3315 			       request[n]->fence.context,
3316 			       request[n]->fence.seqno);
3317 
3318 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3319 				  __func__, ve->engine->name,
3320 				  request[n]->fence.context,
3321 				  request[n]->fence.seqno);
3322 			GEM_TRACE_DUMP();
3323 			intel_gt_set_wedged(gt);
3324 			err = -EIO;
3325 			goto out;
3326 		}
3327 
3328 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3329 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3330 			       request[n]->engine->name,
3331 			       siblings[nsibling - n - 1]->name);
3332 			err = -EINVAL;
3333 			goto out;
3334 		}
3335 	}
3336 
3337 	err = igt_live_test_end(&t);
3338 out:
3339 	if (igt_flush_test(gt->i915))
3340 		err = -EIO;
3341 
3342 	for (n = 0; n < nsibling; n++)
3343 		i915_request_put(request[n]);
3344 
3345 out_unpin:
3346 	intel_context_unpin(ve);
3347 out_put:
3348 	intel_context_put(ve);
3349 out_close:
3350 	return err;
3351 }
3352 
3353 static int live_virtual_mask(void *arg)
3354 {
3355 	struct intel_gt *gt = arg;
3356 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3357 	unsigned int class, inst;
3358 	int err;
3359 
3360 	if (intel_uc_uses_guc_submission(&gt->uc))
3361 		return 0;
3362 
3363 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3364 		unsigned int nsibling;
3365 
3366 		nsibling = 0;
3367 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3368 			if (!gt->engine_class[class][inst])
3369 				break;
3370 
3371 			siblings[nsibling++] = gt->engine_class[class][inst];
3372 		}
3373 		if (nsibling < 2)
3374 			continue;
3375 
3376 		err = mask_virtual_engine(gt, siblings, nsibling);
3377 		if (err)
3378 			return err;
3379 	}
3380 
3381 	return 0;
3382 }
3383 
3384 static int preserved_virtual_engine(struct intel_gt *gt,
3385 				    struct intel_engine_cs **siblings,
3386 				    unsigned int nsibling)
3387 {
3388 	struct i915_request *last = NULL;
3389 	struct intel_context *ve;
3390 	struct i915_vma *scratch;
3391 	struct igt_live_test t;
3392 	unsigned int n;
3393 	int err = 0;
3394 	u32 *cs;
3395 
3396 	scratch = create_scratch(siblings[0]->gt);
3397 	if (IS_ERR(scratch))
3398 		return PTR_ERR(scratch);
3399 
3400 	err = i915_vma_sync(scratch);
3401 	if (err)
3402 		goto out_scratch;
3403 
3404 	ve = intel_execlists_create_virtual(siblings, nsibling);
3405 	if (IS_ERR(ve)) {
3406 		err = PTR_ERR(ve);
3407 		goto out_scratch;
3408 	}
3409 
3410 	err = intel_context_pin(ve);
3411 	if (err)
3412 		goto out_put;
3413 
3414 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3415 	if (err)
3416 		goto out_unpin;
3417 
3418 	for (n = 0; n < NUM_GPR_DW; n++) {
3419 		struct intel_engine_cs *engine = siblings[n % nsibling];
3420 		struct i915_request *rq;
3421 
3422 		rq = i915_request_create(ve);
3423 		if (IS_ERR(rq)) {
3424 			err = PTR_ERR(rq);
3425 			goto out_end;
3426 		}
3427 
3428 		i915_request_put(last);
3429 		last = i915_request_get(rq);
3430 
3431 		cs = intel_ring_begin(rq, 8);
3432 		if (IS_ERR(cs)) {
3433 			i915_request_add(rq);
3434 			err = PTR_ERR(cs);
3435 			goto out_end;
3436 		}
3437 
3438 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3439 		*cs++ = CS_GPR(engine, n);
3440 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3441 		*cs++ = 0;
3442 
3443 		*cs++ = MI_LOAD_REGISTER_IMM(1);
3444 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3445 		*cs++ = n + 1;
3446 
3447 		*cs++ = MI_NOOP;
3448 		intel_ring_advance(rq, cs);
3449 
3450 		/* Restrict this request to run on a particular engine */
3451 		rq->execution_mask = engine->mask;
3452 		i915_request_add(rq);
3453 	}
3454 
3455 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
3456 		err = -ETIME;
3457 		goto out_end;
3458 	}
3459 
3460 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3461 	if (IS_ERR(cs)) {
3462 		err = PTR_ERR(cs);
3463 		goto out_end;
3464 	}
3465 
3466 	for (n = 0; n < NUM_GPR_DW; n++) {
3467 		if (cs[n] != n) {
3468 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
3469 			       cs[n], n);
3470 			err = -EINVAL;
3471 			break;
3472 		}
3473 	}
3474 
3475 	i915_gem_object_unpin_map(scratch->obj);
3476 
3477 out_end:
3478 	if (igt_live_test_end(&t))
3479 		err = -EIO;
3480 	i915_request_put(last);
3481 out_unpin:
3482 	intel_context_unpin(ve);
3483 out_put:
3484 	intel_context_put(ve);
3485 out_scratch:
3486 	i915_vma_unpin_and_release(&scratch, 0);
3487 	return err;
3488 }
3489 
3490 static int live_virtual_preserved(void *arg)
3491 {
3492 	struct intel_gt *gt = arg;
3493 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3494 	unsigned int class, inst;
3495 
3496 	/*
3497 	 * Check that the context image retains non-privileged (user) registers
3498 	 * from one engine to the next. For this we check that the CS_GPR
3499 	 * are preserved.
3500 	 */
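	/*
	 * Each request emitted by preserved_virtual_engine() stores
	 * CS_GPR(n) to scratch[n] and then loads n + 1 into
	 * CS_GPR((n + 1) % NUM_GPR_DW), while being pinned to a different
	 * sibling each time. The scratch page only ends up holding the
	 * sequence 0..NUM_GPR_DW-1 if the GPR values follow the context
	 * from one engine to the next.
	 */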
3501 
3502 	if (intel_uc_uses_guc_submission(&gt->uc))
3503 		return 0;
3504 
3505 	/* As we use CS_GPR we cannot run before they existed on all engines. */
3506 	if (INTEL_GEN(gt->i915) < 9)
3507 		return 0;
3508 
3509 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3510 		int nsibling, err;
3511 
3512 		nsibling = 0;
3513 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3514 			if (!gt->engine_class[class][inst])
3515 				continue;
3516 
3517 			siblings[nsibling++] = gt->engine_class[class][inst];
3518 		}
3519 		if (nsibling < 2)
3520 			continue;
3521 
3522 		err = preserved_virtual_engine(gt, siblings, nsibling);
3523 		if (err)
3524 			return err;
3525 	}
3526 
3527 	return 0;
3528 }
3529 
3530 static int bond_virtual_engine(struct intel_gt *gt,
3531 			       unsigned int class,
3532 			       struct intel_engine_cs **siblings,
3533 			       unsigned int nsibling,
3534 			       unsigned int flags)
3535 #define BOND_SCHEDULE BIT(0)
3536 {
3537 	struct intel_engine_cs *master;
3538 	struct i915_request *rq[16];
3539 	enum intel_engine_id id;
3540 	struct igt_spinner spin;
3541 	unsigned long n;
3542 	int err;
3543 
3544 	/*
3545 	 * A set of bonded requests is intended to be run concurrently
3546 	 * across a number of engines. We use one request per-engine
3547 	 * and a magic fence to schedule each of the bonded requests
3548 	 * at the same time. A consequence of our current scheduler is that
3549 	 * we only move requests to the HW ready queue when the request
3550 	 * becomes ready, that is when all of its prerequisite fences have
3551 	 * been signaled. As one of those fences is the master submit fence,
3552 	 * there is a delay on all secondary fences as the HW may be
3553 	 * currently busy. Equally, as all the requests are independent,
3554 	 * they may have other fences that delay individual request
3555 	 * submission to HW. Ergo, we do not guarantee that all requests are
3556 	 * immediately submitted to HW at the same time, just that if the
3557 	 * rules are abided by, they are ready at the same time as the
3558 	 * first is submitted. Userspace can embed semaphores in its batch
3559 	 * to ensure parallel execution of its phases as it requires.
3560 	 * Though naturally it gets requested that perhaps the scheduler should
3561 	 * take care of parallel execution, even across preemption events on
3562 	 * different HW. (The proper answer is of course "lalalala".)
3563 	 *
3564 	 * With the submit-fence, we have identified three possible phases
3565 	 * of synchronisation depending on the master fence: queued (not
3566 	 * ready), executing, and signaled. The first two are quite simple
3567 	 * and checked below. However, the signaled master fence handling is
3568 	 * contentious. Currently we do not distinguish between a signaled
3569 	 * fence and an expired fence, as once signaled it does not convey
3570 	 * any information about the previous execution. It may even be freed
3571 	 * and hence checking later it may not exist at all. Ergo we currently
3572 	 * do not apply the bonding constraint for an already signaled fence,
3573 	 * as our expectation is that it should not constrain the secondaries
3574 	 * and is outside of the scope of the bonded request API (i.e. all
3575 	 * userspace requests are meant to be running in parallel). As
3576 	 * it imposes no constraint, and is effectively a no-op, we do not
3577 	 * check below as normal execution flows are checked extensively above.
3578 	 *
3579 	 * XXX Is the degenerate handling of signaled submit fences the
3580 	 * expected behaviour for userspace?
3581 	 */
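	/*
	 * The setup below is, in outline: one master request (optionally
	 * held back by an onstack submit fence), then for each sibling a
	 * fresh virtual engine bonded to that sibling via
	 * intel_virtual_engine_attach_bond(), with its request coupled to
	 * the master through i915_request_await_execution() and
	 * bond_execute. We then check each bonded request ran on its
	 * intended sibling.
	 */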
3582 
3583 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3584 
3585 	if (igt_spinner_init(&spin, gt))
3586 		return -ENOMEM;
3587 
3588 	err = 0;
3589 	rq[0] = ERR_PTR(-ENOMEM);
3590 	for_each_engine(master, gt, id) {
3591 		struct i915_sw_fence fence = {};
3592 		struct intel_context *ce;
3593 
3594 		if (master->class == class)
3595 			continue;
3596 
3597 		ce = intel_context_create(master);
3598 		if (IS_ERR(ce)) {
3599 			err = PTR_ERR(ce);
3600 			goto out;
3601 		}
3602 
3603 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3604 
3605 		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
3606 		intel_context_put(ce);
3607 		if (IS_ERR(rq[0])) {
3608 			err = PTR_ERR(rq[0]);
3609 			goto out;
3610 		}
3611 		i915_request_get(rq[0]);
3612 
3613 		if (flags & BOND_SCHEDULE) {
3614 			onstack_fence_init(&fence);
3615 			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3616 							       &fence,
3617 							       GFP_KERNEL);
3618 		}
3619 
3620 		i915_request_add(rq[0]);
3621 		if (err < 0)
3622 			goto out;
3623 
3624 		if (!(flags & BOND_SCHEDULE) &&
3625 		    !igt_wait_for_spinner(&spin, rq[0])) {
3626 			err = -EIO;
3627 			goto out;
3628 		}
3629 
3630 		for (n = 0; n < nsibling; n++) {
3631 			struct intel_context *ve;
3632 
3633 			ve = intel_execlists_create_virtual(siblings, nsibling);
3634 			if (IS_ERR(ve)) {
3635 				err = PTR_ERR(ve);
3636 				onstack_fence_fini(&fence);
3637 				goto out;
3638 			}
3639 
3640 			err = intel_virtual_engine_attach_bond(ve->engine,
3641 							       master,
3642 							       siblings[n]);
3643 			if (err) {
3644 				intel_context_put(ve);
3645 				onstack_fence_fini(&fence);
3646 				goto out;
3647 			}
3648 
3649 			err = intel_context_pin(ve);
3650 			intel_context_put(ve);
3651 			if (err) {
3652 				onstack_fence_fini(&fence);
3653 				goto out;
3654 			}
3655 
3656 			rq[n + 1] = i915_request_create(ve);
3657 			intel_context_unpin(ve);
3658 			if (IS_ERR(rq[n + 1])) {
3659 				err = PTR_ERR(rq[n + 1]);
3660 				onstack_fence_fini(&fence);
3661 				goto out;
3662 			}
3663 			i915_request_get(rq[n + 1]);
3664 
3665 			err = i915_request_await_execution(rq[n + 1],
3666 							   &rq[0]->fence,
3667 							   ve->engine->bond_execute);
3668 			i915_request_add(rq[n + 1]);
3669 			if (err < 0) {
3670 				onstack_fence_fini(&fence);
3671 				goto out;
3672 			}
3673 		}
3674 		onstack_fence_fini(&fence);
3675 		intel_engine_flush_submission(master);
3676 		igt_spinner_end(&spin);
3677 
3678 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3679 			pr_err("Master request did not execute (on %s)!\n",
3680 			       rq[0]->engine->name);
3681 			err = -EIO;
3682 			goto out;
3683 		}
3684 
3685 		for (n = 0; n < nsibling; n++) {
3686 			if (i915_request_wait(rq[n + 1], 0,
3687 					      MAX_SCHEDULE_TIMEOUT) < 0) {
3688 				err = -EIO;
3689 				goto out;
3690 			}
3691 
3692 			if (rq[n + 1]->engine != siblings[n]) {
3693 				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3694 				       siblings[n]->name,
3695 				       rq[n + 1]->engine->name,
3696 				       rq[0]->engine->name);
3697 				err = -EINVAL;
3698 				goto out;
3699 			}
3700 		}
3701 
3702 		for (n = 0; !IS_ERR(rq[n]); n++)
3703 			i915_request_put(rq[n]);
3704 		rq[0] = ERR_PTR(-ENOMEM);
3705 	}
3706 
3707 out:
3708 	for (n = 0; !IS_ERR(rq[n]); n++)
3709 		i915_request_put(rq[n]);
3710 	if (igt_flush_test(gt->i915))
3711 		err = -EIO;
3712 
3713 	igt_spinner_fini(&spin);
3714 	return err;
3715 }
3716 
3717 static int live_virtual_bond(void *arg)
3718 {
3719 	static const struct phase {
3720 		const char *name;
3721 		unsigned int flags;
3722 	} phases[] = {
3723 		{ "", 0 },
3724 		{ "schedule", BOND_SCHEDULE },
3725 		{ },
3726 	};
3727 	struct intel_gt *gt = arg;
3728 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3729 	unsigned int class, inst;
3730 	int err;
3731 
3732 	if (intel_uc_uses_guc_submission(&gt->uc))
3733 		return 0;
3734 
3735 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3736 		const struct phase *p;
3737 		int nsibling;
3738 
3739 		nsibling = 0;
3740 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3741 			if (!gt->engine_class[class][inst])
3742 				break;
3743 
3744 			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3745 			siblings[nsibling++] = gt->engine_class[class][inst];
3746 		}
3747 		if (nsibling < 2)
3748 			continue;
3749 
3750 		for (p = phases; p->name; p++) {
3751 			err = bond_virtual_engine(gt,
3752 						  class, siblings, nsibling,
3753 						  p->flags);
3754 			if (err) {
3755 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3756 				       __func__, p->name, class, nsibling, err);
3757 				return err;
3758 			}
3759 		}
3760 	}
3761 
3762 	return 0;
3763 }
3764 
3765 static int reset_virtual_engine(struct intel_gt *gt,
3766 				struct intel_engine_cs **siblings,
3767 				unsigned int nsibling)
3768 {
3769 	struct intel_engine_cs *engine;
3770 	struct intel_context *ve;
3771 	unsigned long *heartbeat;
3772 	struct igt_spinner spin;
3773 	struct i915_request *rq;
3774 	unsigned int n;
3775 	int err = 0;
3776 
3777 	/*
3778 	 * In order to support offline error capture for fast preempt reset,
3779 	 * we need to decouple the guilty request and ensure that it and its
3780 	 * descendants are not executed while the capture is in progress.
3781 	 */
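	/*
	 * The test below drives that flow by hand: kick the execlists
	 * tasklet, unwind the incomplete requests as if preemption had
	 * failed, put the guilty request on hold with execlists_hold(),
	 * reset the engine, check the held request is not resubmitted,
	 * then release it with execlists_unhold() and wait for completion.
	 */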
3782 
3783 	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3784 	if (!heartbeat)
3785 		return -ENOMEM;
3786 
3787 	if (igt_spinner_init(&spin, gt)) {
3788 		err = -ENOMEM;
3789 		goto out_free;
3790 	}
3791 
3792 	ve = intel_execlists_create_virtual(siblings, nsibling);
3793 	if (IS_ERR(ve)) {
3794 		err = PTR_ERR(ve);
3795 		goto out_spin;
3796 	}
3797 
3798 	for (n = 0; n < nsibling; n++)
3799 		engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3800 
3801 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3802 	if (IS_ERR(rq)) {
3803 		err = PTR_ERR(rq);
3804 		goto out_heartbeat;
3805 	}
3806 	i915_request_add(rq);
3807 
3808 	if (!igt_wait_for_spinner(&spin, rq)) {
3809 		intel_gt_set_wedged(gt);
3810 		err = -ETIME;
3811 		goto out_heartbeat;
3812 	}
3813 
3814 	engine = rq->engine;
3815 	GEM_BUG_ON(engine == ve->engine);
3816 
3817 	/* Take ownership of the reset and tasklet */
3818 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3819 			     &gt->reset.flags)) {
3820 		intel_gt_set_wedged(gt);
3821 		err = -EBUSY;
3822 		goto out_heartbeat;
3823 	}
3824 	tasklet_disable(&engine->execlists.tasklet);
3825 
3826 	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3827 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3828 
3829 	/* Fake a preemption event; failed of course */
3830 	spin_lock_irq(&engine->active.lock);
3831 	__unwind_incomplete_requests(engine);
3832 	spin_unlock_irq(&engine->active.lock);
3833 	GEM_BUG_ON(rq->engine != ve->engine);
3834 
3835 	/* Reset the engine while keeping our active request on hold */
3836 	execlists_hold(engine, rq);
3837 	GEM_BUG_ON(!i915_request_on_hold(rq));
3838 
3839 	intel_engine_reset(engine, NULL);
3840 	GEM_BUG_ON(rq->fence.error != -EIO);
3841 
3842 	/* Release our grasp on the engine, letting CS flow again */
3843 	tasklet_enable(&engine->execlists.tasklet);
3844 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
3845 
3846 	/* Check that we do not resubmit the held request */
3847 	i915_request_get(rq);
3848 	if (!i915_request_wait(rq, 0, HZ / 5)) {
3849 		pr_err("%s: on hold request completed!\n",
3850 		       engine->name);
3851 		intel_gt_set_wedged(gt);
3852 		err = -EIO;
3853 		goto out_rq;
3854 	}
3855 	GEM_BUG_ON(!i915_request_on_hold(rq));
3856 
3857 	/* But is resubmitted on release */
3858 	execlists_unhold(engine, rq);
3859 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3860 		pr_err("%s: held request did not complete!\n",
3861 		       engine->name);
3862 		intel_gt_set_wedged(gt);
3863 		err = -ETIME;
3864 	}
3865 
3866 out_rq:
3867 	i915_request_put(rq);
3868 out_heartbeat:
3869 	for (n = 0; n < nsibling; n++)
3870 		engine_heartbeat_enable(siblings[n], heartbeat[n]);
3871 
3872 	intel_context_put(ve);
3873 out_spin:
3874 	igt_spinner_fini(&spin);
3875 out_free:
3876 	kfree(heartbeat);
3877 	return err;
3878 }
3879 
3880 static int live_virtual_reset(void *arg)
3881 {
3882 	struct intel_gt *gt = arg;
3883 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3884 	unsigned int class, inst;
3885 
3886 	/*
3887 	 * Check that we handle a reset event within a virtual engine.
3888 	 * Only the physical engine is reset, but we have to check the flow
3889 	 * of the virtual requests around the reset, and make sure the
3890 	 * request is not forgotten.
3891 	 */
3892 
3893 	if (intel_uc_uses_guc_submission(&gt->uc))
3894 		return 0;
3895 
3896 	if (!intel_has_reset_engine(gt))
3897 		return 0;
3898 
3899 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3900 		int nsibling, err;
3901 
3902 		nsibling = 0;
3903 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3904 			if (!gt->engine_class[class][inst])
3905 				continue;
3906 
3907 			siblings[nsibling++] = gt->engine_class[class][inst];
3908 		}
3909 		if (nsibling < 2)
3910 			continue;
3911 
3912 		err = reset_virtual_engine(gt, siblings, nsibling);
3913 		if (err)
3914 			return err;
3915 	}
3916 
3917 	return 0;
3918 }
3919 
3920 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3921 {
3922 	static const struct i915_subtest tests[] = {
3923 		SUBTEST(live_sanitycheck),
3924 		SUBTEST(live_unlite_switch),
3925 		SUBTEST(live_unlite_preempt),
3926 		SUBTEST(live_pin_rewind),
3927 		SUBTEST(live_hold_reset),
3928 		SUBTEST(live_error_interrupt),
3929 		SUBTEST(live_timeslice_preempt),
3930 		SUBTEST(live_timeslice_rewind),
3931 		SUBTEST(live_timeslice_queue),
3932 		SUBTEST(live_busywait_preempt),
3933 		SUBTEST(live_preempt),
3934 		SUBTEST(live_late_preempt),
3935 		SUBTEST(live_nopreempt),
3936 		SUBTEST(live_preempt_cancel),
3937 		SUBTEST(live_suppress_self_preempt),
3938 		SUBTEST(live_suppress_wait_preempt),
3939 		SUBTEST(live_chain_preempt),
3940 		SUBTEST(live_preempt_gang),
3941 		SUBTEST(live_preempt_timeout),
3942 		SUBTEST(live_preempt_smoke),
3943 		SUBTEST(live_virtual_engine),
3944 		SUBTEST(live_virtual_mask),
3945 		SUBTEST(live_virtual_preserved),
3946 		SUBTEST(live_virtual_bond),
3947 		SUBTEST(live_virtual_reset),
3948 	};
3949 
3950 	if (!HAS_EXECLISTS(i915))
3951 		return 0;
3952 
3953 	if (intel_gt_is_wedged(&i915->gt))
3954 		return 0;
3955 
3956 	return intel_gt_live_subtests(tests, &i915->gt);
3957 }
3958 
3959 static void hexdump(const void *buf, size_t len)
3960 {
3961 	const size_t rowsize = 8 * sizeof(u32);
3962 	const void *prev = NULL;
3963 	bool skip = false;
3964 	size_t pos;
3965 
3966 	for (pos = 0; pos < len; pos += rowsize) {
3967 		char line[128];
3968 
3969 		if (prev && !memcmp(prev, buf + pos, rowsize)) {
3970 			if (!skip) {
3971 				pr_info("*\n");
3972 				skip = true;
3973 			}
3974 			continue;
3975 		}
3976 
3977 		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3978 						rowsize, sizeof(u32),
3979 						line, sizeof(line),
3980 						false) >= sizeof(line));
3981 		pr_info("[%04zx] %s\n", pos, line);
3982 
3983 		prev = buf + pos;
3984 		skip = false;
3985 	}
3986 }
3987 
3988 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
3989 {
3990 	const u32 offset =
3991 		i915_ggtt_offset(ce->engine->status_page.vma) +
3992 		offset_in_page(slot);
3993 	struct i915_request *rq;
3994 	u32 *cs;
3995 
3996 	rq = intel_context_create_request(ce);
3997 	if (IS_ERR(rq))
3998 		return PTR_ERR(rq);
3999 
4000 	cs = intel_ring_begin(rq, 4);
4001 	if (IS_ERR(cs)) {
4002 		i915_request_add(rq);
4003 		return PTR_ERR(cs);
4004 	}
4005 
4006 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4007 	*cs++ = offset;
4008 	*cs++ = 0;
4009 	*cs++ = 1;
4010 
4011 	intel_ring_advance(rq, cs);
4012 
4013 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4014 	i915_request_add(rq);
4015 	return 0;
4016 }
4017 
4018 static int live_lrc_layout(void *arg)
4019 {
4020 	struct intel_gt *gt = arg;
4021 	struct intel_engine_cs *engine;
4022 	enum intel_engine_id id;
4023 	u32 *lrc;
4024 	int err;
4025 
4026 	/*
4027 	 * Check that the register offsets we use to create the initial reg
4028 	 * state match the layout saved by the HW.
4029 	 */
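	/*
	 * To do so, poison a scratch page (lrc), ask
	 * execlists_init_reg_state() to write our software view of the
	 * context into it, and then walk the HW default context image
	 * LRI by LRI, comparing the register offsets while skipping over
	 * the register values themselves.
	 */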
4030 
4031 	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4032 	if (!lrc)
4033 		return -ENOMEM;
4034 
4035 	err = 0;
4036 	for_each_engine(engine, gt, id) {
4037 		u32 *hw;
4038 		int dw;
4039 
4040 		if (!engine->default_state)
4041 			continue;
4042 
4043 		hw = i915_gem_object_pin_map(engine->default_state,
4044 					     I915_MAP_WB);
4045 		if (IS_ERR(hw)) {
4046 			err = PTR_ERR(hw);
4047 			break;
4048 		}
4049 		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4050 
4051 		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4052 					 engine->kernel_context,
4053 					 engine,
4054 					 engine->kernel_context->ring,
4055 					 true);
4056 
4057 		dw = 0;
4058 		do {
4059 			u32 lri = hw[dw];
4060 
4061 			if (lri == 0) {
4062 				dw++;
4063 				continue;
4064 			}
4065 
4066 			if (lrc[dw] == 0) {
4067 				pr_debug("%s: skipped instruction %x at dword %d\n",
4068 					 engine->name, lri, dw);
4069 				dw++;
4070 				continue;
4071 			}
4072 
4073 			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4074 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4075 				       engine->name, dw, lri);
4076 				err = -EINVAL;
4077 				break;
4078 			}
4079 
4080 			if (lrc[dw] != lri) {
4081 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4082 				       engine->name, dw, lri, lrc[dw]);
4083 				err = -EINVAL;
4084 				break;
4085 			}
4086 
4087 			lri &= 0x7f;
4088 			lri++;
4089 			dw++;
4090 
4091 			while (lri) {
4092 				if (hw[dw] != lrc[dw]) {
4093 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4094 					       engine->name, dw, hw[dw], lrc[dw]);
4095 					err = -EINVAL;
4096 					break;
4097 				}
4098 
4099 				/*
4100 				 * Skip over the actual register value as we
4101 				 * expect that to differ.
4102 				 */
4103 				dw += 2;
4104 				lri -= 2;
4105 			}
4106 		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4107 
4108 		if (err) {
4109 			pr_info("%s: HW register image:\n", engine->name);
4110 			hexdump(hw, PAGE_SIZE);
4111 
4112 			pr_info("%s: SW register image:\n", engine->name);
4113 			hexdump(lrc, PAGE_SIZE);
4114 		}
4115 
4116 		i915_gem_object_unpin_map(engine->default_state);
4117 		if (err)
4118 			break;
4119 	}
4120 
4121 	kfree(lrc);
4122 	return err;
4123 }
4124 
4125 static int find_offset(const u32 *lri, u32 offset)
4126 {
4127 	int i;
4128 
4129 	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4130 		if (lri[i] == offset)
4131 			return i;
4132 
4133 	return -1;
4134 }
4135 
4136 static int live_lrc_fixed(void *arg)
4137 {
4138 	struct intel_gt *gt = arg;
4139 	struct intel_engine_cs *engine;
4140 	enum intel_engine_id id;
4141 	int err = 0;
4142 
4143 	/*
4144 	 * Check the assumed register offsets match the actual locations in
4145 	 * the context image.
4146 	 */
4147 
4148 	for_each_engine(engine, gt, id) {
4149 		const struct {
4150 			u32 reg;
4151 			u32 offset;
4152 			const char *name;
4153 		} tbl[] = {
4154 			{
4155 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4156 				CTX_RING_START - 1,
4157 				"RING_START"
4158 			},
4159 			{
4160 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4161 				CTX_RING_CTL - 1,
4162 				"RING_CTL"
4163 			},
4164 			{
4165 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4166 				CTX_RING_HEAD - 1,
4167 				"RING_HEAD"
4168 			},
4169 			{
4170 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4171 				CTX_RING_TAIL - 1,
4172 				"RING_TAIL"
4173 			},
4174 			{
4175 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4176 				lrc_ring_mi_mode(engine),
4177 				"RING_MI_MODE"
4178 			},
4179 			{
4180 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4181 				CTX_BB_STATE - 1,
4182 				"BB_STATE"
4183 			},
4184 			{
4185 				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4186 				CTX_TIMESTAMP - 1,
4187 				"RING_CTX_TIMESTAMP"
4188 			},
4189 			{ },
4190 		}, *t;
4191 		u32 *hw;
4192 
4193 		if (!engine->default_state)
4194 			continue;
4195 
4196 		hw = i915_gem_object_pin_map(engine->default_state,
4197 					     I915_MAP_WB);
4198 		if (IS_ERR(hw)) {
4199 			err = PTR_ERR(hw);
4200 			break;
4201 		}
4202 		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4203 
4204 		for (t = tbl; t->name; t++) {
4205 			int dw = find_offset(hw, t->reg);
4206 
4207 			if (dw != t->offset) {
4208 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4209 				       engine->name,
4210 				       t->name,
4211 				       t->reg,
4212 				       dw,
4213 				       t->offset);
4214 				err = -EINVAL;
4215 			}
4216 		}
4217 
4218 		i915_gem_object_unpin_map(engine->default_state);
4219 	}
4220 
4221 	return err;
4222 }
4223 
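/*
 * Submit a request on a fresh context that uses SRM to copy the live
 * RING_START and RING_TAIL registers into the scratch buffer, then compare
 * what the GPU read back against the values we track on the CPU side (the
 * ring's GGTT offset and the tail we just emitted).
 */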
4224 static int __live_lrc_state(struct intel_engine_cs *engine,
4225 			    struct i915_vma *scratch)
4226 {
4227 	struct intel_context *ce;
4228 	struct i915_request *rq;
4229 	enum {
4230 		RING_START_IDX = 0,
4231 		RING_TAIL_IDX,
4232 		MAX_IDX
4233 	};
4234 	u32 expected[MAX_IDX];
4235 	u32 *cs;
4236 	int err;
4237 	int n;
4238 
4239 	ce = intel_context_create(engine);
4240 	if (IS_ERR(ce))
4241 		return PTR_ERR(ce);
4242 
4243 	err = intel_context_pin(ce);
4244 	if (err)
4245 		goto err_put;
4246 
4247 	rq = i915_request_create(ce);
4248 	if (IS_ERR(rq)) {
4249 		err = PTR_ERR(rq);
4250 		goto err_unpin;
4251 	}
4252 
4253 	cs = intel_ring_begin(rq, 4 * MAX_IDX);
4254 	if (IS_ERR(cs)) {
4255 		err = PTR_ERR(cs);
4256 		i915_request_add(rq);
4257 		goto err_unpin;
4258 	}
4259 
4260 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4261 	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4262 	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4263 	*cs++ = 0;
4264 
4265 	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4266 
4267 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4268 	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4269 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4270 	*cs++ = 0;
4271 
4272 	i915_vma_lock(scratch);
4273 	err = i915_request_await_object(rq, scratch->obj, true);
4274 	if (!err)
4275 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4276 	i915_vma_unlock(scratch);
4277 
4278 	i915_request_get(rq);
4279 	i915_request_add(rq);
4280 	if (err)
4281 		goto err_rq;
4282 
4283 	intel_engine_flush_submission(engine);
4284 	expected[RING_TAIL_IDX] = ce->ring->tail;
4285 
4286 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4287 		err = -ETIME;
4288 		goto err_rq;
4289 	}
4290 
4291 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4292 	if (IS_ERR(cs)) {
4293 		err = PTR_ERR(cs);
4294 		goto err_rq;
4295 	}
4296 
4297 	for (n = 0; n < MAX_IDX; n++) {
4298 		if (cs[n] != expected[n]) {
4299 			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4300 			       engine->name, n, cs[n], expected[n]);
4301 			err = -EINVAL;
4302 			break;
4303 		}
4304 	}
4305 
4306 	i915_gem_object_unpin_map(scratch->obj);
4307 
4308 err_rq:
4309 	i915_request_put(rq);
4310 err_unpin:
4311 	intel_context_unpin(ce);
4312 err_put:
4313 	intel_context_put(ce);
4314 	return err;
4315 }
4316 
4317 static int live_lrc_state(void *arg)
4318 {
4319 	struct intel_gt *gt = arg;
4320 	struct intel_engine_cs *engine;
4321 	struct i915_vma *scratch;
4322 	enum intel_engine_id id;
4323 	int err = 0;
4324 
4325 	/*
4326 	 * Check that the live register state matches what we expect for this
4327 	 * intel_context.
4328 	 */
4329 
4330 	scratch = create_scratch(gt);
4331 	if (IS_ERR(scratch))
4332 		return PTR_ERR(scratch);
4333 
4334 	for_each_engine(engine, gt, id) {
4335 		err = __live_lrc_state(engine, scratch);
4336 		if (err)
4337 			break;
4338 	}
4339 
4340 	if (igt_flush_test(gt->i915))
4341 		err = -EIO;
4342 
4343 	i915_vma_unpin_and_release(&scratch, 0);
4344 	return err;
4345 }
4346 
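/*
 * Fill every CS_GPR of the context with a known non-zero value (STACK_MAGIC)
 * via a single MI_LOAD_REGISTER_IMM, so that a later read back can prove the
 * GPR are cleared again when a new context is created.
 */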
4347 static int gpr_make_dirty(struct intel_context *ce)
4348 {
4349 	struct i915_request *rq;
4350 	u32 *cs;
4351 	int n;
4352 
4353 	rq = intel_context_create_request(ce);
4354 	if (IS_ERR(rq))
4355 		return PTR_ERR(rq);
4356 
4357 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4358 	if (IS_ERR(cs)) {
4359 		i915_request_add(rq);
4360 		return PTR_ERR(cs);
4361 	}
4362 
4363 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4364 	for (n = 0; n < NUM_GPR_DW; n++) {
4365 		*cs++ = CS_GPR(ce->engine, n);
4366 		*cs++ = STACK_MAGIC;
4367 	}
4368 	*cs++ = MI_NOOP;
4369 
4370 	intel_ring_advance(rq, cs);
4371 
4372 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4373 	i915_request_add(rq);
4374 
4375 	return 0;
4376 }
4377 
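/*
 * Build a request that polls a semaphore in the engine's status page and,
 * once released, copies each CS_GPR into the scratch buffer with SRM. The
 * semaphore lets the caller control exactly when the read happens, e.g.
 * after the context has been preempted and restored.
 */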
4378 static struct i915_request *
4379 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4380 {
4381 	const u32 offset =
4382 		i915_ggtt_offset(ce->engine->status_page.vma) +
4383 		offset_in_page(slot);
4384 	struct i915_request *rq;
4385 	u32 *cs;
4386 	int err;
4387 	int n;
4388 
4389 	rq = intel_context_create_request(ce);
4390 	if (IS_ERR(rq))
4391 		return rq;
4392 
4393 	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4394 	if (IS_ERR(cs)) {
4395 		i915_request_add(rq);
4396 		return ERR_CAST(cs);
4397 	}
4398 
4399 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4400 	*cs++ = MI_NOOP;
4401 
4402 	*cs++ = MI_SEMAPHORE_WAIT |
4403 		MI_SEMAPHORE_GLOBAL_GTT |
4404 		MI_SEMAPHORE_POLL |
4405 		MI_SEMAPHORE_SAD_NEQ_SDD;
4406 	*cs++ = 0;
4407 	*cs++ = offset;
4408 	*cs++ = 0;
4409 
4410 	for (n = 0; n < NUM_GPR_DW; n++) {
4411 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4412 		*cs++ = CS_GPR(ce->engine, n);
4413 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4414 		*cs++ = 0;
4415 	}
4416 
4417 	i915_vma_lock(scratch);
4418 	err = i915_request_await_object(rq, scratch->obj, true);
4419 	if (!err)
4420 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4421 	i915_vma_unlock(scratch);
4422 
4423 	i915_request_get(rq);
4424 	i915_request_add(rq);
4425 	if (err) {
4426 		i915_request_put(rq);
4427 		rq = ERR_PTR(err);
4428 	}
4429 
4430 	return rq;
4431 }
4432 
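/*
 * Dirty the GPR using the kernel context, then read them back from a brand
 * new context: every dword must be zero. With preempt set, the new context
 * is held at the semaphore while the kernel context dirties the GPR again
 * and signals it, so the zeroed values must also survive being saved and
 * restored around the preemption.
 */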
4433 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4434 			  struct i915_vma *scratch,
4435 			  bool preempt)
4436 {
4437 	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4438 	struct intel_context *ce;
4439 	struct i915_request *rq;
4440 	u32 *cs;
4441 	int err;
4442 	int n;
4443 
4444 	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4445 		return 0; /* GPR only on rcs0 for gen8 */
4446 
4447 	err = gpr_make_dirty(engine->kernel_context);
4448 	if (err)
4449 		return err;
4450 
4451 	ce = intel_context_create(engine);
4452 	if (IS_ERR(ce))
4453 		return PTR_ERR(ce);
4454 
4455 	rq = __gpr_read(ce, scratch, slot);
4456 	if (IS_ERR(rq)) {
4457 		err = PTR_ERR(rq);
4458 		goto err_put;
4459 	}
4460 
4461 	err = wait_for_submit(engine, rq, HZ / 2);
4462 	if (err)
4463 		goto err_rq;
4464 
4465 	if (preempt) {
4466 		err = gpr_make_dirty(engine->kernel_context);
4467 		if (err)
4468 			goto err_rq;
4469 
4470 		err = emit_semaphore_signal(engine->kernel_context, slot);
4471 		if (err)
4472 			goto err_rq;
4473 	} else {
4474 		slot[0] = 1;
4475 		wmb();
4476 	}
4477 
4478 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4479 		err = -ETIME;
4480 		goto err_rq;
4481 	}
4482 
4483 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4484 	if (IS_ERR(cs)) {
4485 		err = PTR_ERR(cs);
4486 		goto err_rq;
4487 	}
4488 
4489 	for (n = 0; n < NUM_GPR_DW; n++) {
4490 		if (cs[n]) {
4491 			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4492 			       engine->name,
4493 			       n / 2, n & 1 ? "udw" : "ldw",
4494 			       cs[n]);
4495 			err = -EINVAL;
4496 			break;
4497 		}
4498 	}
4499 
4500 	i915_gem_object_unpin_map(scratch->obj);
4501 
4502 err_rq:
4503 	memset32(&slot[0], -1, 4);
4504 	wmb();
4505 	i915_request_put(rq);
4506 err_put:
4507 	intel_context_put(ce);
4508 	return err;
4509 }
4510 
4511 static int live_lrc_gpr(void *arg)
4512 {
4513 	struct intel_gt *gt = arg;
4514 	struct intel_engine_cs *engine;
4515 	struct i915_vma *scratch;
4516 	enum intel_engine_id id;
4517 	int err = 0;
4518 
4519 	/*
4520 	 * Check that the GPRs are cleared in new contexts as we need
4521 	 * to avoid leaking any information from previous contexts.
4522 	 */
4523 
4524 	scratch = create_scratch(gt);
4525 	if (IS_ERR(scratch))
4526 		return PTR_ERR(scratch);
4527 
4528 	for_each_engine(engine, gt, id) {
4529 		unsigned long heartbeat;
4530 
4531 		engine_heartbeat_disable(engine, &heartbeat);
4532 
4533 		err = __live_lrc_gpr(engine, scratch, false);
4534 		if (err)
4535 			goto err;
4536 
4537 		err = __live_lrc_gpr(engine, scratch, true);
4538 		if (err)
4539 			goto err;
4540 
4541 err:
4542 		engine_heartbeat_enable(engine, heartbeat);
4543 		if (igt_flush_test(gt->i915))
4544 			err = -EIO;
4545 		if (err)
4546 			break;
4547 	}
4548 
4549 	i915_vma_unpin_and_release(&scratch, 0);
4550 	return err;
4551 }
4552 
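/*
 * Build a request that polls a semaphore in the status page and, once
 * released, stores the engine's RING_CTX_TIMESTAMP register into slot[idx],
 * sampling the live timestamp while the request is running.
 */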
4553 static struct i915_request *
4554 create_timestamp(struct intel_context *ce, void *slot, int idx)
4555 {
4556 	const u32 offset =
4557 		i915_ggtt_offset(ce->engine->status_page.vma) +
4558 		offset_in_page(slot);
4559 	struct i915_request *rq;
4560 	u32 *cs;
4561 	int err;
4562 
4563 	rq = intel_context_create_request(ce);
4564 	if (IS_ERR(rq))
4565 		return rq;
4566 
4567 	cs = intel_ring_begin(rq, 10);
4568 	if (IS_ERR(cs)) {
4569 		err = PTR_ERR(cs);
4570 		goto err;
4571 	}
4572 
4573 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4574 	*cs++ = MI_NOOP;
4575 
4576 	*cs++ = MI_SEMAPHORE_WAIT |
4577 		MI_SEMAPHORE_GLOBAL_GTT |
4578 		MI_SEMAPHORE_POLL |
4579 		MI_SEMAPHORE_SAD_NEQ_SDD;
4580 	*cs++ = 0;
4581 	*cs++ = offset;
4582 	*cs++ = 0;
4583 
4584 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4585 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
4586 	*cs++ = offset + idx * sizeof(u32);
4587 	*cs++ = 0;
4588 
4589 	intel_ring_advance(rq, cs);
4590 
4591 	rq->sched.attr.priority = I915_PRIORITY_MASK;
4592 	err = 0;
4593 err:
4594 	i915_request_get(rq);
4595 	i915_request_add(rq);
4596 	if (err) {
4597 		i915_request_put(rq);
4598 		return ERR_PTR(err);
4599 	}
4600 
4601 	return rq;
4602 }
4603 
4604 struct lrc_timestamp {
4605 	struct intel_engine_cs *engine;
4606 	struct intel_context *ce[2];
4607 	u32 poison;
4608 };
4609 
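/*
 * Compare timestamps using a signed delta so that u32 wraparound is handled,
 * e.g. timestamp_advanced(U32_MAX, 0) is true even though end < start.
 */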
4610 static bool timestamp_advanced(u32 start, u32 end)
4611 {
4612 	return (s32)(end - start) > 0;
4613 }
4614 
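/*
 * Poison the CTX_TIMESTAMP saved in the context image, run a request that
 * samples the live timestamp into slot[1], then verify the value restored
 * onto the hardware advanced past the poison and the value saved back into
 * the image advanced past that sample.
 */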
4615 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
4616 {
4617 	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
4618 	struct i915_request *rq;
4619 	u32 timestamp;
4620 	int err = 0;
4621 
4622 	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
4623 	rq = create_timestamp(arg->ce[0], slot, 1);
4624 	if (IS_ERR(rq))
4625 		return PTR_ERR(rq);
4626 
4627 	err = wait_for_submit(rq->engine, rq, HZ / 2);
4628 	if (err)
4629 		goto err;
4630 
4631 	if (preempt) {
4632 		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
4633 		err = emit_semaphore_signal(arg->ce[1], slot);
4634 		if (err)
4635 			goto err;
4636 	} else {
4637 		slot[0] = 1;
4638 		wmb();
4639 	}
4640 
4641 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
4642 		err = -ETIME;
4643 		goto err;
4644 	}
4645 
4646 	/* and wait for switch to kernel */
4647 	if (igt_flush_test(arg->engine->i915)) {
4648 		err = -EIO;
4649 		goto err;
4650 	}
4651 
4652 	rmb();
4653 
4654 	if (!timestamp_advanced(arg->poison, slot[1])) {
4655 		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
4656 		       arg->engine->name, preempt ? "preempt" : "simple",
4657 		       arg->poison, slot[1]);
4658 		err = -EINVAL;
4659 	}
4660 
4661 	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
4662 	if (!timestamp_advanced(slot[1], timestamp)) {
4663 		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
4664 		       arg->engine->name, preempt ? "preempt" : "simple",
4665 		       slot[1], timestamp);
4666 		err = -EINVAL;
4667 	}
4668 
4669 err:
4670 	memset32(slot, -1, 4);
4671 	i915_request_put(rq);
4672 	return err;
4673 }
4674 
4675 static int live_lrc_timestamp(void *arg)
4676 {
4677 	struct intel_gt *gt = arg;
4678 	enum intel_engine_id id;
4679 	struct lrc_timestamp data;
4680 	const u32 poison[] = {
4681 		0,
4682 		S32_MAX,
4683 		(u32)S32_MAX + 1,
4684 		U32_MAX,
4685 	};
4686 
4687 	/*
4688 	 * We want to verify that the timestamp is saved and restored across
4689 	 * context switches and is monotonic.
4690 	 *
4691 	 * So we do this with a little bit of LRC poisoning to check various
4692 	 * boundary conditions, and see what happens if we preempt the context
4693 	 * with a second request (carrying more poison into the timestamp).
4694 	 */
4695 
4696 	for_each_engine(data.engine, gt, id) {
4697 		unsigned long heartbeat;
4698 		int i, err = 0;
4699 
4700 		engine_heartbeat_disable(data.engine, &heartbeat);
4701 
4702 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4703 			struct intel_context *tmp;
4704 
4705 			tmp = intel_context_create(data.engine);
4706 			if (IS_ERR(tmp)) {
4707 				err = PTR_ERR(tmp);
4708 				goto err;
4709 			}
4710 
4711 			err = intel_context_pin(tmp);
4712 			if (err) {
4713 				intel_context_put(tmp);
4714 				goto err;
4715 			}
4716 
4717 			data.ce[i] = tmp;
4718 		}
4719 
4720 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
4721 			data.poison = poison[i];
4722 
4723 			err = __lrc_timestamp(&data, false);
4724 			if (err)
4725 				break;
4726 
4727 			err = __lrc_timestamp(&data, true);
4728 			if (err)
4729 				break;
4730 		}
4731 
4732 err:
4733 		engine_heartbeat_enable(data.engine, heartbeat);
4734 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4735 			if (!data.ce[i])
4736 				break;
4737 
4738 			intel_context_unpin(data.ce[i]);
4739 			intel_context_put(data.ce[i]);
4740 		}
4741 
4742 		if (igt_flush_test(gt->i915))
4743 			err = -EIO;
4744 		if (err)
4745 			return err;
4746 	}
4747 
4748 	return 0;
4749 }
4750 
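/*
 * Saturate the context with batches of trivial requests until the selftest
 * timeout expires, then report the accumulated PPHWSP runtime and fail if
 * the accumulator was ever seen to go backwards (underflow).
 */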
4751 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
4752 {
4753 	struct intel_context *ce;
4754 	struct i915_request *rq;
4755 	IGT_TIMEOUT(end_time);
4756 	int err;
4757 
4758 	ce = intel_context_create(engine);
4759 	if (IS_ERR(ce))
4760 		return PTR_ERR(ce);
4761 
4762 	ce->runtime.num_underflow = 0;
4763 	ce->runtime.max_underflow = 0;
4764 
4765 	do {
4766 		unsigned int loop = 1024;
4767 
4768 		while (loop) {
4769 			rq = intel_context_create_request(ce);
4770 			if (IS_ERR(rq)) {
4771 				err = PTR_ERR(rq);
4772 				goto err_rq;
4773 			}
4774 
4775 			if (--loop == 0)
4776 				i915_request_get(rq);
4777 
4778 			i915_request_add(rq);
4779 		}
4780 
4781 		if (__igt_timeout(end_time, NULL))
4782 			break;
4783 
4784 		i915_request_put(rq);
4785 	} while (1);
4786 
4787 	err = i915_request_wait(rq, 0, HZ / 5);
4788 	if (err < 0) {
4789 		pr_err("%s: request not completed!\n", engine->name);
4790 		goto err_wait;
4791 	}
4792 
4793 	igt_flush_test(engine->i915);
4794 
4795 	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
4796 		engine->name,
4797 		intel_context_get_total_runtime_ns(ce),
4798 		intel_context_get_avg_runtime_ns(ce));
4799 
4800 	err = 0;
4801 	if (ce->runtime.num_underflow) {
4802 		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
4803 		       engine->name,
4804 		       ce->runtime.num_underflow,
4805 		       ce->runtime.max_underflow);
4806 		GEM_TRACE_DUMP();
4807 		err = -EOVERFLOW;
4808 	}
4809 
4810 err_wait:
4811 	i915_request_put(rq);
4812 err_rq:
4813 	intel_context_put(ce);
4814 	return err;
4815 }
4816 
4817 static int live_pphwsp_runtime(void *arg)
4818 {
4819 	struct intel_gt *gt = arg;
4820 	struct intel_engine_cs *engine;
4821 	enum intel_engine_id id;
4822 	int err = 0;
4823 
4824 	/*
4825 	 * Check that the cumulative context runtime, as stored in pphwsp[16],
4826 	 * is monotonic.
4827 	 */
4828 
4829 	for_each_engine(engine, gt, id) {
4830 		err = __live_pphwsp_runtime(engine);
4831 		if (err)
4832 			break;
4833 	}
4834 
4835 	if (igt_flush_test(gt->i915))
4836 		err = -EIO;
4837 
4838 	return err;
4839 }
4840 
4841 int intel_lrc_live_selftests(struct drm_i915_private *i915)
4842 {
4843 	static const struct i915_subtest tests[] = {
4844 		SUBTEST(live_lrc_layout),
4845 		SUBTEST(live_lrc_fixed),
4846 		SUBTEST(live_lrc_state),
4847 		SUBTEST(live_lrc_gpr),
4848 		SUBTEST(live_lrc_timestamp),
4849 		SUBTEST(live_pphwsp_runtime),
4850 	};
4851 
4852 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
4853 		return 0;
4854 
4855 	return intel_gt_live_subtests(tests, &i915->gt);
4856 }
4857