xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_execlists.c (revision 56ea353ea49ad21dd4c14e7baa235493ec27e766)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_internal.h"
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
13 
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
20 
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
23 
/* Offset of dword @n in the GPR block located 0x600 past the engine's mmio_base */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + 4 * (n))
#define NUM_GPR 16
#define NUM_GPR_DW (2 * NUM_GPR) /* two dwords per GPR */
27 
28 static bool is_active(struct i915_request *rq)
29 {
30 	if (i915_request_is_active(rq))
31 		return true;
32 
33 	if (i915_request_on_hold(rq))
34 		return true;
35 
36 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
37 		return true;
38 
39 	return false;
40 }
41 
42 static int wait_for_submit(struct intel_engine_cs *engine,
43 			   struct i915_request *rq,
44 			   unsigned long timeout)
45 {
46 	/* Ignore our own attempts to suppress excess tasklets */
47 	tasklet_hi_schedule(&engine->sched_engine->tasklet);
48 
49 	timeout += jiffies;
50 	do {
51 		bool done = time_after(jiffies, timeout);
52 
53 		if (i915_request_completed(rq)) /* that was quick! */
54 			return 0;
55 
56 		/* Wait until the HW has acknowleged the submission (or err) */
57 		intel_engine_flush_submission(engine);
58 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
59 			return 0;
60 
61 		if (done)
62 			return -ETIME;
63 
64 		cond_resched();
65 	} while (1);
66 }
67 
68 static int wait_for_reset(struct intel_engine_cs *engine,
69 			  struct i915_request *rq,
70 			  unsigned long timeout)
71 {
72 	timeout += jiffies;
73 
74 	do {
75 		cond_resched();
76 		intel_engine_flush_submission(engine);
77 
78 		if (READ_ONCE(engine->execlists.pending[0]))
79 			continue;
80 
81 		if (i915_request_completed(rq))
82 			break;
83 
84 		if (READ_ONCE(rq->fence.error))
85 			break;
86 	} while (time_before(jiffies, timeout));
87 
88 	if (rq->fence.error != -EIO) {
89 		pr_err("%s: hanging request %llx:%lld not reset\n",
90 		       engine->name,
91 		       rq->fence.context,
92 		       rq->fence.seqno);
93 		return -EINVAL;
94 	}
95 
96 	/* Give the request a jiffie to complete after flushing the worker */
97 	if (i915_request_wait(rq, 0,
98 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
99 		pr_err("%s: hanging request %llx:%lld did not complete\n",
100 		       engine->name,
101 		       rq->fence.context,
102 		       rq->fence.seqno);
103 		return -ETIME;
104 	}
105 
106 	return 0;
107 }
108 
109 static int live_sanitycheck(void *arg)
110 {
111 	struct intel_gt *gt = arg;
112 	struct intel_engine_cs *engine;
113 	enum intel_engine_id id;
114 	struct igt_spinner spin;
115 	int err = 0;
116 
117 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
118 		return 0;
119 
120 	if (igt_spinner_init(&spin, gt))
121 		return -ENOMEM;
122 
123 	for_each_engine(engine, gt, id) {
124 		struct intel_context *ce;
125 		struct i915_request *rq;
126 
127 		ce = intel_context_create(engine);
128 		if (IS_ERR(ce)) {
129 			err = PTR_ERR(ce);
130 			break;
131 		}
132 
133 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
134 		if (IS_ERR(rq)) {
135 			err = PTR_ERR(rq);
136 			goto out_ctx;
137 		}
138 
139 		i915_request_add(rq);
140 		if (!igt_wait_for_spinner(&spin, rq)) {
141 			GEM_TRACE("spinner failed to start\n");
142 			GEM_TRACE_DUMP();
143 			intel_gt_set_wedged(gt);
144 			err = -EIO;
145 			goto out_ctx;
146 		}
147 
148 		igt_spinner_end(&spin);
149 		if (igt_flush_test(gt->i915)) {
150 			err = -EIO;
151 			goto out_ctx;
152 		}
153 
154 out_ctx:
155 		intel_context_put(ce);
156 		if (err)
157 			break;
158 	}
159 
160 	igt_spinner_fini(&spin);
161 	return err;
162 }
163 
164 static int live_unlite_restore(struct intel_gt *gt, int prio)
165 {
166 	struct intel_engine_cs *engine;
167 	enum intel_engine_id id;
168 	struct igt_spinner spin;
169 	int err = -ENOMEM;
170 
171 	/*
172 	 * Check that we can correctly context switch between 2 instances
173 	 * on the same engine from the same parent context.
174 	 */
175 
176 	if (igt_spinner_init(&spin, gt))
177 		return err;
178 
179 	err = 0;
180 	for_each_engine(engine, gt, id) {
181 		struct intel_context *ce[2] = {};
182 		struct i915_request *rq[2];
183 		struct igt_live_test t;
184 		int n;
185 
186 		if (prio && !intel_engine_has_preemption(engine))
187 			continue;
188 
189 		if (!intel_engine_can_store_dword(engine))
190 			continue;
191 
192 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
193 			err = -EIO;
194 			break;
195 		}
196 		st_engine_heartbeat_disable(engine);
197 
198 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
199 			struct intel_context *tmp;
200 
201 			tmp = intel_context_create(engine);
202 			if (IS_ERR(tmp)) {
203 				err = PTR_ERR(tmp);
204 				goto err_ce;
205 			}
206 
207 			err = intel_context_pin(tmp);
208 			if (err) {
209 				intel_context_put(tmp);
210 				goto err_ce;
211 			}
212 
213 			/*
214 			 * Setup the pair of contexts such that if we
215 			 * lite-restore using the RING_TAIL from ce[1] it
216 			 * will execute garbage from ce[0]->ring.
217 			 */
218 			memset(tmp->ring->vaddr,
219 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
220 			       tmp->ring->vma->size);
221 
222 			ce[n] = tmp;
223 		}
224 		GEM_BUG_ON(!ce[1]->ring->size);
225 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
226 		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
227 
228 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
229 		if (IS_ERR(rq[0])) {
230 			err = PTR_ERR(rq[0]);
231 			goto err_ce;
232 		}
233 
234 		i915_request_get(rq[0]);
235 		i915_request_add(rq[0]);
236 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
237 
238 		if (!igt_wait_for_spinner(&spin, rq[0])) {
239 			i915_request_put(rq[0]);
240 			goto err_ce;
241 		}
242 
243 		rq[1] = i915_request_create(ce[1]);
244 		if (IS_ERR(rq[1])) {
245 			err = PTR_ERR(rq[1]);
246 			i915_request_put(rq[0]);
247 			goto err_ce;
248 		}
249 
250 		if (!prio) {
251 			/*
252 			 * Ensure we do the switch to ce[1] on completion.
253 			 *
254 			 * rq[0] is already submitted, so this should reduce
255 			 * to a no-op (a wait on a request on the same engine
256 			 * uses the submit fence, not the completion fence),
257 			 * but it will install a dependency on rq[1] for rq[0]
258 			 * that will prevent the pair being reordered by
259 			 * timeslicing.
260 			 */
261 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
262 		}
263 
264 		i915_request_get(rq[1]);
265 		i915_request_add(rq[1]);
266 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
267 		i915_request_put(rq[0]);
268 
269 		if (prio) {
270 			struct i915_sched_attr attr = {
271 				.priority = prio,
272 			};
273 
274 			/* Alternatively preempt the spinner with ce[1] */
275 			engine->sched_engine->schedule(rq[1], &attr);
276 		}
277 
278 		/* And switch back to ce[0] for good measure */
279 		rq[0] = i915_request_create(ce[0]);
280 		if (IS_ERR(rq[0])) {
281 			err = PTR_ERR(rq[0]);
282 			i915_request_put(rq[1]);
283 			goto err_ce;
284 		}
285 
286 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
287 		i915_request_get(rq[0]);
288 		i915_request_add(rq[0]);
289 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
290 		i915_request_put(rq[1]);
291 		i915_request_put(rq[0]);
292 
293 err_ce:
294 		intel_engine_flush_submission(engine);
295 		igt_spinner_end(&spin);
296 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
297 			if (IS_ERR_OR_NULL(ce[n]))
298 				break;
299 
300 			intel_context_unpin(ce[n]);
301 			intel_context_put(ce[n]);
302 		}
303 
304 		st_engine_heartbeat_enable(engine);
305 		if (igt_live_test_end(&t))
306 			err = -EIO;
307 		if (err)
308 			break;
309 	}
310 
311 	igt_spinner_fini(&spin);
312 	return err;
313 }
314 
/* Run live_unlite_restore() with priority 0, i.e. without preemption. */
static int live_unlite_switch(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, 0);
}
319 
320 static int live_unlite_preempt(void *arg)
321 {
322 	return live_unlite_restore(arg, I915_PRIORITY_MAX);
323 }
324 
325 static int live_unlite_ring(void *arg)
326 {
327 	struct intel_gt *gt = arg;
328 	struct intel_engine_cs *engine;
329 	struct igt_spinner spin;
330 	enum intel_engine_id id;
331 	int err = 0;
332 
333 	/*
334 	 * Setup a preemption event that will cause almost the entire ring
335 	 * to be unwound, potentially fooling our intel_ring_direction()
336 	 * into emitting a forward lite-restore instead of the rollback.
337 	 */
338 
339 	if (igt_spinner_init(&spin, gt))
340 		return -ENOMEM;
341 
342 	for_each_engine(engine, gt, id) {
343 		struct intel_context *ce[2] = {};
344 		struct i915_request *rq;
345 		struct igt_live_test t;
346 		int n;
347 
348 		if (!intel_engine_has_preemption(engine))
349 			continue;
350 
351 		if (!intel_engine_can_store_dword(engine))
352 			continue;
353 
354 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
355 			err = -EIO;
356 			break;
357 		}
358 		st_engine_heartbeat_disable(engine);
359 
360 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
361 			struct intel_context *tmp;
362 
363 			tmp = intel_context_create(engine);
364 			if (IS_ERR(tmp)) {
365 				err = PTR_ERR(tmp);
366 				goto err_ce;
367 			}
368 
369 			err = intel_context_pin(tmp);
370 			if (err) {
371 				intel_context_put(tmp);
372 				goto err_ce;
373 			}
374 
375 			memset32(tmp->ring->vaddr,
376 				 0xdeadbeef, /* trigger a hang if executed */
377 				 tmp->ring->vma->size / sizeof(u32));
378 
379 			ce[n] = tmp;
380 		}
381 
382 		/* Create max prio spinner, followed by N low prio nops */
383 		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
384 		if (IS_ERR(rq)) {
385 			err = PTR_ERR(rq);
386 			goto err_ce;
387 		}
388 
389 		i915_request_get(rq);
390 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
391 		i915_request_add(rq);
392 
393 		if (!igt_wait_for_spinner(&spin, rq)) {
394 			intel_gt_set_wedged(gt);
395 			i915_request_put(rq);
396 			err = -ETIME;
397 			goto err_ce;
398 		}
399 
400 		/* Fill the ring, until we will cause a wrap */
401 		n = 0;
402 		while (intel_ring_direction(ce[0]->ring,
403 					    rq->wa_tail,
404 					    ce[0]->ring->tail) <= 0) {
405 			struct i915_request *tmp;
406 
407 			tmp = intel_context_create_request(ce[0]);
408 			if (IS_ERR(tmp)) {
409 				err = PTR_ERR(tmp);
410 				i915_request_put(rq);
411 				goto err_ce;
412 			}
413 
414 			i915_request_add(tmp);
415 			intel_engine_flush_submission(engine);
416 			n++;
417 		}
418 		intel_engine_flush_submission(engine);
419 		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
420 			 engine->name, n,
421 			 ce[0]->ring->size,
422 			 ce[0]->ring->tail,
423 			 ce[0]->ring->emit,
424 			 rq->tail);
425 		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
426 						rq->tail,
427 						ce[0]->ring->tail) <= 0);
428 		i915_request_put(rq);
429 
430 		/* Create a second ring to preempt the first ring after rq[0] */
431 		rq = intel_context_create_request(ce[1]);
432 		if (IS_ERR(rq)) {
433 			err = PTR_ERR(rq);
434 			goto err_ce;
435 		}
436 
437 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
438 		i915_request_get(rq);
439 		i915_request_add(rq);
440 
441 		err = wait_for_submit(engine, rq, HZ / 2);
442 		i915_request_put(rq);
443 		if (err) {
444 			pr_err("%s: preemption request was not submitted\n",
445 			       engine->name);
446 			err = -ETIME;
447 		}
448 
449 		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
450 			 engine->name,
451 			 ce[0]->ring->tail, ce[0]->ring->emit,
452 			 ce[1]->ring->tail, ce[1]->ring->emit);
453 
454 err_ce:
455 		intel_engine_flush_submission(engine);
456 		igt_spinner_end(&spin);
457 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
458 			if (IS_ERR_OR_NULL(ce[n]))
459 				break;
460 
461 			intel_context_unpin(ce[n]);
462 			intel_context_put(ce[n]);
463 		}
464 		st_engine_heartbeat_enable(engine);
465 		if (igt_live_test_end(&t))
466 			err = -EIO;
467 		if (err)
468 			break;
469 	}
470 
471 	igt_spinner_fini(&spin);
472 	return err;
473 }
474 
475 static int live_pin_rewind(void *arg)
476 {
477 	struct intel_gt *gt = arg;
478 	struct intel_engine_cs *engine;
479 	enum intel_engine_id id;
480 	int err = 0;
481 
482 	/*
483 	 * We have to be careful not to trust intel_ring too much, for example
484 	 * ring->head is updated upon retire which is out of sync with pinning
485 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
486 	 * or else we risk writing an older, stale value.
487 	 *
488 	 * To simulate this, let's apply a bit of deliberate sabotague.
489 	 */
490 
491 	for_each_engine(engine, gt, id) {
492 		struct intel_context *ce;
493 		struct i915_request *rq;
494 		struct intel_ring *ring;
495 		struct igt_live_test t;
496 
497 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
498 			err = -EIO;
499 			break;
500 		}
501 
502 		ce = intel_context_create(engine);
503 		if (IS_ERR(ce)) {
504 			err = PTR_ERR(ce);
505 			break;
506 		}
507 
508 		err = intel_context_pin(ce);
509 		if (err) {
510 			intel_context_put(ce);
511 			break;
512 		}
513 
514 		/* Keep the context awake while we play games */
515 		err = i915_active_acquire(&ce->active);
516 		if (err) {
517 			intel_context_unpin(ce);
518 			intel_context_put(ce);
519 			break;
520 		}
521 		ring = ce->ring;
522 
523 		/* Poison the ring, and offset the next request from HEAD */
524 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
525 		ring->emit = ring->size / 2;
526 		ring->tail = ring->emit;
527 		GEM_BUG_ON(ring->head);
528 
529 		intel_context_unpin(ce);
530 
531 		/* Submit a simple nop request */
532 		GEM_BUG_ON(intel_context_is_pinned(ce));
533 		rq = intel_context_create_request(ce);
534 		i915_active_release(&ce->active); /* e.g. async retire */
535 		intel_context_put(ce);
536 		if (IS_ERR(rq)) {
537 			err = PTR_ERR(rq);
538 			break;
539 		}
540 		GEM_BUG_ON(!rq->head);
541 		i915_request_add(rq);
542 
543 		/* Expect not to hang! */
544 		if (igt_live_test_end(&t)) {
545 			err = -EIO;
546 			break;
547 		}
548 	}
549 
550 	return err;
551 }
552 
553 static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
554 {
555 	tasklet_disable(&engine->sched_engine->tasklet);
556 	local_bh_disable();
557 
558 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
559 			     &engine->gt->reset.flags)) {
560 		local_bh_enable();
561 		tasklet_enable(&engine->sched_engine->tasklet);
562 
563 		intel_gt_set_wedged(engine->gt);
564 		return -EBUSY;
565 	}
566 
567 	return 0;
568 }
569 
570 static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
571 {
572 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
573 			      &engine->gt->reset.flags);
574 
575 	local_bh_enable();
576 	tasklet_enable(&engine->sched_engine->tasklet);
577 }
578 
579 static int live_hold_reset(void *arg)
580 {
581 	struct intel_gt *gt = arg;
582 	struct intel_engine_cs *engine;
583 	enum intel_engine_id id;
584 	struct igt_spinner spin;
585 	int err = 0;
586 
587 	/*
588 	 * In order to support offline error capture for fast preempt reset,
589 	 * we need to decouple the guilty request and ensure that it and its
590 	 * descendents are not executed while the capture is in progress.
591 	 */
592 
593 	if (!intel_has_reset_engine(gt))
594 		return 0;
595 
596 	if (igt_spinner_init(&spin, gt))
597 		return -ENOMEM;
598 
599 	for_each_engine(engine, gt, id) {
600 		struct intel_context *ce;
601 		struct i915_request *rq;
602 
603 		ce = intel_context_create(engine);
604 		if (IS_ERR(ce)) {
605 			err = PTR_ERR(ce);
606 			break;
607 		}
608 
609 		st_engine_heartbeat_disable(engine);
610 
611 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
612 		if (IS_ERR(rq)) {
613 			err = PTR_ERR(rq);
614 			goto out;
615 		}
616 		i915_request_add(rq);
617 
618 		if (!igt_wait_for_spinner(&spin, rq)) {
619 			intel_gt_set_wedged(gt);
620 			err = -ETIME;
621 			goto out;
622 		}
623 
624 		/* We have our request executing, now remove it and reset */
625 
626 		err = engine_lock_reset_tasklet(engine);
627 		if (err)
628 			goto out;
629 
630 		engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
631 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
632 
633 		i915_request_get(rq);
634 		execlists_hold(engine, rq);
635 		GEM_BUG_ON(!i915_request_on_hold(rq));
636 
637 		__intel_engine_reset_bh(engine, NULL);
638 		GEM_BUG_ON(rq->fence.error != -EIO);
639 
640 		engine_unlock_reset_tasklet(engine);
641 
642 		/* Check that we do not resubmit the held request */
643 		if (!i915_request_wait(rq, 0, HZ / 5)) {
644 			pr_err("%s: on hold request completed!\n",
645 			       engine->name);
646 			i915_request_put(rq);
647 			err = -EIO;
648 			goto out;
649 		}
650 		GEM_BUG_ON(!i915_request_on_hold(rq));
651 
652 		/* But is resubmitted on release */
653 		execlists_unhold(engine, rq);
654 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
655 			pr_err("%s: held request did not complete!\n",
656 			       engine->name);
657 			intel_gt_set_wedged(gt);
658 			err = -ETIME;
659 		}
660 		i915_request_put(rq);
661 
662 out:
663 		st_engine_heartbeat_enable(engine);
664 		intel_context_put(ce);
665 		if (err)
666 			break;
667 	}
668 
669 	igt_spinner_fini(&spin);
670 	return err;
671 }
672 
/* Map an error code to a label for log messages: "good" for 0, else "bad". */
static const char *error_repr(int err)
{
	if (err)
		return "bad";

	return "good";
}
677 
678 static int live_error_interrupt(void *arg)
679 {
680 	static const struct error_phase {
681 		enum { GOOD = 0, BAD = -EIO } error[2];
682 	} phases[] = {
683 		{ { BAD,  GOOD } },
684 		{ { BAD,  BAD  } },
685 		{ { BAD,  GOOD } },
686 		{ { GOOD, GOOD } }, /* sentinel */
687 	};
688 	struct intel_gt *gt = arg;
689 	struct intel_engine_cs *engine;
690 	enum intel_engine_id id;
691 
692 	/*
693 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
694 	 * of invalid commands in user batches that will cause a GPU hang.
695 	 * This is a faster mechanism than using hangcheck/heartbeats, but
696 	 * only detects problems the HW knows about -- it will not warn when
697 	 * we kill the HW!
698 	 *
699 	 * To verify our detection and reset, we throw some invalid commands
700 	 * at the HW and wait for the interrupt.
701 	 */
702 
703 	if (!intel_has_reset_engine(gt))
704 		return 0;
705 
706 	for_each_engine(engine, gt, id) {
707 		const struct error_phase *p;
708 		int err = 0;
709 
710 		st_engine_heartbeat_disable(engine);
711 
712 		for (p = phases; p->error[0] != GOOD; p++) {
713 			struct i915_request *client[ARRAY_SIZE(phases->error)];
714 			u32 *cs;
715 			int i;
716 
717 			memset(client, 0, sizeof(*client));
718 			for (i = 0; i < ARRAY_SIZE(client); i++) {
719 				struct intel_context *ce;
720 				struct i915_request *rq;
721 
722 				ce = intel_context_create(engine);
723 				if (IS_ERR(ce)) {
724 					err = PTR_ERR(ce);
725 					goto out;
726 				}
727 
728 				rq = intel_context_create_request(ce);
729 				intel_context_put(ce);
730 				if (IS_ERR(rq)) {
731 					err = PTR_ERR(rq);
732 					goto out;
733 				}
734 
735 				if (rq->engine->emit_init_breadcrumb) {
736 					err = rq->engine->emit_init_breadcrumb(rq);
737 					if (err) {
738 						i915_request_add(rq);
739 						goto out;
740 					}
741 				}
742 
743 				cs = intel_ring_begin(rq, 2);
744 				if (IS_ERR(cs)) {
745 					i915_request_add(rq);
746 					err = PTR_ERR(cs);
747 					goto out;
748 				}
749 
750 				if (p->error[i]) {
751 					*cs++ = 0xdeadbeef;
752 					*cs++ = 0xdeadbeef;
753 				} else {
754 					*cs++ = MI_NOOP;
755 					*cs++ = MI_NOOP;
756 				}
757 
758 				client[i] = i915_request_get(rq);
759 				i915_request_add(rq);
760 			}
761 
762 			err = wait_for_submit(engine, client[0], HZ / 2);
763 			if (err) {
764 				pr_err("%s: first request did not start within time!\n",
765 				       engine->name);
766 				err = -ETIME;
767 				goto out;
768 			}
769 
770 			for (i = 0; i < ARRAY_SIZE(client); i++) {
771 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
772 					pr_debug("%s: %s request incomplete!\n",
773 						 engine->name,
774 						 error_repr(p->error[i]));
775 
776 				if (!i915_request_started(client[i])) {
777 					pr_err("%s: %s request not started!\n",
778 					       engine->name,
779 					       error_repr(p->error[i]));
780 					err = -ETIME;
781 					goto out;
782 				}
783 
784 				/* Kick the tasklet to process the error */
785 				intel_engine_flush_submission(engine);
786 				if (client[i]->fence.error != p->error[i]) {
787 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
788 					       engine->name,
789 					       error_repr(p->error[i]),
790 					       i915_request_completed(client[i]) ? "completed" : "running",
791 					       client[i]->fence.error);
792 					err = -EINVAL;
793 					goto out;
794 				}
795 			}
796 
797 out:
798 			for (i = 0; i < ARRAY_SIZE(client); i++)
799 				if (client[i])
800 					i915_request_put(client[i]);
801 			if (err) {
802 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
803 				       engine->name, p - phases,
804 				       p->error[0], p->error[1]);
805 				break;
806 			}
807 		}
808 
809 		st_engine_heartbeat_enable(engine);
810 		if (err) {
811 			intel_gt_set_wedged(gt);
812 			return err;
813 		}
814 	}
815 
816 	return 0;
817 }
818 
819 static int
820 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
821 {
822 	u32 *cs;
823 
824 	cs = intel_ring_begin(rq, 10);
825 	if (IS_ERR(cs))
826 		return PTR_ERR(cs);
827 
828 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
829 
830 	*cs++ = MI_SEMAPHORE_WAIT |
831 		MI_SEMAPHORE_GLOBAL_GTT |
832 		MI_SEMAPHORE_POLL |
833 		MI_SEMAPHORE_SAD_NEQ_SDD;
834 	*cs++ = 0;
835 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
836 	*cs++ = 0;
837 
838 	if (idx > 0) {
839 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
840 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
841 		*cs++ = 0;
842 		*cs++ = 1;
843 	} else {
844 		*cs++ = MI_NOOP;
845 		*cs++ = MI_NOOP;
846 		*cs++ = MI_NOOP;
847 		*cs++ = MI_NOOP;
848 	}
849 
850 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
851 
852 	intel_ring_advance(rq, cs);
853 	return 0;
854 }
855 
856 static struct i915_request *
857 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
858 {
859 	struct intel_context *ce;
860 	struct i915_request *rq;
861 	int err;
862 
863 	ce = intel_context_create(engine);
864 	if (IS_ERR(ce))
865 		return ERR_CAST(ce);
866 
867 	rq = intel_context_create_request(ce);
868 	if (IS_ERR(rq))
869 		goto out_ce;
870 
871 	err = 0;
872 	if (rq->engine->emit_init_breadcrumb)
873 		err = rq->engine->emit_init_breadcrumb(rq);
874 	if (err == 0)
875 		err = emit_semaphore_chain(rq, vma, idx);
876 	if (err == 0)
877 		i915_request_get(rq);
878 	i915_request_add(rq);
879 	if (err)
880 		rq = ERR_PTR(err);
881 
882 out_ce:
883 	intel_context_put(ce);
884 	return rq;
885 }
886 
887 static int
888 release_queue(struct intel_engine_cs *engine,
889 	      struct i915_vma *vma,
890 	      int idx, int prio)
891 {
892 	struct i915_sched_attr attr = {
893 		.priority = prio,
894 	};
895 	struct i915_request *rq;
896 	u32 *cs;
897 
898 	rq = intel_engine_create_kernel_request(engine);
899 	if (IS_ERR(rq))
900 		return PTR_ERR(rq);
901 
902 	cs = intel_ring_begin(rq, 4);
903 	if (IS_ERR(cs)) {
904 		i915_request_add(rq);
905 		return PTR_ERR(cs);
906 	}
907 
908 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
909 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
910 	*cs++ = 0;
911 	*cs++ = 1;
912 
913 	intel_ring_advance(rq, cs);
914 
915 	i915_request_get(rq);
916 	i915_request_add(rq);
917 
918 	local_bh_disable();
919 	engine->sched_engine->schedule(rq, &attr);
920 	local_bh_enable(); /* kick tasklet */
921 
922 	i915_request_put(rq);
923 
924 	return 0;
925 }
926 
927 static int
928 slice_semaphore_queue(struct intel_engine_cs *outer,
929 		      struct i915_vma *vma,
930 		      int count)
931 {
932 	struct intel_engine_cs *engine;
933 	struct i915_request *head;
934 	enum intel_engine_id id;
935 	int err, i, n = 0;
936 
937 	head = semaphore_queue(outer, vma, n++);
938 	if (IS_ERR(head))
939 		return PTR_ERR(head);
940 
941 	for_each_engine(engine, outer->gt, id) {
942 		if (!intel_engine_has_preemption(engine))
943 			continue;
944 
945 		for (i = 0; i < count; i++) {
946 			struct i915_request *rq;
947 
948 			rq = semaphore_queue(engine, vma, n++);
949 			if (IS_ERR(rq)) {
950 				err = PTR_ERR(rq);
951 				goto out;
952 			}
953 
954 			i915_request_put(rq);
955 		}
956 	}
957 
958 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
959 	if (err)
960 		goto out;
961 
962 	if (i915_request_wait(head, 0,
963 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
964 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
965 		       outer->name, count, n);
966 		GEM_TRACE_DUMP();
967 		intel_gt_set_wedged(outer->gt);
968 		err = -EIO;
969 	}
970 
971 out:
972 	i915_request_put(head);
973 	return err;
974 }
975 
976 static int live_timeslice_preempt(void *arg)
977 {
978 	struct intel_gt *gt = arg;
979 	struct drm_i915_gem_object *obj;
980 	struct intel_engine_cs *engine;
981 	enum intel_engine_id id;
982 	struct i915_vma *vma;
983 	void *vaddr;
984 	int err = 0;
985 
986 	/*
987 	 * If a request takes too long, we would like to give other users
988 	 * a fair go on the GPU. In particular, users may create batches
989 	 * that wait upon external input, where that input may even be
990 	 * supplied by another GPU job. To avoid blocking forever, we
991 	 * need to preempt the current task and replace it with another
992 	 * ready task.
993 	 */
994 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
995 		return 0;
996 
997 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
998 	if (IS_ERR(obj))
999 		return PTR_ERR(obj);
1000 
1001 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1002 	if (IS_ERR(vma)) {
1003 		err = PTR_ERR(vma);
1004 		goto err_obj;
1005 	}
1006 
1007 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1008 	if (IS_ERR(vaddr)) {
1009 		err = PTR_ERR(vaddr);
1010 		goto err_obj;
1011 	}
1012 
1013 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1014 	if (err)
1015 		goto err_map;
1016 
1017 	err = i915_vma_sync(vma);
1018 	if (err)
1019 		goto err_pin;
1020 
1021 	for_each_engine(engine, gt, id) {
1022 		if (!intel_engine_has_preemption(engine))
1023 			continue;
1024 
1025 		memset(vaddr, 0, PAGE_SIZE);
1026 
1027 		st_engine_heartbeat_disable(engine);
1028 		err = slice_semaphore_queue(engine, vma, 5);
1029 		st_engine_heartbeat_enable(engine);
1030 		if (err)
1031 			goto err_pin;
1032 
1033 		if (igt_flush_test(gt->i915)) {
1034 			err = -EIO;
1035 			goto err_pin;
1036 		}
1037 	}
1038 
1039 err_pin:
1040 	i915_vma_unpin(vma);
1041 err_map:
1042 	i915_gem_object_unpin_map(obj);
1043 err_obj:
1044 	i915_gem_object_put(obj);
1045 	return err;
1046 }
1047 
1048 static struct i915_request *
1049 create_rewinder(struct intel_context *ce,
1050 		struct i915_request *wait,
1051 		void *slot, int idx)
1052 {
1053 	const u32 offset =
1054 		i915_ggtt_offset(ce->engine->status_page.vma) +
1055 		offset_in_page(slot);
1056 	struct i915_request *rq;
1057 	u32 *cs;
1058 	int err;
1059 
1060 	rq = intel_context_create_request(ce);
1061 	if (IS_ERR(rq))
1062 		return rq;
1063 
1064 	if (wait) {
1065 		err = i915_request_await_dma_fence(rq, &wait->fence);
1066 		if (err)
1067 			goto err;
1068 	}
1069 
1070 	cs = intel_ring_begin(rq, 14);
1071 	if (IS_ERR(cs)) {
1072 		err = PTR_ERR(cs);
1073 		goto err;
1074 	}
1075 
1076 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1077 	*cs++ = MI_NOOP;
1078 
1079 	*cs++ = MI_SEMAPHORE_WAIT |
1080 		MI_SEMAPHORE_GLOBAL_GTT |
1081 		MI_SEMAPHORE_POLL |
1082 		MI_SEMAPHORE_SAD_GTE_SDD;
1083 	*cs++ = idx;
1084 	*cs++ = offset;
1085 	*cs++ = 0;
1086 
1087 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1088 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1089 	*cs++ = offset + idx * sizeof(u32);
1090 	*cs++ = 0;
1091 
1092 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1093 	*cs++ = offset;
1094 	*cs++ = 0;
1095 	*cs++ = idx + 1;
1096 
1097 	intel_ring_advance(rq, cs);
1098 
1099 	err = 0;
1100 err:
1101 	i915_request_get(rq);
1102 	i915_request_add(rq);
1103 	if (err) {
1104 		i915_request_put(rq);
1105 		return ERR_PTR(err);
1106 	}
1107 
1108 	return rq;
1109 }
1110 
1111 static int live_timeslice_rewind(void *arg)
1112 {
1113 	struct intel_gt *gt = arg;
1114 	struct intel_engine_cs *engine;
1115 	enum intel_engine_id id;
1116 
1117 	/*
1118 	 * The usual presumption on timeslice expiration is that we replace
1119 	 * the active context with another. However, given a chain of
1120 	 * dependencies we may end up with replacing the context with itself,
1121 	 * but only a few of those requests, forcing us to rewind the
1122 	 * RING_TAIL of the original request.
1123 	 */
1124 	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1125 		return 0;
1126 
1127 	for_each_engine(engine, gt, id) {
1128 		enum { A1, A2, B1 };
1129 		enum { X = 1, Z, Y };
1130 		struct i915_request *rq[3] = {};
1131 		struct intel_context *ce;
1132 		unsigned long timeslice;
1133 		int i, err = 0;
1134 		u32 *slot;
1135 
1136 		if (!intel_engine_has_timeslices(engine))
1137 			continue;
1138 
1139 		/*
1140 		 * A:rq1 -- semaphore wait, timestamp X
1141 		 * A:rq2 -- write timestamp Y
1142 		 *
1143 		 * B:rq1 [await A:rq1] -- write timestamp Z
1144 		 *
1145 		 * Force timeslice, release semaphore.
1146 		 *
1147 		 * Expect execution/evaluation order XZY
1148 		 */
1149 
1150 		st_engine_heartbeat_disable(engine);
1151 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1152 
1153 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1154 
1155 		ce = intel_context_create(engine);
1156 		if (IS_ERR(ce)) {
1157 			err = PTR_ERR(ce);
1158 			goto err;
1159 		}
1160 
1161 		rq[A1] = create_rewinder(ce, NULL, slot, X);
1162 		if (IS_ERR(rq[A1])) {
1163 			intel_context_put(ce);
1164 			goto err;
1165 		}
1166 
1167 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1168 		intel_context_put(ce);
1169 		if (IS_ERR(rq[A2]))
1170 			goto err;
1171 
1172 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1173 		if (err) {
1174 			pr_err("%s: failed to submit first context\n",
1175 			       engine->name);
1176 			goto err;
1177 		}
1178 
1179 		ce = intel_context_create(engine);
1180 		if (IS_ERR(ce)) {
1181 			err = PTR_ERR(ce);
1182 			goto err;
1183 		}
1184 
1185 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1186 		intel_context_put(ce);
1187 		if (IS_ERR(rq[2]))
1188 			goto err;
1189 
1190 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1191 		if (err) {
1192 			pr_err("%s: failed to submit second context\n",
1193 			       engine->name);
1194 			goto err;
1195 		}
1196 
1197 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1198 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1199 		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1200 			/* Wait for the timeslice to kick in */
1201 			del_timer(&engine->execlists.timer);
1202 			tasklet_hi_schedule(&engine->sched_engine->tasklet);
1203 			intel_engine_flush_submission(engine);
1204 		}
1205 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1206 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1207 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1208 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1209 
1210 		/* Release the hounds! */
1211 		slot[0] = 1;
1212 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1213 
1214 		for (i = 1; i <= 3; i++) {
1215 			unsigned long timeout = jiffies + HZ / 2;
1216 
1217 			while (!READ_ONCE(slot[i]) &&
1218 			       time_before(jiffies, timeout))
1219 				;
1220 
1221 			if (!time_before(jiffies, timeout)) {
1222 				pr_err("%s: rq[%d] timed out\n",
1223 				       engine->name, i - 1);
1224 				err = -ETIME;
1225 				goto err;
1226 			}
1227 
1228 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1229 		}
1230 
1231 		/* XZY: XZ < XY */
1232 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1233 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1234 			       engine->name,
1235 			       slot[Z] - slot[X],
1236 			       slot[Y] - slot[X]);
1237 			err = -EINVAL;
1238 		}
1239 
1240 err:
1241 		memset32(&slot[0], -1, 4);
1242 		wmb();
1243 
1244 		engine->props.timeslice_duration_ms = timeslice;
1245 		st_engine_heartbeat_enable(engine);
1246 		for (i = 0; i < 3; i++)
1247 			i915_request_put(rq[i]);
1248 		if (igt_flush_test(gt->i915))
1249 			err = -EIO;
1250 		if (err)
1251 			return err;
1252 	}
1253 
1254 	return 0;
1255 }
1256 
/*
 * Create a request with intel_engine_create_kernel_request() and submit it
 * without emitting any commands of our own.
 *
 * Returns the submitted request with one extra reference held for the
 * caller (release with i915_request_put()), or the ERR_PTR reported by
 * request creation.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(rq)) {
		/* Grab the caller's reference before handing the request over */
		i915_request_get(rq);
		i915_request_add(rq);
	}

	return rq;
}
1270 
1271 static long slice_timeout(struct intel_engine_cs *engine)
1272 {
1273 	long timeout;
1274 
1275 	/* Enough time for a timeslice to kick in, and kick out */
1276 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1277 
1278 	/* Enough time for the nop request to complete */
1279 	timeout += HZ / 5;
1280 
1281 	return timeout + 1;
1282 }
1283 
/*
 * Selftest: timeslicing must be enabled on behalf of a queued request even
 * when both ELSP ports are already occupied.
 *
 * @arg: the struct intel_gt under test.
 *
 * For every engine with preemption, a semaphore-waiting request is placed
 * in ELSP[0] and a nop in ELSP[1]; the request that releases the semaphore
 * is then queued at the same priority. The waiter must complete within
 * slice_timeout().
 *
 * Returns 0 on success (or when CONFIG_DRM_I915_TIMESLICE_DURATION is 0),
 * otherwise a negative error code.
 */
static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	/* One page, mapped for the CPU and pinned into the global GTT */
	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
		struct i915_request *rq, *nop;

		if (!intel_engine_has_preemption(engine))
			continue;

		st_engine_heartbeat_disable(engine);
		/* Start each engine from a zeroed page */
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->sched_engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		/* The nop is only needed to occupy the port; drop our ref now */
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		/* The semaphore waiter must still be the request being executed */
		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		/* Wait until we ack the release_queue and start timeslicing */
		do {
			cond_resched();
			intel_engine_flush_submission(engine);
		} while (READ_ONCE(engine->execlists.pending[0]));

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			/*
			 * Overwrite the whole page from the CPU; presumably this
			 * releases any semaphore still waiting on it - confirm
			 * against semaphore_queue().
			 */
			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	/* Unwind in reverse order of acquisition */
err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}
1409 
/*
 * Selftest: a request flagged I915_FENCE_FLAG_NOPREEMPT must not be
 * timesliced out, even by an I915_PRIORITY_BARRIER request.
 *
 * @arg: the struct intel_gt under test.
 *
 * For every engine with preemption, the timeslice duration is temporarily
 * set to 1ms, a spinner is started and marked no-preempt, and a barrier
 * priority request is submitted behind it. The test fails if that barrier
 * request completes within slice_timeout().
 *
 * Returns 0 on success (or when CONFIG_DRM_I915_TIMESLICE_DURATION is 0),
 * otherwise a negative error code.
 */
static int live_timeslice_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We should not timeslice into a request that is marked with
	 * I915_REQUEST_NOPREEMPT.
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		unsigned long timeslice;

		if (!intel_engine_has_preemption(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);
		/* Remember the old timeslice so it can be restored below */
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		/* Create an unpreemptible spinner */

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		/* Drop our context reference whether or not creation succeeded */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_heartbeat;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		/* Only flag the spinner once it is known to be running */
		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
		i915_request_put(rq);

		/* Followed by a maximum priority barrier (heartbeat) */

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out_spin;
		}

		rq = intel_context_create_request(ce);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_spin;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Wait until the barrier is in ELSP, and we know timeslicing
		 * will have been activated.
		 */
		if (wait_for_submit(engine, rq, HZ / 2)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		/*
		 * Since the ELSP[0] request is unpreemptible, it should not
		 * allow the maximum priority barrier through. Wait long
		 * enough to see if it is timesliced in by mistake.
		 */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
			       engine->name);
			err = -EINVAL;
		}
		i915_request_put(rq);

out_spin:
		igt_spinner_end(&spin);
out_heartbeat:
		/* Restore the engine's original timeslice and heartbeat */
		xchg(&engine->props.timeslice_duration_ms, timeslice);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);
	return err;
}
1524 
/*
 * Selftest: a request busywaiting on an in-ring semaphore can be preempted
 * by a higher priority request.
 *
 * @arg: the struct intel_gt under test.
 *
 * A minimum-user-priority request writes 1 into a scratch page and then
 * polls that location until it reads 0. A maximum-user-priority request
 * then writes the 0. If the low priority request does not complete within
 * HZ / 5 the GT is wedged and the test fails.
 *
 * Returns 0 on success, otherwise a negative error code.
 */
static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	int err = -ENOMEM;
	u32 *map;

	/*
	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
	 * preempt the busywaits used to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915, NULL);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	/* Scratch page holding the semaphore, visible to both CPU and GPU */
	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority requests
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			/* The allocated request must still be added before bailing */
			i915_request_add(lo);
			goto err_vma;
		}

		/* Store 1 at the start of the scratch page */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		/* Then poll the same location until it compares equal to 0 */
		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		/* Wait for the GPU store of 1 to become visible to the CPU */
		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		/* Store 0 at the start of the scratch page, releasing the waiter */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		i915_request_add(hi);

		/* lo can only finish if hi was allowed to run ahead of it */
		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
	/* Unwind in reverse order of acquisition */
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}
1709 
1710 static struct i915_request *
1711 spinner_create_request(struct igt_spinner *spin,
1712 		       struct i915_gem_context *ctx,
1713 		       struct intel_engine_cs *engine,
1714 		       u32 arb)
1715 {
1716 	struct intel_context *ce;
1717 	struct i915_request *rq;
1718 
1719 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1720 	if (IS_ERR(ce))
1721 		return ERR_CAST(ce);
1722 
1723 	rq = igt_spinner_create_request(spin, ce, arb);
1724 	intel_context_put(ce);
1725 	return rq;
1726 }
1727 
/*
 * Selftest: a spinner from a maximum-user-priority context must start
 * running while a spinner from a minimum-user-priority context is still
 * spinning on the same engine.
 *
 * @arg: the struct intel_gt under test.
 *
 * If either spinner fails to start, the GT is wedged and -EIO is returned.
 *
 * Returns 0 on success, otherwise a negative error code.
 */
static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	ctx_hi = kernel_context(gt->i915, NULL);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	/* err is still -ENOMEM for these setup failures */
	if (igt_spinner_init(&spin_hi, gt))
		goto err_ctx_lo;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_spin_lo;
		}

		/* Start the low priority spinner and wait until it is running */
		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_spin_lo;
		}

		/* The high priority spinner can only start by preempting it */
		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_spin_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_spin_lo;
		}
	}

	err = 0;
	/* Unwind in reverse order of acquisition */
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}
1818 
/*
 * Selftest: raising the priority of an already queued request must cause
 * it to preempt the request currently running.
 *
 * @arg: the struct intel_gt under test.
 *
 * A spinner from ctx_lo (priority 1) is started first; a second spinner
 * from ctx_hi is queued behind it and must not start. The second request is
 * then rescheduled to I915_PRIORITY_MAX and must start running. Any
 * ordering failure wedges the GT and returns -EIO.
 *
 * Returns 0 on success, otherwise a negative error code.
 */
static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	ctx_hi = kernel_context(gt->i915, NULL);
	if (!ctx_hi)
		return -ENOMEM;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (!ctx_lo)
		goto err_ctx_hi;

	if (igt_spinner_init(&spin_hi, gt))
		goto err_ctx_lo;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = 1;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_spin_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		/* Before the priority bump, the second spinner must not start */
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		/* Raise the queued request's priority after submission */
		attr.priority = I915_PRIORITY_MAX;
		engine->sched_engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_spin_lo;
		}
	}

	err = 0;
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;

	/* Out-of-line failure path: stop both spinners, wedge, then unwind */
err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_spin_lo;
}
1921 
/*
 * One "client" in the preemption tests: a GEM context together with its own
 * spinner. Set up by preempt_client_init(), torn down by
 * preempt_client_fini().
 */
struct preempt_client {
	/* Spinner used to build this client's spinning requests */
	struct igt_spinner spin;
	/* Kernel context the client's requests are created on */
	struct i915_gem_context *ctx;
};
1926 
1927 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1928 {
1929 	c->ctx = kernel_context(gt->i915, NULL);
1930 	if (!c->ctx)
1931 		return -ENOMEM;
1932 
1933 	if (igt_spinner_init(&c->spin, gt))
1934 		goto err_ctx;
1935 
1936 	return 0;
1937 
1938 err_ctx:
1939 	kernel_context_close(c->ctx);
1940 	return -ENOMEM;
1941 }
1942 
1943 static void preempt_client_fini(struct preempt_client *c)
1944 {
1945 	igt_spinner_fini(&c->spin);
1946 	kernel_context_close(c->ctx);
1947 }
1948 
/*
 * Selftest: a request flagged I915_FENCE_FLAG_NOPREEMPT keeps running even
 * when an I915_PRIORITY_MAX request is queued behind it.
 *
 * @arg: the struct intel_gt under test.
 *
 * Client A's spinner is marked no-preempt and started. Client B's spinner
 * (maximum priority) must not start until A's spinner is ended, and the
 * engine's preempt_hang.count must stay at zero throughout.
 *
 * Returns 0 on success, otherwise a negative error code. Every path through
 * err_wedged returns -EIO.
 */
static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and not want to be interrupted.
	 */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;
	b.ctx->sched.priority = I915_PRIORITY_MAX;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;

		if (!intel_engine_has_preemption(engine))
			continue;

		/* Reset the counter that is checked at the end of this pass */
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			goto err_client_b;
		}

		/* Low priority client, but unpreemptable! */
		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			goto err_wedged;
		}

		rq_b = spinner_create_request(&b.spin,
					      b.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_b)) {
			err = PTR_ERR(rq_b);
			goto err_client_b;
		}

		i915_request_add(rq_b);

		/* B is much more important than A! (But A is unpreemptable.) */
		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));

		/* Wait long enough for preemption and timeslicing */
		if (igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client started too early!\n");
			goto err_wedged;
		}

		/* Only once A is released may B start running */
		igt_spinner_end(&a.spin);

		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client failed to start\n");
			goto err_wedged;
		}

		igt_spinner_end(&b.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption recorded x%d; should have been suppressed!\n",
			       engine->execlists.preempt_hang.count);
			/* NOTE: err_wedged overwrites this value with -EIO */
			err = -EINVAL;
			goto err_wedged;
		}

		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

	/* Out-of-line failure path: stop both spinners, wedge, then unwind */
err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
2046 
/*
 * Shared state handed to each __cancel_*() phase of the preempt-cancel
 * selftest.
 */
struct live_preempt_cancel {
	/* Engine currently under test */
	struct intel_engine_cs *engine;
	/* Two clients (context + spinner each) used to build the requests */
	struct preempt_client a, b;
};
2051 
2052 static int __cancel_active0(struct live_preempt_cancel *arg)
2053 {
2054 	struct i915_request *rq;
2055 	struct igt_live_test t;
2056 	int err;
2057 
2058 	/* Preempt cancel of ELSP0 */
2059 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2060 	if (igt_live_test_begin(&t, arg->engine->i915,
2061 				__func__, arg->engine->name))
2062 		return -EIO;
2063 
2064 	rq = spinner_create_request(&arg->a.spin,
2065 				    arg->a.ctx, arg->engine,
2066 				    MI_ARB_CHECK);
2067 	if (IS_ERR(rq))
2068 		return PTR_ERR(rq);
2069 
2070 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2071 	i915_request_get(rq);
2072 	i915_request_add(rq);
2073 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2074 		err = -EIO;
2075 		goto out;
2076 	}
2077 
2078 	intel_context_ban(rq->context, rq);
2079 	err = intel_engine_pulse(arg->engine);
2080 	if (err)
2081 		goto out;
2082 
2083 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2084 	if (err) {
2085 		pr_err("Cancelled inflight0 request did not reset\n");
2086 		goto out;
2087 	}
2088 
2089 out:
2090 	i915_request_put(rq);
2091 	if (igt_live_test_end(&t))
2092 		err = -EIO;
2093 	return err;
2094 }
2095 
2096 static int __cancel_active1(struct live_preempt_cancel *arg)
2097 {
2098 	struct i915_request *rq[2] = {};
2099 	struct igt_live_test t;
2100 	int err;
2101 
2102 	/* Preempt cancel of ELSP1 */
2103 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2104 	if (igt_live_test_begin(&t, arg->engine->i915,
2105 				__func__, arg->engine->name))
2106 		return -EIO;
2107 
2108 	rq[0] = spinner_create_request(&arg->a.spin,
2109 				       arg->a.ctx, arg->engine,
2110 				       MI_NOOP); /* no preemption */
2111 	if (IS_ERR(rq[0]))
2112 		return PTR_ERR(rq[0]);
2113 
2114 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2115 	i915_request_get(rq[0]);
2116 	i915_request_add(rq[0]);
2117 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2118 		err = -EIO;
2119 		goto out;
2120 	}
2121 
2122 	rq[1] = spinner_create_request(&arg->b.spin,
2123 				       arg->b.ctx, arg->engine,
2124 				       MI_ARB_CHECK);
2125 	if (IS_ERR(rq[1])) {
2126 		err = PTR_ERR(rq[1]);
2127 		goto out;
2128 	}
2129 
2130 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2131 	i915_request_get(rq[1]);
2132 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2133 	i915_request_add(rq[1]);
2134 	if (err)
2135 		goto out;
2136 
2137 	intel_context_ban(rq[1]->context, rq[1]);
2138 	err = intel_engine_pulse(arg->engine);
2139 	if (err)
2140 		goto out;
2141 
2142 	igt_spinner_end(&arg->a.spin);
2143 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2144 	if (err)
2145 		goto out;
2146 
2147 	if (rq[0]->fence.error != 0) {
2148 		pr_err("Normal inflight0 request did not complete\n");
2149 		err = -EINVAL;
2150 		goto out;
2151 	}
2152 
2153 	if (rq[1]->fence.error != -EIO) {
2154 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2155 		err = -EINVAL;
2156 		goto out;
2157 	}
2158 
2159 out:
2160 	i915_request_put(rq[1]);
2161 	i915_request_put(rq[0]);
2162 	if (igt_live_test_end(&t))
2163 		err = -EIO;
2164 	return err;
2165 }
2166 
2167 static int __cancel_queued(struct live_preempt_cancel *arg)
2168 {
2169 	struct i915_request *rq[3] = {};
2170 	struct igt_live_test t;
2171 	int err;
2172 
2173 	/* Full ELSP and one in the wings */
2174 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2175 	if (igt_live_test_begin(&t, arg->engine->i915,
2176 				__func__, arg->engine->name))
2177 		return -EIO;
2178 
2179 	rq[0] = spinner_create_request(&arg->a.spin,
2180 				       arg->a.ctx, arg->engine,
2181 				       MI_ARB_CHECK);
2182 	if (IS_ERR(rq[0]))
2183 		return PTR_ERR(rq[0]);
2184 
2185 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2186 	i915_request_get(rq[0]);
2187 	i915_request_add(rq[0]);
2188 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2189 		err = -EIO;
2190 		goto out;
2191 	}
2192 
2193 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2194 	if (IS_ERR(rq[1])) {
2195 		err = PTR_ERR(rq[1]);
2196 		goto out;
2197 	}
2198 
2199 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2200 	i915_request_get(rq[1]);
2201 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2202 	i915_request_add(rq[1]);
2203 	if (err)
2204 		goto out;
2205 
2206 	rq[2] = spinner_create_request(&arg->b.spin,
2207 				       arg->a.ctx, arg->engine,
2208 				       MI_ARB_CHECK);
2209 	if (IS_ERR(rq[2])) {
2210 		err = PTR_ERR(rq[2]);
2211 		goto out;
2212 	}
2213 
2214 	i915_request_get(rq[2]);
2215 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2216 	i915_request_add(rq[2]);
2217 	if (err)
2218 		goto out;
2219 
2220 	intel_context_ban(rq[2]->context, rq[2]);
2221 	err = intel_engine_pulse(arg->engine);
2222 	if (err)
2223 		goto out;
2224 
2225 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2226 	if (err)
2227 		goto out;
2228 
2229 	if (rq[0]->fence.error != -EIO) {
2230 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2231 		err = -EINVAL;
2232 		goto out;
2233 	}
2234 
2235 	/*
2236 	 * The behavior between having semaphores and not is different. With
2237 	 * semaphores the subsequent request is on the hardware and not cancelled
2238 	 * while without the request is held in the driver and cancelled.
2239 	 */
2240 	if (intel_engine_has_semaphores(rq[1]->engine) &&
2241 	    rq[1]->fence.error != 0) {
2242 		pr_err("Normal inflight1 request did not complete\n");
2243 		err = -EINVAL;
2244 		goto out;
2245 	}
2246 
2247 	if (rq[2]->fence.error != -EIO) {
2248 		pr_err("Cancelled queued request did not report -EIO\n");
2249 		err = -EINVAL;
2250 		goto out;
2251 	}
2252 
2253 out:
2254 	i915_request_put(rq[2]);
2255 	i915_request_put(rq[1]);
2256 	i915_request_put(rq[0]);
2257 	if (igt_live_test_end(&t))
2258 		err = -EIO;
2259 	return err;
2260 }
2261 
2262 static int __cancel_hostile(struct live_preempt_cancel *arg)
2263 {
2264 	struct i915_request *rq;
2265 	int err;
2266 
2267 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2268 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2269 		return 0;
2270 
2271 	if (!intel_has_reset_engine(arg->engine->gt))
2272 		return 0;
2273 
2274 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2275 	rq = spinner_create_request(&arg->a.spin,
2276 				    arg->a.ctx, arg->engine,
2277 				    MI_NOOP); /* preemption disabled */
2278 	if (IS_ERR(rq))
2279 		return PTR_ERR(rq);
2280 
2281 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2282 	i915_request_get(rq);
2283 	i915_request_add(rq);
2284 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2285 		err = -EIO;
2286 		goto out;
2287 	}
2288 
2289 	intel_context_ban(rq->context, rq);
2290 	err = intel_engine_pulse(arg->engine); /* force reset */
2291 	if (err)
2292 		goto out;
2293 
2294 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2295 	if (err) {
2296 		pr_err("Cancelled inflight0 request did not reset\n");
2297 		goto out;
2298 	}
2299 
2300 out:
2301 	i915_request_put(rq);
2302 	if (igt_flush_test(arg->engine->i915))
2303 		err = -EIO;
2304 	return err;
2305 }
2306 
/*
 * Arm engine->reset_timeout with probability 999 and times -1.
 *
 * NOTE(review): presumably these are fault-injection parameters that make
 * subsequent engine resets fail; confirm against the reset code that
 * consumes engine->reset_timeout. Undone by cancel_reset_timeout().
 */
static void force_reset_timeout(struct intel_engine_cs *engine)
{
	engine->reset_timeout.probability = 999;
	atomic_set(&engine->reset_timeout.times, -1);
}
2312 
/*
 * Zero the whole of engine->reset_timeout, undoing force_reset_timeout().
 */
static void cancel_reset_timeout(struct intel_engine_cs *engine)
{
	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
}
2317 
/*
 * Phase: cancel a non-preemptible spinner while the preempt-timeout reset
 * is forced to fail, so that recovery has to come from the heartbeat.
 *
 * Skipped (returns 0) when CONFIG_DRM_I915_PREEMPT_TIMEOUT is 0 or the GT
 * has no per-engine reset. The engine's heartbeat interval is set to 1
 * while waiting and then restored to the engine's default.
 *
 * Returns 0 on success or skip, otherwise a negative error code.
 */
static int __cancel_fail(struct live_preempt_cancel *arg)
{
	struct intel_engine_cs *engine = arg->engine;
	struct i915_request *rq;
	int err;

	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
		return 0;

	if (!intel_has_reset_engine(engine->gt))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	/* Mark the context banned, then pulse the engine */
	intel_context_set_banned(rq->context);

	err = intel_engine_pulse(engine);
	if (err)
		goto out;

	force_reset_timeout(engine);

	/* force preempt reset [failure] */
	/* Spin until a submission is pending, then stop the preempt timer */
	while (!engine->execlists.pending[0])
		intel_engine_flush_submission(engine);
	del_timer_sync(&engine->execlists.preempt);
	intel_engine_flush_submission(engine);

	cancel_reset_timeout(engine);

	/* after failure, require heartbeats to reset device */
	intel_engine_set_heartbeat(engine, 1);
	err = wait_for_reset(engine, rq, HZ / 2);
	/* Restore the default heartbeat whether or not the reset arrived */
	intel_engine_set_heartbeat(engine,
				   engine->defaults.heartbeat_interval_ms);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(engine->i915))
		err = -EIO;
	return err;
}
2377 
2378 static int live_preempt_cancel(void *arg)
2379 {
2380 	struct intel_gt *gt = arg;
2381 	struct live_preempt_cancel data;
2382 	enum intel_engine_id id;
2383 	int err = -ENOMEM;
2384 
2385 	/*
2386 	 * To cancel an inflight context, we need to first remove it from the
2387 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2388 	 */
2389 
2390 	if (preempt_client_init(gt, &data.a))
2391 		return -ENOMEM;
2392 	if (preempt_client_init(gt, &data.b))
2393 		goto err_client_a;
2394 
2395 	for_each_engine(data.engine, gt, id) {
2396 		if (!intel_engine_has_preemption(data.engine))
2397 			continue;
2398 
2399 		err = __cancel_active0(&data);
2400 		if (err)
2401 			goto err_wedged;
2402 
2403 		err = __cancel_active1(&data);
2404 		if (err)
2405 			goto err_wedged;
2406 
2407 		err = __cancel_queued(&data);
2408 		if (err)
2409 			goto err_wedged;
2410 
2411 		err = __cancel_hostile(&data);
2412 		if (err)
2413 			goto err_wedged;
2414 
2415 		err = __cancel_fail(&data);
2416 		if (err)
2417 			goto err_wedged;
2418 	}
2419 
2420 	err = 0;
2421 err_client_b:
2422 	preempt_client_fini(&data.b);
2423 err_client_a:
2424 	preempt_client_fini(&data.a);
2425 	return err;
2426 
2427 err_wedged:
2428 	GEM_TRACE_DUMP();
2429 	igt_spinner_end(&data.b.spin);
2430 	igt_spinner_end(&data.a.spin);
2431 	intel_gt_set_wedged(gt);
2432 	goto err_client_b;
2433 }
2434 
/*
 * live_suppress_self_preempt - check that bumping the priority of the
 * request that is already running does not record a preemption.
 *
 * For each preemption-capable engine, two clients take turns: the running
 * spinner is rescheduled at I915_PRIORITY_MAX and then ended while the other
 * client's spinner is queued behind it. After eight such rounds
 * execlists.preempt_hang.count must still be zero.
 *
 * Returns 0 on success or when skipped (GuC submission, active vGPU),
 * -ENOMEM if a client cannot be initialised, the error from
 * spinner_create_request(), -EINVAL if a preemption was recorded, or -EIO
 * (after wedging the GT) if a spinner fails to start or igt_flush_test()
 * fails.
 */
static int live_suppress_self_preempt(void *arg)
{
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		/* Every exit from here on must re-enable the heartbeat */
		st_engine_heartbeat_disable(engine);
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			st_engine_heartbeat_enable(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			st_engine_heartbeat_enable(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				st_engine_heartbeat_enable(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			/*
			 * Raise the priority of the spinner that is already
			 * running, then let it finish so that b can start.
			 */
			GEM_BUG_ON(i915_request_completed(rq_a));
			engine->sched_engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				st_engine_heartbeat_enable(engine);
				goto err_wedged;
			}

			/* b is now the running client; swap roles for the next round */
			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
			st_engine_heartbeat_enable(engine);
			err = -EINVAL;
			goto err_client_b;
		}

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	/* Stop both spinners and wedge the GT before the normal teardown */
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
2548 
/*
 * live_chain_preempt - preempt over increasingly long chains of requests.
 *
 * For each preemption-capable engine, a high priority spinner is followed
 * by a low priority spinner and a prime-numbered count of low priority
 * requests. A final high priority request is then rescheduled at
 * I915_PRIORITY_MAX and must complete before the low priority spinner is
 * released.
 *
 * Returns 0 on success and -ENOMEM if a client cannot be initialised. Every
 * other failure, including request allocation errors, wedges the GT and
 * returns -EIO.
 */
static int live_chain_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client hi, lo;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Build a chain AB...BA between two contexts (A, B) and request
	 * preemption of the last request. It should then complete before
	 * the previously submitted spinner in B.
	 */

	if (preempt_client_init(gt, &hi))
		return -ENOMEM;

	if (preempt_client_init(gt, &lo))
		goto err_client_hi;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
		struct igt_live_test t;
		struct i915_request *rq;
		int ring_size, count, i;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&lo.spin,
					    lo.ctx, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq))
			goto err_wedged;

		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Measure the ring space taken by this one request (allowing
		 * for wraparound) and use it to bound the chain length.
		 */
		ring_size = rq->wa_tail - rq->head;
		if (ring_size < 0)
			ring_size += rq->ring->size;
		ring_size = rq->ring->size / ring_size;
		pr_debug("%s(%s): Using maximum of %d requests\n",
			 __func__, engine->name, ring_size);

		igt_spinner_end(&lo.spin);
		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
			pr_err("Timed out waiting to flush %s\n", engine->name);
			i915_request_put(rq);
			goto err_wedged;
		}
		i915_request_put(rq);

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_wedged;
		}

		for_each_prime_number_from(count, 1, ring_size) {
			/* Occupy the engine with the high priority spinner */
			rq = spinner_create_request(&hi.spin,
						    hi.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);
			if (!igt_wait_for_spinner(&hi.spin, rq))
				goto err_wedged;

			/* Queue the low priority spinner and its chain behind it */
			rq = spinner_create_request(&lo.spin,
						    lo.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);

			for (i = 0; i < count; i++) {
				rq = igt_request_alloc(lo.ctx, engine);
				if (IS_ERR(rq))
					goto err_wedged;
				i915_request_add(rq);
			}

			/* The request that has to jump over the whole chain */
			rq = igt_request_alloc(hi.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);
			engine->sched_engine->schedule(rq, &attr);

			igt_spinner_end(&hi.spin);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to preempt over chain of %d\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);
				i915_request_put(rq);
				goto err_wedged;
			}
			igt_spinner_end(&lo.spin);
			i915_request_put(rq);

			/* Flush the low priority chain before the next round */
			rq = igt_request_alloc(lo.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to flush low priority chain of %d requests\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				goto err_wedged;
			}
			i915_request_put(rq);
		}

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_wedged;
		}
	}

	err = 0;
err_client_lo:
	preempt_client_fini(&lo);
err_client_hi:
	preempt_client_fini(&hi);
	return err;

err_wedged:
	/* Stop both spinners and wedge the GT before the normal teardown */
	igt_spinner_end(&hi.spin);
	igt_spinner_end(&lo.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_lo;
}
2696 
/*
 * create_gang - submit one more semaphore-spinning batch to the gang.
 * @engine: engine on which the new context and request are created
 * @prev: in/out head of the chain; *@prev is NULL for the first member
 *
 * The batch waits on a semaphore placed at the start of its own buffer
 * until that dword reads zero. If *@prev is set, the batch then stores zero
 * to the start of *@prev's batch before ending.
 *
 * On success the new request is linked in front of *@prev through
 * mock.link and stored in *@prev; the caller owns a reference on both the
 * request and its batch vma (rq->batch).
 *
 * Returns 0 on success or a negative error code.
 */
static int create_gang(struct intel_engine_cs *engine,
		       struct i915_request **prev)
{
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	obj = i915_gem_object_create_internal(engine->i915, 4096);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ce;
	}

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_obj;

	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}

	/* Semaphore target: spin until zero */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = lower_32_bits(vma->node.start);
	*cs++ = upper_32_bits(vma->node.start);

	if (*prev) {
		u64 offset = (*prev)->batch->node.start;

		/* Terminate the spinner in the next lower priority batch. */
		*cs++ = MI_STORE_DWORD_IMM_GEN4;
		*cs++ = lower_32_bits(offset);
		*cs++ = upper_32_bits(offset);
		*cs++ = 0;
	}

	*cs++ = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_obj;
	}

	/* References handed to the caller on success, dropped at err_rq */
	rq->batch = i915_vma_get(vma);
	i915_request_get(rq);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						vma->node.start,
						PAGE_SIZE, 0);
	i915_vma_unlock(vma);
	/* The request is added even when building it failed */
	i915_request_add(rq);
	if (err)
		goto err_rq;

	i915_gem_object_put(obj);
	intel_context_put(ce);

	/*
	 * Chain to the previous member. With *prev == NULL this stores the
	 * address that list_next_entry() in live_preempt_gang() maps back
	 * to a NULL request, terminating its walk.
	 */
	rq->mock.link.next = &(*prev)->mock.link;
	*prev = rq;
	return 0;

err_rq:
	i915_vma_put(rq->batch);
	i915_request_put(rq);
err_obj:
	i915_gem_object_put(obj);
err_ce:
	intel_context_put(ce);
	return err;
}
2795 
/*
 * __live_preempt_ring - preempt a context after queuing requests in its ring.
 * @engine: engine under test
 * @spin: spinner used to keep the first context busy
 * @queue_sz: requests keep being queued behind the spinner while
 *            ring->tail - rq->wa_tail of the first context is <= @queue_sz
 * @ring_sz: value assigned to ring_size of both contexts before pinning
 *
 * Two contexts are created and pinned, and their rings are filled with
 * 0xdeadbeef. A spinner runs on the first context with empty requests
 * queued behind it, then a request on the second context is submitted at
 * I915_PRIORITY_BARRIER and must be seen submitted within HZ / 2.
 *
 * Returns 0 on success, -EIO if the live test bracket fails, -ETIME if the
 * spinner does not start (the GT is wedged) or the preempting request is
 * not submitted in time, or the error from context/request creation.
 */
static int __live_preempt_ring(struct intel_engine_cs *engine,
			       struct igt_spinner *spin,
			       int queue_sz, int ring_sz)
{
	struct intel_context *ce[2] = {};
	struct i915_request *rq;
	struct igt_live_test t;
	int err = 0;
	int n;

	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
		return -EIO;

	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		struct intel_context *tmp;

		tmp = intel_context_create(engine);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			goto err_ce;
		}

		tmp->ring_size = ring_sz;

		err = intel_context_pin(tmp);
		if (err) {
			intel_context_put(tmp);
			goto err_ce;
		}

		memset32(tmp->ring->vaddr,
			 0xdeadbeef, /* trigger a hang if executed */
			 tmp->ring->vma->size / sizeof(u32));

		/* Only fully set up contexts are recorded for the cleanup loop */
		ce[n] = tmp;
	}

	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	i915_request_get(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		intel_gt_set_wedged(engine->gt);
		i915_request_put(rq);
		err = -ETIME;
		goto err_ce;
	}

	/* Fill the ring, until we will cause a wrap */
	n = 0;
	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
		struct i915_request *tmp;

		tmp = intel_context_create_request(ce[0]);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			i915_request_put(rq);
			goto err_ce;
		}

		i915_request_add(tmp);
		intel_engine_flush_submission(engine);
		n++;
	}
	intel_engine_flush_submission(engine);
	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
		 engine->name, queue_sz, n,
		 ce[0]->ring->size,
		 ce[0]->ring->tail,
		 ce[0]->ring->emit,
		 rq->tail);
	i915_request_put(rq);

	/* Create a second request to preempt the first ring */
	rq = intel_context_create_request(ce[1]);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_get(rq);
	i915_request_add(rq);

	err = wait_for_submit(engine, rq, HZ / 2);
	i915_request_put(rq);
	if (err) {
		pr_err("%s: preemption request was not submitted\n",
		       engine->name);
		err = -ETIME;
	}

	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
		 engine->name,
		 ce[0]->ring->tail, ce[0]->ring->emit,
		 ce[1]->ring->tail, ce[1]->ring->emit);

err_ce:
	/* Shared by the success and error paths */
	intel_engine_flush_submission(engine);
	igt_spinner_end(spin);
	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		/* ce[] is filled in order, so the first gap ends the list */
		if (IS_ERR_OR_NULL(ce[n]))
			break;

		intel_context_unpin(ce[n]);
		intel_context_put(ce[n]);
	}
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
2913 
2914 static int live_preempt_ring(void *arg)
2915 {
2916 	struct intel_gt *gt = arg;
2917 	struct intel_engine_cs *engine;
2918 	struct igt_spinner spin;
2919 	enum intel_engine_id id;
2920 	int err = 0;
2921 
2922 	/*
2923 	 * Check that we rollback large chunks of a ring in order to do a
2924 	 * preemption event. Similar to live_unlite_ring, but looking at
2925 	 * ring size rather than the impact of intel_ring_direction().
2926 	 */
2927 
2928 	if (igt_spinner_init(&spin, gt))
2929 		return -ENOMEM;
2930 
2931 	for_each_engine(engine, gt, id) {
2932 		int n;
2933 
2934 		if (!intel_engine_has_preemption(engine))
2935 			continue;
2936 
2937 		if (!intel_engine_can_store_dword(engine))
2938 			continue;
2939 
2940 		st_engine_heartbeat_disable(engine);
2941 
2942 		for (n = 0; n <= 3; n++) {
2943 			err = __live_preempt_ring(engine, &spin,
2944 						  n * SZ_4K / 4, SZ_4K);
2945 			if (err)
2946 				break;
2947 		}
2948 
2949 		st_engine_heartbeat_enable(engine);
2950 		if (err)
2951 			break;
2952 	}
2953 
2954 	igt_spinner_fini(&spin);
2955 	return err;
2956 }
2957 
2958 static int live_preempt_gang(void *arg)
2959 {
2960 	struct intel_gt *gt = arg;
2961 	struct intel_engine_cs *engine;
2962 	enum intel_engine_id id;
2963 
2964 	/*
2965 	 * Build as long a chain of preempters as we can, with each
2966 	 * request higher priority than the last. Once we are ready, we release
2967 	 * the last batch which then precolates down the chain, each releasing
2968 	 * the next oldest in turn. The intent is to simply push as hard as we
2969 	 * can with the number of preemptions, trying to exceed narrow HW
2970 	 * limits. At a minimum, we insist that we can sort all the user
2971 	 * high priority levels into execution order.
2972 	 */
2973 
2974 	for_each_engine(engine, gt, id) {
2975 		struct i915_request *rq = NULL;
2976 		struct igt_live_test t;
2977 		IGT_TIMEOUT(end_time);
2978 		int prio = 0;
2979 		int err = 0;
2980 		u32 *cs;
2981 
2982 		if (!intel_engine_has_preemption(engine))
2983 			continue;
2984 
2985 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2986 			return -EIO;
2987 
2988 		do {
2989 			struct i915_sched_attr attr = { .priority = prio++ };
2990 
2991 			err = create_gang(engine, &rq);
2992 			if (err)
2993 				break;
2994 
2995 			/* Submit each spinner at increasing priority */
2996 			engine->sched_engine->schedule(rq, &attr);
2997 		} while (prio <= I915_PRIORITY_MAX &&
2998 			 !__igt_timeout(end_time, NULL));
2999 		pr_debug("%s: Preempt chain of %d requests\n",
3000 			 engine->name, prio);
3001 
3002 		/*
3003 		 * Such that the last spinner is the highest priority and
3004 		 * should execute first. When that spinner completes,
3005 		 * it will terminate the next lowest spinner until there
3006 		 * are no more spinners and the gang is complete.
3007 		 */
3008 		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3009 		if (!IS_ERR(cs)) {
3010 			*cs = 0;
3011 			i915_gem_object_unpin_map(rq->batch->obj);
3012 		} else {
3013 			err = PTR_ERR(cs);
3014 			intel_gt_set_wedged(gt);
3015 		}
3016 
3017 		while (rq) { /* wait for each rq from highest to lowest prio */
3018 			struct i915_request *n = list_next_entry(rq, mock.link);
3019 
3020 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3021 				struct drm_printer p =
3022 					drm_info_printer(engine->i915->drm.dev);
3023 
3024 				pr_err("Failed to flush chain of %d requests, at %d\n",
3025 				       prio, rq_prio(rq));
3026 				intel_engine_dump(engine, &p,
3027 						  "%s\n", engine->name);
3028 
3029 				err = -ETIME;
3030 			}
3031 
3032 			i915_vma_put(rq->batch);
3033 			i915_request_put(rq);
3034 			rq = n;
3035 		}
3036 
3037 		if (igt_live_test_end(&t))
3038 			err = -EIO;
3039 		if (err)
3040 			return err;
3041 	}
3042 
3043 	return 0;
3044 }
3045 
/*
 * create_gpr_user - build the batch that increments and stores each GPR.
 * @engine: engine whose CS_GPR registers the batch addresses
 * @result: vma of the result buffer; the batch is instantiated in
 *          result->vm and writes relative to result->node.start
 * @offset: byte offset added to every store address within @result
 *
 * For GPR 1..NUM_GPR-1 the batch adds GPR0 (loaded with 1) to the register,
 * stores it to @result and then waits on a semaphore at the start of
 * @result before moving on to the next register.
 *
 * Returns the batch vma, still pinned, or an ERR_PTR on failure.
 */
static struct i915_vma *
create_gpr_user(struct intel_engine_cs *engine,
		struct i915_vma *result,
		unsigned int offset)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;
	int i;

	obj = i915_gem_object_create_internal(engine->i915, 4096);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, result->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(vma);
		return ERR_CAST(cs);
	}

	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = CS_GPR(engine, 0);
	*cs++ = 1;

	for (i = 1; i < NUM_GPR; i++) {
		u64 addr;

		/*
		 * Perform: GPR[i]++
		 *
		 * As we read and write into the context saved GPR[i], if
		 * we restart this batch buffer from an earlier point, we
		 * will repeat the increment and store a value > 1.
		 */
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);

		/* Store the low dword of GPR[i] into dword slot i of this client */
		addr = result->node.start + offset + i * sizeof(*cs);
		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
		*cs++ = CS_GPR(engine, 2 * i);
		*cs++ = lower_32_bits(addr);
		*cs++ = upper_32_bits(addr);

		/* Wait on the first dword of the result buffer against i */
		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_GTE_SDD;
		*cs++ = i;
		*cs++ = lower_32_bits(result->node.start);
		*cs++ = upper_32_bits(result->node.start);
	}

	*cs++ = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	return vma;
}
3120 
3121 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3122 {
3123 	struct drm_i915_gem_object *obj;
3124 	struct i915_vma *vma;
3125 	int err;
3126 
3127 	obj = i915_gem_object_create_internal(gt->i915, sz);
3128 	if (IS_ERR(obj))
3129 		return ERR_CAST(obj);
3130 
3131 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3132 	if (IS_ERR(vma)) {
3133 		i915_gem_object_put(obj);
3134 		return vma;
3135 	}
3136 
3137 	err = i915_ggtt_pin(vma, NULL, 0, 0);
3138 	if (err) {
3139 		i915_vma_put(vma);
3140 		return ERR_PTR(err);
3141 	}
3142 
3143 	return vma;
3144 }
3145 
/*
 * create_gpr_client - submit one GPR batch on a fresh context.
 * @engine: engine to run on
 * @global: GGTT vma of the shared result buffer
 * @offset: byte offset of this client's slots in the result buffer
 *
 * Binds the result object into the new context's vm, builds the batch with
 * create_gpr_user() and submits it.
 *
 * Returns the submitted request with a reference held for the caller, or
 * an ERR_PTR on failure.
 */
static struct i915_request *
create_gpr_client(struct intel_engine_cs *engine,
		  struct i915_vma *global,
		  unsigned int offset)
{
	struct i915_vma *batch, *vma;
	struct intel_context *ce;
	struct i915_request *rq;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	/* The batch addresses the result buffer through the context's own vm */
	vma = i915_vma_instance(global->obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_ce;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_ce;

	batch = create_gpr_user(engine, vma, offset);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_vma;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);

	i915_vma_lock(batch);
	if (!err)
		err = i915_request_await_object(rq, batch->obj, false);
	if (!err)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						batch->node.start,
						PAGE_SIZE, 0);
	i915_vma_unlock(batch);
	i915_vma_unpin(batch);

	/* Only take the caller's reference if the request was built cleanly */
	if (!err)
		i915_request_get(rq);
	/* The request is added whether or not building it succeeded */
	i915_request_add(rq);

out_batch:
	i915_vma_put(batch);
out_vma:
	i915_vma_unpin(vma);
out_ce:
	intel_context_put(ce);
	return err ? ERR_PTR(err) : rq;
}
3212 
3213 static int preempt_user(struct intel_engine_cs *engine,
3214 			struct i915_vma *global,
3215 			int id)
3216 {
3217 	struct i915_sched_attr attr = {
3218 		.priority = I915_PRIORITY_MAX
3219 	};
3220 	struct i915_request *rq;
3221 	int err = 0;
3222 	u32 *cs;
3223 
3224 	rq = intel_engine_create_kernel_request(engine);
3225 	if (IS_ERR(rq))
3226 		return PTR_ERR(rq);
3227 
3228 	cs = intel_ring_begin(rq, 4);
3229 	if (IS_ERR(cs)) {
3230 		i915_request_add(rq);
3231 		return PTR_ERR(cs);
3232 	}
3233 
3234 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3235 	*cs++ = i915_ggtt_offset(global);
3236 	*cs++ = 0;
3237 	*cs++ = id;
3238 
3239 	intel_ring_advance(rq, cs);
3240 
3241 	i915_request_get(rq);
3242 	i915_request_add(rq);
3243 
3244 	engine->sched_engine->schedule(rq, &attr);
3245 
3246 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3247 		err = -ETIME;
3248 	i915_request_put(rq);
3249 
3250 	return err;
3251 }
3252 
/*
 * live_preempt_user - check that a preempted user batch resumes where it
 * left off rather than replaying earlier instructions.
 *
 * For each eligible engine, three GPR clients are started and then
 * preempted NUM_GPR times by preempt_user(), which also advances the
 * semaphore in result[0]. Every GPR slot written by a client must then
 * read exactly 1.
 *
 * Returns 0 on success, -EIO if the live test bracket fails or the
 * semaphore value is wrong, -ETIME if a client does not complete, -EINVAL
 * on an unexpected GPR value, or the error from a helper.
 */
static int live_preempt_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *global;
	enum intel_engine_id id;
	u32 *result;
	int err = 0;

	/*
	 * In our other tests, we look at preemption in carefully
	 * controlled conditions in the ringbuffer. Since most of the
	 * time is spent in user batches, most of our preemptions naturally
	 * occur there. We want to verify that when we preempt inside a batch
	 * we continue on from the current instruction and do not roll back
	 * to the start, or another earlier arbitration point.
	 *
	 * To verify this, we create a batch which is a mixture of
	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
	 * a few preempting contexts thrown into the mix, we look for any
	 * repeated instructions (which show up as incorrect values).
	 */

	global = create_global(gt, 4096);
	if (IS_ERR(global))
		return PTR_ERR(global);

	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
	if (IS_ERR(result)) {
		i915_vma_unpin_and_release(&global, 0);
		return PTR_ERR(result);
	}

	for_each_engine(engine, gt, id) {
		struct i915_request *client[3] = {};
		struct igt_live_test t;
		int i;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
			continue; /* we need per-context GPR */

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		memset(result, 0, 4096);

		/* Each client owns NUM_GPR consecutive dwords of the result buffer */
		for (i = 0; i < ARRAY_SIZE(client); i++) {
			struct i915_request *rq;

			rq = create_gpr_client(engine, global,
					       NUM_GPR * i * sizeof(u32));
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto end_test;
			}

			client[i] = rq;
		}

		/* Continuously preempt the set of 3 running contexts */
		for (i = 1; i <= NUM_GPR; i++) {
			err = preempt_user(engine, global, i);
			if (err)
				goto end_test;
		}

		/* The last preempt_user() call stored NUM_GPR into result[0] */
		if (READ_ONCE(result[0]) != NUM_GPR) {
			pr_err("%s: Failed to release semaphore\n",
			       engine->name);
			err = -EIO;
			goto end_test;
		}

		for (i = 0; i < ARRAY_SIZE(client); i++) {
			int gpr;

			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
				err = -ETIME;
				goto end_test;
			}

			/* A value other than 1 means an increment ran more than once */
			for (gpr = 1; gpr < NUM_GPR; gpr++) {
				if (result[NUM_GPR * i + gpr] != 1) {
					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
					       engine->name,
					       i, gpr, result[NUM_GPR * i + gpr]);
					err = -EINVAL;
					goto end_test;
				}
			}
		}

end_test:
		/* client[] is filled in order, so the first NULL ends the list */
		for (i = 0; i < ARRAY_SIZE(client); i++) {
			if (!client[i])
				break;

			i915_request_put(client[i]);
		}

		/* Flush the semaphores on error */
		smp_store_mb(result[0], -1);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
	return err;
}
3369 
/*
 * live_preempt_timeout - check that a non-preemptible spinner is forcibly
 * removed once the preempt timeout expires.
 *
 * For each preemption-capable engine, a low priority spinner with no
 * arbitration point is started, the engine's preempt timeout is lowered to
 * 1ms while a high priority request is submitted, and that request must
 * then complete within HZ / 10.
 *
 * Returns 0 on success or when skipped (no preempt timeout configured, no
 * engine reset), -ENOMEM on allocation failure, -EIO if the spinner fails
 * to start, -ETIME if the high priority request does not complete (the GT
 * is wedged in both cases), or the error from request creation.
 */
static int live_preempt_timeout(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Check that we force preemption to occur by cancelling the previous
	 * context if it refuses to yield the GPU.
	 */
	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	ctx_hi = kernel_context(gt->i915, NULL);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915, NULL);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_ctx_lo;

	for_each_engine(engine, gt, id) {
		unsigned long saved_timeout;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_NOOP); /* preemption disabled */
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_spin_lo;
		}

		rq = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_spin_lo;
		}

		/* Flush the previous CS ack before changing timeouts */
		while (READ_ONCE(engine->execlists.pending[0]))
			cpu_relax();

		saved_timeout = engine->props.preempt_timeout_ms;
		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */

		i915_request_get(rq);
		i915_request_add(rq);

		/* Restore the timeout once the submission has been flushed */
		intel_engine_flush_submission(engine);
		engine->props.preempt_timeout_ms = saved_timeout;

		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_spin_lo;
		}

		igt_spinner_end(&spin_lo);
		i915_request_put(rq);
	}

	err = 0;
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}
3463 
/*
 * random_range - draw a pseudo-random value offset from @min.
 *
 * The width (@max - @min) is handed to i915_prandom_u32_max_state() and
 * @min is added to whatever it returns.
 * NOTE(review): presumably the result lies in [@min, @max); confirm against
 * i915_prandom_u32_max_state().
 */
static int random_range(struct rnd_state *rnd, int min, int max)
{
	int span = max - min;

	return min + i915_prandom_u32_max_state(span, rnd);
}
3468 
3469 static int random_priority(struct rnd_state *rnd)
3470 {
3471 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3472 }
3473 
/*
 * State shared by the preemption smoke tests. smoke_crescendo() makes one
 * copy per engine and hands each copy to its own worker.
 */
struct preempt_smoke {
	struct intel_gt *gt; /* GT whose engines are iterated */
	struct kthread_work work; /* work item run by smoke_crescendo_work() */
	struct i915_gem_context **contexts; /* pool that smoke_context() picks from */
	struct intel_engine_cs *engine; /* engine that smoke_submit() targets */
	struct drm_i915_gem_object *batch; /* optional batch to execute; may be NULL */
	unsigned int ncontext; /* number of entries in contexts[] */
	struct rnd_state prng; /* random state for context/priority selection */
	unsigned long count; /* number of submissions made by the worker */
	int result; /* last smoke_submit() return value in the worker */
};
3485 
3486 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3487 {
3488 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3489 							  &smoke->prng)];
3490 }
3491 
3492 static int smoke_submit(struct preempt_smoke *smoke,
3493 			struct i915_gem_context *ctx, int prio,
3494 			struct drm_i915_gem_object *batch)
3495 {
3496 	struct i915_request *rq;
3497 	struct i915_vma *vma = NULL;
3498 	int err = 0;
3499 
3500 	if (batch) {
3501 		struct i915_address_space *vm;
3502 
3503 		vm = i915_gem_context_get_eb_vm(ctx);
3504 		vma = i915_vma_instance(batch, vm, NULL);
3505 		i915_vm_put(vm);
3506 		if (IS_ERR(vma))
3507 			return PTR_ERR(vma);
3508 
3509 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3510 		if (err)
3511 			return err;
3512 	}
3513 
3514 	ctx->sched.priority = prio;
3515 
3516 	rq = igt_request_alloc(ctx, smoke->engine);
3517 	if (IS_ERR(rq)) {
3518 		err = PTR_ERR(rq);
3519 		goto unpin;
3520 	}
3521 
3522 	if (vma) {
3523 		i915_vma_lock(vma);
3524 		err = i915_request_await_object(rq, vma->obj, false);
3525 		if (!err)
3526 			err = i915_vma_move_to_active(vma, rq, 0);
3527 		if (!err)
3528 			err = rq->engine->emit_bb_start(rq,
3529 							vma->node.start,
3530 							PAGE_SIZE, 0);
3531 		i915_vma_unlock(vma);
3532 	}
3533 
3534 	i915_request_add(rq);
3535 
3536 unpin:
3537 	if (vma)
3538 		i915_vma_unpin(vma);
3539 
3540 	return err;
3541 }
3542 
3543 static void smoke_crescendo_work(struct kthread_work *work)
3544 {
3545 	struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
3546 	IGT_TIMEOUT(end_time);
3547 	unsigned long count;
3548 
3549 	count = 0;
3550 	do {
3551 		struct i915_gem_context *ctx = smoke_context(smoke);
3552 
3553 		smoke->result = smoke_submit(smoke, ctx,
3554 					     count % I915_PRIORITY_MAX,
3555 					     smoke->batch);
3556 
3557 		count++;
3558 	} while (!smoke->result && count < smoke->ncontext &&
3559 		 !__igt_timeout(end_time, NULL));
3560 
3561 	smoke->count = count;
3562 }
3563 
3564 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3565 #define BATCH BIT(0)
3566 {
3567 	struct kthread_worker *worker[I915_NUM_ENGINES] = {};
3568 	struct preempt_smoke *arg;
3569 	struct intel_engine_cs *engine;
3570 	enum intel_engine_id id;
3571 	unsigned long count;
3572 	int err = 0;
3573 
3574 	arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL);
3575 	if (!arg)
3576 		return -ENOMEM;
3577 
3578 	memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
3579 
3580 	for_each_engine(engine, smoke->gt, id) {
3581 		arg[id] = *smoke;
3582 		arg[id].engine = engine;
3583 		if (!(flags & BATCH))
3584 			arg[id].batch = NULL;
3585 		arg[id].count = 0;
3586 
3587 		worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
3588 		if (IS_ERR(worker[id])) {
3589 			err = PTR_ERR(worker[id]);
3590 			break;
3591 		}
3592 
3593 		kthread_init_work(&arg[id].work, smoke_crescendo_work);
3594 		kthread_queue_work(worker[id], &arg[id].work);
3595 	}
3596 
3597 	count = 0;
3598 	for_each_engine(engine, smoke->gt, id) {
3599 		if (IS_ERR_OR_NULL(worker[id]))
3600 			continue;
3601 
3602 		kthread_flush_work(&arg[id].work);
3603 		if (arg[id].result && !err)
3604 			err = arg[id].result;
3605 
3606 		count += arg[id].count;
3607 
3608 		kthread_destroy_worker(worker[id]);
3609 	}
3610 
3611 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3612 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3613 
3614 	kfree(arg);
3615 	return 0;
3616 }
3617 
3618 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3619 {
3620 	enum intel_engine_id id;
3621 	IGT_TIMEOUT(end_time);
3622 	unsigned long count;
3623 
3624 	count = 0;
3625 	do {
3626 		for_each_engine(smoke->engine, smoke->gt, id) {
3627 			struct i915_gem_context *ctx = smoke_context(smoke);
3628 			int err;
3629 
3630 			err = smoke_submit(smoke,
3631 					   ctx, random_priority(&smoke->prng),
3632 					   flags & BATCH ? smoke->batch : NULL);
3633 			if (err)
3634 				return err;
3635 
3636 			count++;
3637 		}
3638 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3639 
3640 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3641 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3642 	return 0;
3643 }
3644 
3645 static int live_preempt_smoke(void *arg)
3646 {
3647 	struct preempt_smoke smoke = {
3648 		.gt = arg,
3649 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3650 		.ncontext = 256,
3651 	};
3652 	const unsigned int phase[] = { 0, BATCH };
3653 	struct igt_live_test t;
3654 	int err = -ENOMEM;
3655 	u32 *cs;
3656 	int n;
3657 
3658 	smoke.contexts = kmalloc_array(smoke.ncontext,
3659 				       sizeof(*smoke.contexts),
3660 				       GFP_KERNEL);
3661 	if (!smoke.contexts)
3662 		return -ENOMEM;
3663 
3664 	smoke.batch =
3665 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3666 	if (IS_ERR(smoke.batch)) {
3667 		err = PTR_ERR(smoke.batch);
3668 		goto err_free;
3669 	}
3670 
3671 	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3672 	if (IS_ERR(cs)) {
3673 		err = PTR_ERR(cs);
3674 		goto err_batch;
3675 	}
3676 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3677 		cs[n] = MI_ARB_CHECK;
3678 	cs[n] = MI_BATCH_BUFFER_END;
3679 	i915_gem_object_flush_map(smoke.batch);
3680 	i915_gem_object_unpin_map(smoke.batch);
3681 
3682 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3683 		err = -EIO;
3684 		goto err_batch;
3685 	}
3686 
3687 	for (n = 0; n < smoke.ncontext; n++) {
3688 		smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL);
3689 		if (!smoke.contexts[n])
3690 			goto err_ctx;
3691 	}
3692 
3693 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3694 		err = smoke_crescendo(&smoke, phase[n]);
3695 		if (err)
3696 			goto err_ctx;
3697 
3698 		err = smoke_random(&smoke, phase[n]);
3699 		if (err)
3700 			goto err_ctx;
3701 	}
3702 
3703 err_ctx:
3704 	if (igt_live_test_end(&t))
3705 		err = -EIO;
3706 
3707 	for (n = 0; n < smoke.ncontext; n++) {
3708 		if (!smoke.contexts[n])
3709 			break;
3710 		kernel_context_close(smoke.contexts[n]);
3711 	}
3712 
3713 err_batch:
3714 	i915_gem_object_put(smoke.batch);
3715 err_free:
3716 	kfree(smoke.contexts);
3717 
3718 	return err;
3719 }
3720 
/*
 * Time the submission of empty requests through virtual engines.
 *
 * Creates and pins @nctx virtual contexts, each built over the same
 * @nsibling @siblings. Then, for each prime number starting from 1 (up
 * to 8192, or until the selftest timeout), submits that many empty
 * requests on every context, waits for the last request of each context
 * and records the elapsed time.
 *
 * With CHAIN set in @flags, all requests for one context are queued
 * before moving on to the next context; without it, the requests are
 * interleaved across the contexts.
 *
 * @nctx must be between 1 and 16 (the size of the local arrays).
 *
 * Returns 0 on success or a negative error code.
 */
static int nop_virtual_engine(struct intel_gt *gt,
			      struct intel_engine_cs **siblings,
			      unsigned int nsibling,
			      unsigned int nctx,
			      unsigned int flags)
#define CHAIN BIT(0)
{
	IGT_TIMEOUT(end_time);
	struct i915_request *request[16] = {};
	struct intel_context *ve[16];
	unsigned long n, prime, nc;
	struct igt_live_test t;
	ktime_t times[2] = {};
	int err;

	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
		ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			/* Trim nctx so that cleanup only touches valid contexts. */
			nctx = n;
			goto out;
		}

		err = intel_context_pin(ve[n]);
		if (err) {
			intel_context_put(ve[n]);
			nctx = n;
			goto out;
		}
	}

	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
	if (err)
		goto out;

	for_each_prime_number_from(prime, 1, 8192) {
		/* times[1] holds the start timestamp until converted to a delta below. */
		times[1] = ktime_get_raw();

		if (flags & CHAIN) {
			/* Queue the whole chain on one context before the next. */
			for (nc = 0; nc < nctx; nc++) {
				for (n = 0; n < prime; n++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					/* Only keep a reference to the most recent request. */
					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		} else {
			/* Interleave: one request per context on each pass. */
			for (n = 0; n < prime; n++) {
				for (nc = 0; nc < nctx; nc++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		}

		for (nc = 0; nc < nctx; nc++) {
			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
				pr_err("%s(%s): wait for %llx:%lld timed out\n",
				       __func__, ve[0]->engine->name,
				       request[nc]->fence.context,
				       request[nc]->fence.seqno);

				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
					  __func__, ve[0]->engine->name,
					  request[nc]->fence.context,
					  request[nc]->fence.seqno);
				GEM_TRACE_DUMP();
				/*
				 * err is not set here. NOTE(review): presumably the
				 * wedged GT is reported by igt_live_test_end() below
				 * -- confirm.
				 */
				intel_gt_set_wedged(gt);
				break;
			}
		}

		times[1] = ktime_sub(ktime_get_raw(), times[1]);
		/* Remember the single-request latency as the baseline. */
		if (prime == 1)
			times[0] = times[1];

		for (nc = 0; nc < nctx; nc++) {
			i915_request_put(request[nc]);
			request[nc] = NULL;
		}

		if (__igt_timeout(end_time, NULL))
			break;
	}

	err = igt_live_test_end(&t);
	if (err)
		goto out;

	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
		prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	/*
	 * request[nc] is NULL here unless we bailed out in the middle of a
	 * submission pass. NOTE(review): relies on i915_request_put()
	 * accepting NULL -- confirm.
	 */
	for (nc = 0; nc < nctx; nc++) {
		i915_request_put(request[nc]);
		intel_context_unpin(ve[nc]);
		intel_context_put(ve[nc]);
	}
	return err;
}
3846 
3847 static unsigned int
3848 __select_siblings(struct intel_gt *gt,
3849 		  unsigned int class,
3850 		  struct intel_engine_cs **siblings,
3851 		  bool (*filter)(const struct intel_engine_cs *))
3852 {
3853 	unsigned int n = 0;
3854 	unsigned int inst;
3855 
3856 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3857 		if (!gt->engine_class[class][inst])
3858 			continue;
3859 
3860 		if (filter && !filter(gt->engine_class[class][inst]))
3861 			continue;
3862 
3863 		siblings[n++] = gt->engine_class[class][inst];
3864 	}
3865 
3866 	return n;
3867 }
3868 
/*
 * Gather every engine of @class present on @gt into @siblings, without
 * any filtering. Returns the number of engines stored.
 */
static unsigned int
select_siblings(struct intel_gt *gt,
		unsigned int class,
		struct intel_engine_cs **siblings)
{
	return __select_siblings(gt, class, siblings, NULL);
}
3876 
3877 static int live_virtual_engine(void *arg)
3878 {
3879 	struct intel_gt *gt = arg;
3880 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3881 	struct intel_engine_cs *engine;
3882 	enum intel_engine_id id;
3883 	unsigned int class;
3884 	int err;
3885 
3886 	if (intel_uc_uses_guc_submission(&gt->uc))
3887 		return 0;
3888 
3889 	for_each_engine(engine, gt, id) {
3890 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3891 		if (err) {
3892 			pr_err("Failed to wrap engine %s: err=%d\n",
3893 			       engine->name, err);
3894 			return err;
3895 		}
3896 	}
3897 
3898 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3899 		int nsibling, n;
3900 
3901 		nsibling = select_siblings(gt, class, siblings);
3902 		if (nsibling < 2)
3903 			continue;
3904 
3905 		for (n = 1; n <= nsibling + 1; n++) {
3906 			err = nop_virtual_engine(gt, siblings, nsibling,
3907 						 n, 0);
3908 			if (err)
3909 				return err;
3910 		}
3911 
3912 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3913 		if (err)
3914 			return err;
3915 	}
3916 
3917 	return 0;
3918 }
3919 
3920 static int mask_virtual_engine(struct intel_gt *gt,
3921 			       struct intel_engine_cs **siblings,
3922 			       unsigned int nsibling)
3923 {
3924 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3925 	struct intel_context *ve;
3926 	struct igt_live_test t;
3927 	unsigned int n;
3928 	int err;
3929 
3930 	/*
3931 	 * Check that by setting the execution mask on a request, we can
3932 	 * restrict it to our desired engine within the virtual engine.
3933 	 */
3934 
3935 	ve = intel_engine_create_virtual(siblings, nsibling, 0);
3936 	if (IS_ERR(ve)) {
3937 		err = PTR_ERR(ve);
3938 		goto out_close;
3939 	}
3940 
3941 	err = intel_context_pin(ve);
3942 	if (err)
3943 		goto out_put;
3944 
3945 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3946 	if (err)
3947 		goto out_unpin;
3948 
3949 	for (n = 0; n < nsibling; n++) {
3950 		request[n] = i915_request_create(ve);
3951 		if (IS_ERR(request[n])) {
3952 			err = PTR_ERR(request[n]);
3953 			nsibling = n;
3954 			goto out;
3955 		}
3956 
3957 		/* Reverse order as it's more likely to be unnatural */
3958 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3959 
3960 		i915_request_get(request[n]);
3961 		i915_request_add(request[n]);
3962 	}
3963 
3964 	for (n = 0; n < nsibling; n++) {
3965 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3966 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3967 			       __func__, ve->engine->name,
3968 			       request[n]->fence.context,
3969 			       request[n]->fence.seqno);
3970 
3971 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3972 				  __func__, ve->engine->name,
3973 				  request[n]->fence.context,
3974 				  request[n]->fence.seqno);
3975 			GEM_TRACE_DUMP();
3976 			intel_gt_set_wedged(gt);
3977 			err = -EIO;
3978 			goto out;
3979 		}
3980 
3981 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3982 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3983 			       request[n]->engine->name,
3984 			       siblings[nsibling - n - 1]->name);
3985 			err = -EINVAL;
3986 			goto out;
3987 		}
3988 	}
3989 
3990 	err = igt_live_test_end(&t);
3991 out:
3992 	if (igt_flush_test(gt->i915))
3993 		err = -EIO;
3994 
3995 	for (n = 0; n < nsibling; n++)
3996 		i915_request_put(request[n]);
3997 
3998 out_unpin:
3999 	intel_context_unpin(ve);
4000 out_put:
4001 	intel_context_put(ve);
4002 out_close:
4003 	return err;
4004 }
4005 
4006 static int live_virtual_mask(void *arg)
4007 {
4008 	struct intel_gt *gt = arg;
4009 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4010 	unsigned int class;
4011 	int err;
4012 
4013 	if (intel_uc_uses_guc_submission(&gt->uc))
4014 		return 0;
4015 
4016 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4017 		unsigned int nsibling;
4018 
4019 		nsibling = select_siblings(gt, class, siblings);
4020 		if (nsibling < 2)
4021 			continue;
4022 
4023 		err = mask_virtual_engine(gt, siblings, nsibling);
4024 		if (err)
4025 			return err;
4026 	}
4027 
4028 	return 0;
4029 }
4030 
4031 static int slicein_virtual_engine(struct intel_gt *gt,
4032 				  struct intel_engine_cs **siblings,
4033 				  unsigned int nsibling)
4034 {
4035 	const long timeout = slice_timeout(siblings[0]);
4036 	struct intel_context *ce;
4037 	struct i915_request *rq;
4038 	struct igt_spinner spin;
4039 	unsigned int n;
4040 	int err = 0;
4041 
4042 	/*
4043 	 * Virtual requests must take part in timeslicing on the target engines.
4044 	 */
4045 
4046 	if (igt_spinner_init(&spin, gt))
4047 		return -ENOMEM;
4048 
4049 	for (n = 0; n < nsibling; n++) {
4050 		ce = intel_context_create(siblings[n]);
4051 		if (IS_ERR(ce)) {
4052 			err = PTR_ERR(ce);
4053 			goto out;
4054 		}
4055 
4056 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4057 		intel_context_put(ce);
4058 		if (IS_ERR(rq)) {
4059 			err = PTR_ERR(rq);
4060 			goto out;
4061 		}
4062 
4063 		i915_request_add(rq);
4064 	}
4065 
4066 	ce = intel_engine_create_virtual(siblings, nsibling, 0);
4067 	if (IS_ERR(ce)) {
4068 		err = PTR_ERR(ce);
4069 		goto out;
4070 	}
4071 
4072 	rq = intel_context_create_request(ce);
4073 	intel_context_put(ce);
4074 	if (IS_ERR(rq)) {
4075 		err = PTR_ERR(rq);
4076 		goto out;
4077 	}
4078 
4079 	i915_request_get(rq);
4080 	i915_request_add(rq);
4081 	if (i915_request_wait(rq, 0, timeout) < 0) {
4082 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4083 			      __func__, rq->engine->name);
4084 		GEM_TRACE_DUMP();
4085 		intel_gt_set_wedged(gt);
4086 		err = -EIO;
4087 	}
4088 	i915_request_put(rq);
4089 
4090 out:
4091 	igt_spinner_end(&spin);
4092 	if (igt_flush_test(gt->i915))
4093 		err = -EIO;
4094 	igt_spinner_fini(&spin);
4095 	return err;
4096 }
4097 
4098 static int sliceout_virtual_engine(struct intel_gt *gt,
4099 				   struct intel_engine_cs **siblings,
4100 				   unsigned int nsibling)
4101 {
4102 	const long timeout = slice_timeout(siblings[0]);
4103 	struct intel_context *ce;
4104 	struct i915_request *rq;
4105 	struct igt_spinner spin;
4106 	unsigned int n;
4107 	int err = 0;
4108 
4109 	/*
4110 	 * Virtual requests must allow others a fair timeslice.
4111 	 */
4112 
4113 	if (igt_spinner_init(&spin, gt))
4114 		return -ENOMEM;
4115 
4116 	/* XXX We do not handle oversubscription and fairness with normal rq */
4117 	for (n = 0; n < nsibling; n++) {
4118 		ce = intel_engine_create_virtual(siblings, nsibling, 0);
4119 		if (IS_ERR(ce)) {
4120 			err = PTR_ERR(ce);
4121 			goto out;
4122 		}
4123 
4124 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4125 		intel_context_put(ce);
4126 		if (IS_ERR(rq)) {
4127 			err = PTR_ERR(rq);
4128 			goto out;
4129 		}
4130 
4131 		i915_request_add(rq);
4132 	}
4133 
4134 	for (n = 0; !err && n < nsibling; n++) {
4135 		ce = intel_context_create(siblings[n]);
4136 		if (IS_ERR(ce)) {
4137 			err = PTR_ERR(ce);
4138 			goto out;
4139 		}
4140 
4141 		rq = intel_context_create_request(ce);
4142 		intel_context_put(ce);
4143 		if (IS_ERR(rq)) {
4144 			err = PTR_ERR(rq);
4145 			goto out;
4146 		}
4147 
4148 		i915_request_get(rq);
4149 		i915_request_add(rq);
4150 		if (i915_request_wait(rq, 0, timeout) < 0) {
4151 			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4152 				      __func__, siblings[n]->name);
4153 			GEM_TRACE_DUMP();
4154 			intel_gt_set_wedged(gt);
4155 			err = -EIO;
4156 		}
4157 		i915_request_put(rq);
4158 	}
4159 
4160 out:
4161 	igt_spinner_end(&spin);
4162 	if (igt_flush_test(gt->i915))
4163 		err = -EIO;
4164 	igt_spinner_fini(&spin);
4165 	return err;
4166 }
4167 
4168 static int live_virtual_slice(void *arg)
4169 {
4170 	struct intel_gt *gt = arg;
4171 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4172 	unsigned int class;
4173 	int err;
4174 
4175 	if (intel_uc_uses_guc_submission(&gt->uc))
4176 		return 0;
4177 
4178 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4179 		unsigned int nsibling;
4180 
4181 		nsibling = __select_siblings(gt, class, siblings,
4182 					     intel_engine_has_timeslices);
4183 		if (nsibling < 2)
4184 			continue;
4185 
4186 		err = slicein_virtual_engine(gt, siblings, nsibling);
4187 		if (err)
4188 			return err;
4189 
4190 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4191 		if (err)
4192 			return err;
4193 	}
4194 
4195 	return 0;
4196 }
4197 
/*
 * Check that CS_GPR contents follow a virtual context as it moves
 * between siblings.
 *
 * NUM_GPR_DW requests are submitted on one virtual context, request n
 * being restricted (through its execution_mask) to
 * siblings[n % nsibling]. Each request stores GPR dword n into dword n
 * of a GGTT scratch page and then loads the value n + 1 into GPR dword
 * (n + 1) % NUM_GPR_DW, which the following request stores in turn.
 * Afterwards scratch dword n is expected to contain n.
 *
 * Returns 0 on success, -ETIME if the last request did not complete,
 * -EINVAL on a value mismatch, -EIO if the live test reports a failure,
 * or another negative error code.
 */
static int preserved_virtual_engine(struct intel_gt *gt,
				    struct intel_engine_cs **siblings,
				    unsigned int nsibling)
{
	struct i915_request *last = NULL;
	struct intel_context *ve;
	struct i915_vma *scratch;
	struct igt_live_test t;
	unsigned int n;
	int err = 0;
	u32 *cs;

	scratch =
		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
						    PAGE_SIZE);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	err = i915_vma_sync(scratch);
	if (err)
		goto out_scratch;

	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < NUM_GPR_DW; n++) {
		/* Rotate through the siblings so consecutive requests change engine. */
		struct intel_engine_cs *engine = siblings[n % nsibling];
		struct i915_request *rq;

		rq = i915_request_create(ve);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_end;
		}

		/* Only track the most recent request; it is the one we wait on. */
		i915_request_put(last);
		last = i915_request_get(rq);

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto out_end;
		}

		/* Save GPR dword n, as left by the previous request, to scratch[n] */
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;

		/* Seed the next GPR dword with the value the next request expects */
		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
		*cs++ = n + 1;

		/* Pad the emission out to the 8 dwords reserved above */
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* Restrict this request to run on a particular engine */
		rq->execution_mask = engine->mask;
		i915_request_add(rq);
	}

	if (i915_request_wait(last, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_end;
	}

	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_end;
	}

	/*
	 * NOTE(review): scratch[0] is GPR dword 0 before any request wrote
	 * it, so this presumably relies on GPRs starting out as zero in a
	 * new context -- confirm.
	 */
	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n] != n) {
			pr_err("Incorrect value[%d] found for GPR[%d]\n",
			       cs[n], n);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

out_end:
	if (igt_live_test_end(&t))
		err = -EIO;
	i915_request_put(last);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_scratch:
	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}
4305 
4306 static int live_virtual_preserved(void *arg)
4307 {
4308 	struct intel_gt *gt = arg;
4309 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4310 	unsigned int class;
4311 
4312 	/*
4313 	 * Check that the context image retains non-privileged (user) registers
4314 	 * from one engine to the next. For this we check that the CS_GPR
4315 	 * are preserved.
4316 	 */
4317 
4318 	if (intel_uc_uses_guc_submission(&gt->uc))
4319 		return 0;
4320 
4321 	/* As we use CS_GPR we cannot run before they existed on all engines. */
4322 	if (GRAPHICS_VER(gt->i915) < 9)
4323 		return 0;
4324 
4325 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4326 		int nsibling, err;
4327 
4328 		nsibling = select_siblings(gt, class, siblings);
4329 		if (nsibling < 2)
4330 			continue;
4331 
4332 		err = preserved_virtual_engine(gt, siblings, nsibling);
4333 		if (err)
4334 			return err;
4335 	}
4336 
4337 	return 0;
4338 }
4339 
/*
 * Reset the physical engine underneath a running virtual request while
 * that request is placed on hold, then check that it is not resubmitted
 * until explicitly released.
 *
 * Heartbeats are disabled on all @siblings for the duration of the test
 * and re-enabled on exit.
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be set up, -ETIME
 * or -EIO (with the GT wedged) when a wait does not behave as expected,
 * or another negative error code.
 */
static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_disable(siblings[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

	/* Once running, rq->engine names the physical sibling, not the virtual engine */
	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	err = engine_lock_reset_tasklet(engine);
	if (err)
		goto out_heartbeat;

	/* Run the submission tasklet by hand; the spinner must then be the active request */
	engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->sched_engine->lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->sched_engine->lock);
	GEM_BUG_ON(rq->engine != engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	__intel_engine_reset_bh(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	engine_unlock_reset_tasklet(engine);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	/* A return of 0 means the wait completed immediately, i.e. rq finished */
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_enable(siblings[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
	return err;
}
4440 
4441 static int live_virtual_reset(void *arg)
4442 {
4443 	struct intel_gt *gt = arg;
4444 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4445 	unsigned int class;
4446 
4447 	/*
4448 	 * Check that we handle a reset event within a virtual engine.
4449 	 * Only the physical engine is reset, but we have to check the flow
4450 	 * of the virtual requests around the reset, and make sure it is not
4451 	 * forgotten.
4452 	 */
4453 
4454 	if (intel_uc_uses_guc_submission(&gt->uc))
4455 		return 0;
4456 
4457 	if (!intel_has_reset_engine(gt))
4458 		return 0;
4459 
4460 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4461 		int nsibling, err;
4462 
4463 		nsibling = select_siblings(gt, class, siblings);
4464 		if (nsibling < 2)
4465 			continue;
4466 
4467 		err = reset_virtual_engine(gt, siblings, nsibling);
4468 		if (err)
4469 			return err;
4470 	}
4471 
4472 	return 0;
4473 }
4474 
4475 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4476 {
4477 	static const struct i915_subtest tests[] = {
4478 		SUBTEST(live_sanitycheck),
4479 		SUBTEST(live_unlite_switch),
4480 		SUBTEST(live_unlite_preempt),
4481 		SUBTEST(live_unlite_ring),
4482 		SUBTEST(live_pin_rewind),
4483 		SUBTEST(live_hold_reset),
4484 		SUBTEST(live_error_interrupt),
4485 		SUBTEST(live_timeslice_preempt),
4486 		SUBTEST(live_timeslice_rewind),
4487 		SUBTEST(live_timeslice_queue),
4488 		SUBTEST(live_timeslice_nopreempt),
4489 		SUBTEST(live_busywait_preempt),
4490 		SUBTEST(live_preempt),
4491 		SUBTEST(live_late_preempt),
4492 		SUBTEST(live_nopreempt),
4493 		SUBTEST(live_preempt_cancel),
4494 		SUBTEST(live_suppress_self_preempt),
4495 		SUBTEST(live_chain_preempt),
4496 		SUBTEST(live_preempt_ring),
4497 		SUBTEST(live_preempt_gang),
4498 		SUBTEST(live_preempt_timeout),
4499 		SUBTEST(live_preempt_user),
4500 		SUBTEST(live_preempt_smoke),
4501 		SUBTEST(live_virtual_engine),
4502 		SUBTEST(live_virtual_mask),
4503 		SUBTEST(live_virtual_preserved),
4504 		SUBTEST(live_virtual_slice),
4505 		SUBTEST(live_virtual_reset),
4506 	};
4507 
4508 	if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
4509 		return 0;
4510 
4511 	if (intel_gt_is_wedged(to_gt(i915)))
4512 		return 0;
4513 
4514 	return intel_gt_live_subtests(tests, to_gt(i915));
4515 }
4516