1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
13 
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
20 
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
23 
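/*
 * The command streamer's general purpose registers (GPRs) start at
 * mmio_base + 0x600; there are 16 of them and each is 64 bits wide, i.e.
 * two dwords, so CS_GPR() is indexed per dword.
 */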
24 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
25 #define NUM_GPR 16
26 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
27 
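/*
 * A request counts as active once the submission backend has taken charge
 * of it: it is in the ELSP, sitting on the engine's hold list, or has
 * already started executing on the HW (its initial breadcrumb was crossed).
 */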
28 static bool is_active(struct i915_request *rq)
29 {
30 	if (i915_request_is_active(rq))
31 		return true;
32 
33 	if (i915_request_on_hold(rq))
34 		return true;
35 
36 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
37 		return true;
38 
39 	return false;
40 }
41 
42 static int wait_for_submit(struct intel_engine_cs *engine,
43 			   struct i915_request *rq,
44 			   unsigned long timeout)
45 {
46 	/* Ignore our own attempts to suppress excess tasklets */
47 	tasklet_hi_schedule(&engine->execlists.tasklet);
48 
49 	timeout += jiffies;
50 	do {
51 		bool done = time_after(jiffies, timeout);
52 
53 		if (i915_request_completed(rq)) /* that was quick! */
54 			return 0;
55 
		/* Wait until the HW has acknowledged the submission (or err) */
57 		intel_engine_flush_submission(engine);
58 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
59 			return 0;
60 
61 		if (done)
62 			return -ETIME;
63 
64 		cond_resched();
65 	} while (1);
66 }
67 
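/*
 * Wait for the engine reset to single out the hanging request: the reset
 * should flag it with fence.error == -EIO, after which we allow it a short
 * grace period to be retired.
 */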
68 static int wait_for_reset(struct intel_engine_cs *engine,
69 			  struct i915_request *rq,
70 			  unsigned long timeout)
71 {
72 	timeout += jiffies;
73 
74 	do {
75 		cond_resched();
76 		intel_engine_flush_submission(engine);
77 
78 		if (READ_ONCE(engine->execlists.pending[0]))
79 			continue;
80 
81 		if (i915_request_completed(rq))
82 			break;
83 
84 		if (READ_ONCE(rq->fence.error))
85 			break;
86 	} while (time_before(jiffies, timeout));
87 
88 	flush_scheduled_work();
89 
90 	if (rq->fence.error != -EIO) {
91 		pr_err("%s: hanging request %llx:%lld not reset\n",
92 		       engine->name,
93 		       rq->fence.context,
94 		       rq->fence.seqno);
95 		return -EINVAL;
96 	}
97 
98 	/* Give the request a jiffie to complete after flushing the worker */
99 	if (i915_request_wait(rq, 0,
100 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
101 		pr_err("%s: hanging request %llx:%lld did not complete\n",
102 		       engine->name,
103 		       rq->fence.context,
104 		       rq->fence.seqno);
105 		return -ETIME;
106 	}
107 
108 	return 0;
109 }
110 
111 static int live_sanitycheck(void *arg)
112 {
113 	struct intel_gt *gt = arg;
114 	struct intel_engine_cs *engine;
115 	enum intel_engine_id id;
116 	struct igt_spinner spin;
117 	int err = 0;
118 
119 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
120 		return 0;
121 
122 	if (igt_spinner_init(&spin, gt))
123 		return -ENOMEM;
124 
125 	for_each_engine(engine, gt, id) {
126 		struct intel_context *ce;
127 		struct i915_request *rq;
128 
129 		ce = intel_context_create(engine);
130 		if (IS_ERR(ce)) {
131 			err = PTR_ERR(ce);
132 			break;
133 		}
134 
135 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
136 		if (IS_ERR(rq)) {
137 			err = PTR_ERR(rq);
138 			goto out_ctx;
139 		}
140 
141 		i915_request_add(rq);
142 		if (!igt_wait_for_spinner(&spin, rq)) {
143 			GEM_TRACE("spinner failed to start\n");
144 			GEM_TRACE_DUMP();
145 			intel_gt_set_wedged(gt);
146 			err = -EIO;
147 			goto out_ctx;
148 		}
149 
150 		igt_spinner_end(&spin);
151 		if (igt_flush_test(gt->i915)) {
152 			err = -EIO;
153 			goto out_ctx;
154 		}
155 
156 out_ctx:
157 		intel_context_put(ce);
158 		if (err)
159 			break;
160 	}
161 
162 	igt_spinner_fini(&spin);
163 	return err;
164 }
165 
166 static int live_unlite_restore(struct intel_gt *gt, int prio)
167 {
168 	struct intel_engine_cs *engine;
169 	enum intel_engine_id id;
170 	struct igt_spinner spin;
171 	int err = -ENOMEM;
172 
173 	/*
174 	 * Check that we can correctly context switch between 2 instances
175 	 * on the same engine from the same parent context.
176 	 */
177 
178 	if (igt_spinner_init(&spin, gt))
179 		return err;
180 
181 	err = 0;
182 	for_each_engine(engine, gt, id) {
183 		struct intel_context *ce[2] = {};
184 		struct i915_request *rq[2];
185 		struct igt_live_test t;
186 		int n;
187 
188 		if (prio && !intel_engine_has_preemption(engine))
189 			continue;
190 
191 		if (!intel_engine_can_store_dword(engine))
192 			continue;
193 
194 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
195 			err = -EIO;
196 			break;
197 		}
198 		st_engine_heartbeat_disable(engine);
199 
200 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
201 			struct intel_context *tmp;
202 
203 			tmp = intel_context_create(engine);
204 			if (IS_ERR(tmp)) {
205 				err = PTR_ERR(tmp);
206 				goto err_ce;
207 			}
208 
209 			err = intel_context_pin(tmp);
210 			if (err) {
211 				intel_context_put(tmp);
212 				goto err_ce;
213 			}
214 
215 			/*
			 * Set up the pair of contexts such that if we
217 			 * lite-restore using the RING_TAIL from ce[1] it
218 			 * will execute garbage from ce[0]->ring.
219 			 */
220 			memset(tmp->ring->vaddr,
221 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
222 			       tmp->ring->vma->size);
223 
224 			ce[n] = tmp;
225 		}
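		/*
		 * Advance ce[1]'s ring to its midpoint so that its RING_TAIL
		 * is wildly different from ce[0]'s; a lite-restore that
		 * mistakenly reuses ce[1]'s tail while ce[0] is loaded will
		 * run straight into the poison written above.
		 */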
226 		GEM_BUG_ON(!ce[1]->ring->size);
227 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
228 		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
229 
230 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
231 		if (IS_ERR(rq[0])) {
232 			err = PTR_ERR(rq[0]);
233 			goto err_ce;
234 		}
235 
236 		i915_request_get(rq[0]);
237 		i915_request_add(rq[0]);
238 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
239 
240 		if (!igt_wait_for_spinner(&spin, rq[0])) {
241 			i915_request_put(rq[0]);
242 			goto err_ce;
243 		}
244 
245 		rq[1] = i915_request_create(ce[1]);
246 		if (IS_ERR(rq[1])) {
247 			err = PTR_ERR(rq[1]);
248 			i915_request_put(rq[0]);
249 			goto err_ce;
250 		}
251 
252 		if (!prio) {
253 			/*
254 			 * Ensure we do the switch to ce[1] on completion.
255 			 *
256 			 * rq[0] is already submitted, so this should reduce
257 			 * to a no-op (a wait on a request on the same engine
258 			 * uses the submit fence, not the completion fence),
259 			 * but it will install a dependency on rq[1] for rq[0]
260 			 * that will prevent the pair being reordered by
261 			 * timeslicing.
262 			 */
263 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
264 		}
265 
266 		i915_request_get(rq[1]);
267 		i915_request_add(rq[1]);
268 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
269 		i915_request_put(rq[0]);
270 
271 		if (prio) {
272 			struct i915_sched_attr attr = {
273 				.priority = prio,
274 			};
275 
276 			/* Alternatively preempt the spinner with ce[1] */
277 			engine->schedule(rq[1], &attr);
278 		}
279 
280 		/* And switch back to ce[0] for good measure */
281 		rq[0] = i915_request_create(ce[0]);
282 		if (IS_ERR(rq[0])) {
283 			err = PTR_ERR(rq[0]);
284 			i915_request_put(rq[1]);
285 			goto err_ce;
286 		}
287 
288 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
289 		i915_request_get(rq[0]);
290 		i915_request_add(rq[0]);
291 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
292 		i915_request_put(rq[1]);
293 		i915_request_put(rq[0]);
294 
295 err_ce:
296 		intel_engine_flush_submission(engine);
297 		igt_spinner_end(&spin);
298 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
299 			if (IS_ERR_OR_NULL(ce[n]))
300 				break;
301 
302 			intel_context_unpin(ce[n]);
303 			intel_context_put(ce[n]);
304 		}
305 
306 		st_engine_heartbeat_enable(engine);
307 		if (igt_live_test_end(&t))
308 			err = -EIO;
309 		if (err)
310 			break;
311 	}
312 
313 	igt_spinner_fini(&spin);
314 	return err;
315 }
316 
317 static int live_unlite_switch(void *arg)
318 {
319 	return live_unlite_restore(arg, 0);
320 }
321 
322 static int live_unlite_preempt(void *arg)
323 {
324 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
325 }
326 
327 static int live_unlite_ring(void *arg)
328 {
329 	struct intel_gt *gt = arg;
330 	struct intel_engine_cs *engine;
331 	struct igt_spinner spin;
332 	enum intel_engine_id id;
333 	int err = 0;
334 
335 	/*
	 * Set up a preemption event that will cause almost the entire ring
337 	 * to be unwound, potentially fooling our intel_ring_direction()
338 	 * into emitting a forward lite-restore instead of the rollback.
339 	 */
340 
341 	if (igt_spinner_init(&spin, gt))
342 		return -ENOMEM;
343 
344 	for_each_engine(engine, gt, id) {
345 		struct intel_context *ce[2] = {};
346 		struct i915_request *rq;
347 		struct igt_live_test t;
348 		int n;
349 
350 		if (!intel_engine_has_preemption(engine))
351 			continue;
352 
353 		if (!intel_engine_can_store_dword(engine))
354 			continue;
355 
356 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
357 			err = -EIO;
358 			break;
359 		}
360 		st_engine_heartbeat_disable(engine);
361 
362 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
363 			struct intel_context *tmp;
364 
365 			tmp = intel_context_create(engine);
366 			if (IS_ERR(tmp)) {
367 				err = PTR_ERR(tmp);
368 				goto err_ce;
369 			}
370 
371 			err = intel_context_pin(tmp);
372 			if (err) {
373 				intel_context_put(tmp);
374 				goto err_ce;
375 			}
376 
377 			memset32(tmp->ring->vaddr,
378 				 0xdeadbeef, /* trigger a hang if executed */
379 				 tmp->ring->vma->size / sizeof(u32));
380 
381 			ce[n] = tmp;
382 		}
383 
384 		/* Create max prio spinner, followed by N low prio nops */
385 		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
386 		if (IS_ERR(rq)) {
387 			err = PTR_ERR(rq);
388 			goto err_ce;
389 		}
390 
391 		i915_request_get(rq);
392 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
393 		i915_request_add(rq);
394 
395 		if (!igt_wait_for_spinner(&spin, rq)) {
396 			intel_gt_set_wedged(gt);
397 			i915_request_put(rq);
398 			err = -ETIME;
399 			goto err_ce;
400 		}
401 
		/* Fill the ring with nops until we cause a wrap */
403 		n = 0;
404 		while (intel_ring_direction(ce[0]->ring,
405 					    rq->wa_tail,
406 					    ce[0]->ring->tail) <= 0) {
407 			struct i915_request *tmp;
408 
409 			tmp = intel_context_create_request(ce[0]);
410 			if (IS_ERR(tmp)) {
411 				err = PTR_ERR(tmp);
412 				i915_request_put(rq);
413 				goto err_ce;
414 			}
415 
416 			i915_request_add(tmp);
417 			intel_engine_flush_submission(engine);
418 			n++;
419 		}
420 		intel_engine_flush_submission(engine);
421 		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
422 			 engine->name, n,
423 			 ce[0]->ring->size,
424 			 ce[0]->ring->tail,
425 			 ce[0]->ring->emit,
426 			 rq->tail);
427 		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
428 						rq->tail,
429 						ce[0]->ring->tail) <= 0);
430 		i915_request_put(rq);
431 
		/* Create a request on the second context to preempt the first ring's spinner */
433 		rq = intel_context_create_request(ce[1]);
434 		if (IS_ERR(rq)) {
435 			err = PTR_ERR(rq);
436 			goto err_ce;
437 		}
438 
439 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
440 		i915_request_get(rq);
441 		i915_request_add(rq);
442 
443 		err = wait_for_submit(engine, rq, HZ / 2);
444 		i915_request_put(rq);
445 		if (err) {
446 			pr_err("%s: preemption request was not submitted\n",
447 			       engine->name);
448 			err = -ETIME;
449 		}
450 
451 		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
452 			 engine->name,
453 			 ce[0]->ring->tail, ce[0]->ring->emit,
454 			 ce[1]->ring->tail, ce[1]->ring->emit);
455 
456 err_ce:
457 		intel_engine_flush_submission(engine);
458 		igt_spinner_end(&spin);
459 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
460 			if (IS_ERR_OR_NULL(ce[n]))
461 				break;
462 
463 			intel_context_unpin(ce[n]);
464 			intel_context_put(ce[n]);
465 		}
466 		st_engine_heartbeat_enable(engine);
467 		if (igt_live_test_end(&t))
468 			err = -EIO;
469 		if (err)
470 			break;
471 	}
472 
473 	igt_spinner_fini(&spin);
474 	return err;
475 }
476 
477 static int live_pin_rewind(void *arg)
478 {
479 	struct intel_gt *gt = arg;
480 	struct intel_engine_cs *engine;
481 	enum intel_engine_id id;
482 	int err = 0;
483 
484 	/*
	 * We have to be careful not to trust intel_ring too much; for example,
	 * ring->head is updated upon retire, which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
491 	 */
492 
493 	for_each_engine(engine, gt, id) {
494 		struct intel_context *ce;
495 		struct i915_request *rq;
496 		struct intel_ring *ring;
497 		struct igt_live_test t;
498 
499 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
500 			err = -EIO;
501 			break;
502 		}
503 
504 		ce = intel_context_create(engine);
505 		if (IS_ERR(ce)) {
506 			err = PTR_ERR(ce);
507 			break;
508 		}
509 
510 		err = intel_context_pin(ce);
511 		if (err) {
512 			intel_context_put(ce);
513 			break;
514 		}
515 
516 		/* Keep the context awake while we play games */
517 		err = i915_active_acquire(&ce->active);
518 		if (err) {
519 			intel_context_unpin(ce);
520 			intel_context_put(ce);
521 			break;
522 		}
523 		ring = ce->ring;
524 
525 		/* Poison the ring, and offset the next request from HEAD */
526 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
527 		ring->emit = ring->size / 2;
528 		ring->tail = ring->emit;
529 		GEM_BUG_ON(ring->head);
530 
531 		intel_context_unpin(ce);
532 
533 		/* Submit a simple nop request */
534 		GEM_BUG_ON(intel_context_is_pinned(ce));
535 		rq = intel_context_create_request(ce);
536 		i915_active_release(&ce->active); /* e.g. async retire */
537 		intel_context_put(ce);
538 		if (IS_ERR(rq)) {
539 			err = PTR_ERR(rq);
540 			break;
541 		}
542 		GEM_BUG_ON(!rq->head);
543 		i915_request_add(rq);
544 
545 		/* Expect not to hang! */
546 		if (igt_live_test_end(&t)) {
547 			err = -EIO;
548 			break;
549 		}
550 	}
551 
552 	return err;
553 }
554 
555 static int live_hold_reset(void *arg)
556 {
557 	struct intel_gt *gt = arg;
558 	struct intel_engine_cs *engine;
559 	enum intel_engine_id id;
560 	struct igt_spinner spin;
561 	int err = 0;
562 
563 	/*
564 	 * In order to support offline error capture for fast preempt reset,
565 	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
567 	 */
568 
569 	if (!intel_has_reset_engine(gt))
570 		return 0;
571 
572 	if (igt_spinner_init(&spin, gt))
573 		return -ENOMEM;
574 
575 	for_each_engine(engine, gt, id) {
576 		struct intel_context *ce;
577 		struct i915_request *rq;
578 
579 		ce = intel_context_create(engine);
580 		if (IS_ERR(ce)) {
581 			err = PTR_ERR(ce);
582 			break;
583 		}
584 
585 		st_engine_heartbeat_disable(engine);
586 
587 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
588 		if (IS_ERR(rq)) {
589 			err = PTR_ERR(rq);
590 			goto out;
591 		}
592 		i915_request_add(rq);
593 
594 		if (!igt_wait_for_spinner(&spin, rq)) {
595 			intel_gt_set_wedged(gt);
596 			err = -ETIME;
597 			goto out;
598 		}
599 
600 		/* We have our request executing, now remove it and reset */
601 
602 		local_bh_disable();
603 		if (test_and_set_bit(I915_RESET_ENGINE + id,
604 				     &gt->reset.flags)) {
605 			local_bh_enable();
606 			intel_gt_set_wedged(gt);
607 			err = -EBUSY;
608 			goto out;
609 		}
610 		tasklet_disable(&engine->execlists.tasklet);
611 
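		/*
		 * With the tasklet disabled, process the submission by hand
		 * so that the spinner is guaranteed to be resident in ELSP[0]
		 * before we place it on hold and reset the engine under it.
		 */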
612 		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
613 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
614 
615 		i915_request_get(rq);
616 		execlists_hold(engine, rq);
617 		GEM_BUG_ON(!i915_request_on_hold(rq));
618 
619 		__intel_engine_reset_bh(engine, NULL);
620 		GEM_BUG_ON(rq->fence.error != -EIO);
621 
622 		tasklet_enable(&engine->execlists.tasklet);
623 		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
624 				      &gt->reset.flags);
625 		local_bh_enable();
626 
627 		/* Check that we do not resubmit the held request */
628 		if (!i915_request_wait(rq, 0, HZ / 5)) {
629 			pr_err("%s: on hold request completed!\n",
630 			       engine->name);
631 			i915_request_put(rq);
632 			err = -EIO;
633 			goto out;
634 		}
635 		GEM_BUG_ON(!i915_request_on_hold(rq));
636 
637 		/* But is resubmitted on release */
638 		execlists_unhold(engine, rq);
639 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
640 			pr_err("%s: held request did not complete!\n",
641 			       engine->name);
642 			intel_gt_set_wedged(gt);
643 			err = -ETIME;
644 		}
645 		i915_request_put(rq);
646 
647 out:
648 		st_engine_heartbeat_enable(engine);
649 		intel_context_put(ce);
650 		if (err)
651 			break;
652 	}
653 
654 	igt_spinner_fini(&spin);
655 	return err;
656 }
657 
658 static const char *error_repr(int err)
659 {
660 	return err ? "bad" : "good";
661 }
662 
663 static int live_error_interrupt(void *arg)
664 {
665 	static const struct error_phase {
666 		enum { GOOD = 0, BAD = -EIO } error[2];
667 	} phases[] = {
668 		{ { BAD,  GOOD } },
669 		{ { BAD,  BAD  } },
670 		{ { BAD,  GOOD } },
671 		{ { GOOD, GOOD } }, /* sentinel */
672 	};
673 	struct intel_gt *gt = arg;
674 	struct intel_engine_cs *engine;
675 	enum intel_engine_id id;
676 
677 	/*
678 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
679 	 * of invalid commands in user batches that will cause a GPU hang.
680 	 * This is a faster mechanism than using hangcheck/heartbeats, but
681 	 * only detects problems the HW knows about -- it will not warn when
682 	 * we kill the HW!
683 	 *
684 	 * To verify our detection and reset, we throw some invalid commands
685 	 * at the HW and wait for the interrupt.
686 	 */
687 
688 	if (!intel_has_reset_engine(gt))
689 		return 0;
690 
691 	for_each_engine(engine, gt, id) {
692 		const struct error_phase *p;
693 		int err = 0;
694 
695 		st_engine_heartbeat_disable(engine);
696 
697 		for (p = phases; p->error[0] != GOOD; p++) {
698 			struct i915_request *client[ARRAY_SIZE(phases->error)];
699 			u32 *cs;
700 			int i;
701 
			memset(client, 0, sizeof(client));
703 			for (i = 0; i < ARRAY_SIZE(client); i++) {
704 				struct intel_context *ce;
705 				struct i915_request *rq;
706 
707 				ce = intel_context_create(engine);
708 				if (IS_ERR(ce)) {
709 					err = PTR_ERR(ce);
710 					goto out;
711 				}
712 
713 				rq = intel_context_create_request(ce);
714 				intel_context_put(ce);
715 				if (IS_ERR(rq)) {
716 					err = PTR_ERR(rq);
717 					goto out;
718 				}
719 
720 				if (rq->engine->emit_init_breadcrumb) {
721 					err = rq->engine->emit_init_breadcrumb(rq);
722 					if (err) {
723 						i915_request_add(rq);
724 						goto out;
725 					}
726 				}
727 
728 				cs = intel_ring_begin(rq, 2);
729 				if (IS_ERR(cs)) {
730 					i915_request_add(rq);
731 					err = PTR_ERR(cs);
732 					goto out;
733 				}
734 
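				/*
				 * 0xdeadbeef is not a valid command, so the CS
				 * raises an error interrupt when it reaches it;
				 * the MI_NOOP pair is the well-behaved control.
				 */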
735 				if (p->error[i]) {
736 					*cs++ = 0xdeadbeef;
737 					*cs++ = 0xdeadbeef;
738 				} else {
739 					*cs++ = MI_NOOP;
740 					*cs++ = MI_NOOP;
741 				}
742 
743 				client[i] = i915_request_get(rq);
744 				i915_request_add(rq);
745 			}
746 
747 			err = wait_for_submit(engine, client[0], HZ / 2);
748 			if (err) {
749 				pr_err("%s: first request did not start within time!\n",
750 				       engine->name);
751 				err = -ETIME;
752 				goto out;
753 			}
754 
755 			for (i = 0; i < ARRAY_SIZE(client); i++) {
756 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
757 					pr_debug("%s: %s request incomplete!\n",
758 						 engine->name,
759 						 error_repr(p->error[i]));
760 
761 				if (!i915_request_started(client[i])) {
762 					pr_err("%s: %s request not started!\n",
763 					       engine->name,
764 					       error_repr(p->error[i]));
765 					err = -ETIME;
766 					goto out;
767 				}
768 
769 				/* Kick the tasklet to process the error */
770 				intel_engine_flush_submission(engine);
771 				if (client[i]->fence.error != p->error[i]) {
772 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
773 					       engine->name,
774 					       error_repr(p->error[i]),
775 					       i915_request_completed(client[i]) ? "completed" : "running",
776 					       client[i]->fence.error);
777 					err = -EINVAL;
778 					goto out;
779 				}
780 			}
781 
782 out:
783 			for (i = 0; i < ARRAY_SIZE(client); i++)
784 				if (client[i])
785 					i915_request_put(client[i]);
786 			if (err) {
787 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
788 				       engine->name, p - phases,
789 				       p->error[0], p->error[1]);
790 				break;
791 			}
792 		}
793 
794 		st_engine_heartbeat_enable(engine);
795 		if (err) {
796 			intel_gt_set_wedged(gt);
797 			return err;
798 		}
799 	}
800 
801 	return 0;
802 }
803 
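/*
 * Build one link of a semaphore chain in the shared vma: the request spins
 * until its own dword (vma[idx]) turns non-zero and then, once released,
 * writes 1 into vma[idx - 1] to release the request queued before it.
 */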
804 static int
805 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
806 {
807 	u32 *cs;
808 
809 	cs = intel_ring_begin(rq, 10);
810 	if (IS_ERR(cs))
811 		return PTR_ERR(cs);
812 
813 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
814 
815 	*cs++ = MI_SEMAPHORE_WAIT |
816 		MI_SEMAPHORE_GLOBAL_GTT |
817 		MI_SEMAPHORE_POLL |
818 		MI_SEMAPHORE_SAD_NEQ_SDD;
819 	*cs++ = 0;
820 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
821 	*cs++ = 0;
822 
823 	if (idx > 0) {
824 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
825 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
826 		*cs++ = 0;
827 		*cs++ = 1;
828 	} else {
829 		*cs++ = MI_NOOP;
830 		*cs++ = MI_NOOP;
831 		*cs++ = MI_NOOP;
832 		*cs++ = MI_NOOP;
833 	}
834 
835 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
836 
837 	intel_ring_advance(rq, cs);
838 	return 0;
839 }
840 
841 static struct i915_request *
842 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
843 {
844 	struct intel_context *ce;
845 	struct i915_request *rq;
846 	int err;
847 
848 	ce = intel_context_create(engine);
849 	if (IS_ERR(ce))
850 		return ERR_CAST(ce);
851 
852 	rq = intel_context_create_request(ce);
853 	if (IS_ERR(rq))
854 		goto out_ce;
855 
856 	err = 0;
857 	if (rq->engine->emit_init_breadcrumb)
858 		err = rq->engine->emit_init_breadcrumb(rq);
859 	if (err == 0)
860 		err = emit_semaphore_chain(rq, vma, idx);
861 	if (err == 0)
862 		i915_request_get(rq);
863 	i915_request_add(rq);
864 	if (err)
865 		rq = ERR_PTR(err);
866 
867 out_ce:
868 	intel_context_put(ce);
869 	return rq;
870 }
871 
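/*
 * Emit a kernel request that writes 1 into vma[idx - 1], releasing the tail
 * of the semaphore chain, and bump it to the given priority so the scheduler
 * has to make room for it on an already busy engine.
 */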
872 static int
873 release_queue(struct intel_engine_cs *engine,
874 	      struct i915_vma *vma,
875 	      int idx, int prio)
876 {
877 	struct i915_sched_attr attr = {
878 		.priority = prio,
879 	};
880 	struct i915_request *rq;
881 	u32 *cs;
882 
883 	rq = intel_engine_create_kernel_request(engine);
884 	if (IS_ERR(rq))
885 		return PTR_ERR(rq);
886 
887 	cs = intel_ring_begin(rq, 4);
888 	if (IS_ERR(cs)) {
889 		i915_request_add(rq);
890 		return PTR_ERR(cs);
891 	}
892 
893 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
894 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
895 	*cs++ = 0;
896 	*cs++ = 1;
897 
898 	intel_ring_advance(rq, cs);
899 
900 	i915_request_get(rq);
901 	i915_request_add(rq);
902 
903 	local_bh_disable();
904 	engine->schedule(rq, &attr);
905 	local_bh_enable(); /* kick tasklet */
906 
907 	i915_request_put(rq);
908 
909 	return 0;
910 }
911 
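/*
 * Queue a chain of semaphores across every engine, with the head of the
 * chain stuck spinning on outer behind count further waiters. The head can
 * only complete if timeslicing lets the requests queued behind it run and
 * pass the release token back down the chain.
 */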
912 static int
913 slice_semaphore_queue(struct intel_engine_cs *outer,
914 		      struct i915_vma *vma,
915 		      int count)
916 {
917 	struct intel_engine_cs *engine;
918 	struct i915_request *head;
919 	enum intel_engine_id id;
920 	int err, i, n = 0;
921 
922 	head = semaphore_queue(outer, vma, n++);
923 	if (IS_ERR(head))
924 		return PTR_ERR(head);
925 
926 	for_each_engine(engine, outer->gt, id) {
927 		if (!intel_engine_has_preemption(engine))
928 			continue;
929 
930 		for (i = 0; i < count; i++) {
931 			struct i915_request *rq;
932 
933 			rq = semaphore_queue(engine, vma, n++);
934 			if (IS_ERR(rq)) {
935 				err = PTR_ERR(rq);
936 				goto out;
937 			}
938 
939 			i915_request_put(rq);
940 		}
941 	}
942 
943 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
944 	if (err)
945 		goto out;
946 
947 	if (i915_request_wait(head, 0,
948 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
949 		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
950 		       outer->name, count, n);
951 		GEM_TRACE_DUMP();
952 		intel_gt_set_wedged(outer->gt);
953 		err = -EIO;
954 	}
955 
956 out:
957 	i915_request_put(head);
958 	return err;
959 }
960 
961 static int live_timeslice_preempt(void *arg)
962 {
963 	struct intel_gt *gt = arg;
964 	struct drm_i915_gem_object *obj;
965 	struct intel_engine_cs *engine;
966 	enum intel_engine_id id;
967 	struct i915_vma *vma;
968 	void *vaddr;
969 	int err = 0;
970 
971 	/*
972 	 * If a request takes too long, we would like to give other users
973 	 * a fair go on the GPU. In particular, users may create batches
974 	 * that wait upon external input, where that input may even be
975 	 * supplied by another GPU job. To avoid blocking forever, we
976 	 * need to preempt the current task and replace it with another
977 	 * ready task.
978 	 */
979 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
980 		return 0;
981 
982 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
983 	if (IS_ERR(obj))
984 		return PTR_ERR(obj);
985 
986 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
987 	if (IS_ERR(vma)) {
988 		err = PTR_ERR(vma);
989 		goto err_obj;
990 	}
991 
992 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
993 	if (IS_ERR(vaddr)) {
994 		err = PTR_ERR(vaddr);
995 		goto err_obj;
996 	}
997 
998 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
999 	if (err)
1000 		goto err_map;
1001 
1002 	err = i915_vma_sync(vma);
1003 	if (err)
1004 		goto err_pin;
1005 
1006 	for_each_engine(engine, gt, id) {
1007 		if (!intel_engine_has_preemption(engine))
1008 			continue;
1009 
1010 		memset(vaddr, 0, PAGE_SIZE);
1011 
1012 		st_engine_heartbeat_disable(engine);
1013 		err = slice_semaphore_queue(engine, vma, 5);
1014 		st_engine_heartbeat_enable(engine);
1015 		if (err)
1016 			goto err_pin;
1017 
1018 		if (igt_flush_test(gt->i915)) {
1019 			err = -EIO;
1020 			goto err_pin;
1021 		}
1022 	}
1023 
1024 err_pin:
1025 	i915_vma_unpin(vma);
1026 err_map:
1027 	i915_gem_object_unpin_map(obj);
1028 err_obj:
1029 	i915_gem_object_put(obj);
1030 	return err;
1031 }
1032 
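/*
 * Each rewinder request waits for the shared slot[0] to reach its idx,
 * records RING_TIMESTAMP into slot[idx], and then bumps slot[0] to idx + 1
 * to release the next waiter. Comparing the stored timestamps afterwards
 * reveals the order in which the requests actually executed.
 */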
1033 static struct i915_request *
1034 create_rewinder(struct intel_context *ce,
1035 		struct i915_request *wait,
1036 		void *slot, int idx)
1037 {
1038 	const u32 offset =
1039 		i915_ggtt_offset(ce->engine->status_page.vma) +
1040 		offset_in_page(slot);
1041 	struct i915_request *rq;
1042 	u32 *cs;
1043 	int err;
1044 
1045 	rq = intel_context_create_request(ce);
1046 	if (IS_ERR(rq))
1047 		return rq;
1048 
1049 	if (wait) {
1050 		err = i915_request_await_dma_fence(rq, &wait->fence);
1051 		if (err)
1052 			goto err;
1053 	}
1054 
1055 	cs = intel_ring_begin(rq, 14);
1056 	if (IS_ERR(cs)) {
1057 		err = PTR_ERR(cs);
1058 		goto err;
1059 	}
1060 
1061 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1062 	*cs++ = MI_NOOP;
1063 
1064 	*cs++ = MI_SEMAPHORE_WAIT |
1065 		MI_SEMAPHORE_GLOBAL_GTT |
1066 		MI_SEMAPHORE_POLL |
1067 		MI_SEMAPHORE_SAD_GTE_SDD;
1068 	*cs++ = idx;
1069 	*cs++ = offset;
1070 	*cs++ = 0;
1071 
1072 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1073 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1074 	*cs++ = offset + idx * sizeof(u32);
1075 	*cs++ = 0;
1076 
1077 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1078 	*cs++ = offset;
1079 	*cs++ = 0;
1080 	*cs++ = idx + 1;
1081 
1082 	intel_ring_advance(rq, cs);
1083 
1084 	rq->sched.attr.priority = I915_PRIORITY_MASK;
1085 	err = 0;
1086 err:
1087 	i915_request_get(rq);
1088 	i915_request_add(rq);
1089 	if (err) {
1090 		i915_request_put(rq);
1091 		return ERR_PTR(err);
1092 	}
1093 
1094 	return rq;
1095 }
1096 
1097 static int live_timeslice_rewind(void *arg)
1098 {
1099 	struct intel_gt *gt = arg;
1100 	struct intel_engine_cs *engine;
1101 	enum intel_engine_id id;
1102 
1103 	/*
1104 	 * The usual presumption on timeslice expiration is that we replace
1105 	 * the active context with another. However, given a chain of
	 * dependencies, we may end up replacing the context with itself, but
	 * with only a few of its requests resubmitted, forcing us to rewind the
1108 	 * RING_TAIL of the original request.
1109 	 */
1110 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1111 		return 0;
1112 
1113 	for_each_engine(engine, gt, id) {
1114 		enum { A1, A2, B1 };
1115 		enum { X = 1, Z, Y };
1116 		struct i915_request *rq[3] = {};
1117 		struct intel_context *ce;
1118 		unsigned long timeslice;
1119 		int i, err = 0;
1120 		u32 *slot;
1121 
1122 		if (!intel_engine_has_timeslices(engine))
1123 			continue;
1124 
1125 		/*
1126 		 * A:rq1 -- semaphore wait, timestamp X
1127 		 * A:rq2 -- write timestamp Y
1128 		 *
1129 		 * B:rq1 [await A:rq1] -- write timestamp Z
1130 		 *
1131 		 * Force timeslice, release semaphore.
1132 		 *
1133 		 * Expect execution/evaluation order XZY
1134 		 */
1135 
1136 		st_engine_heartbeat_disable(engine);
1137 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1138 
1139 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1140 
1141 		ce = intel_context_create(engine);
1142 		if (IS_ERR(ce)) {
1143 			err = PTR_ERR(ce);
1144 			goto err;
1145 		}
1146 
1147 		rq[A1] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[A1])) {
			err = PTR_ERR(rq[A1]);
			rq[A1] = NULL;
			intel_context_put(ce);
			goto err;
		}
1152 
1153 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1154 		intel_context_put(ce);
		if (IS_ERR(rq[A2])) {
			err = PTR_ERR(rq[A2]);
			rq[A2] = NULL;
			goto err;
		}
1157 
1158 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1159 		if (err) {
1160 			pr_err("%s: failed to submit first context\n",
1161 			       engine->name);
1162 			goto err;
1163 		}
1164 
1165 		ce = intel_context_create(engine);
1166 		if (IS_ERR(ce)) {
1167 			err = PTR_ERR(ce);
1168 			goto err;
1169 		}
1170 
1171 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1172 		intel_context_put(ce);
		if (IS_ERR(rq[B1])) {
			err = PTR_ERR(rq[B1]);
			rq[B1] = NULL;
			goto err;
		}
1175 
1176 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1177 		if (err) {
1178 			pr_err("%s: failed to submit second context\n",
1179 			       engine->name);
1180 			goto err;
1181 		}
1182 
1183 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1184 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1185 		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1186 			/* Wait for the timeslice to kick in */
1187 			del_timer(&engine->execlists.timer);
1188 			tasklet_hi_schedule(&engine->execlists.tasklet);
1189 			intel_engine_flush_submission(engine);
1190 		}
1191 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1192 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1193 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1194 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1195 
1196 		/* Release the hounds! */
1197 		slot[0] = 1;
1198 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1199 
1200 		for (i = 1; i <= 3; i++) {
1201 			unsigned long timeout = jiffies + HZ / 2;
1202 
1203 			while (!READ_ONCE(slot[i]) &&
1204 			       time_before(jiffies, timeout))
1205 				;
1206 
1207 			if (!time_before(jiffies, timeout)) {
1208 				pr_err("%s: rq[%d] timed out\n",
1209 				       engine->name, i - 1);
1210 				err = -ETIME;
1211 				goto err;
1212 			}
1213 
1214 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1215 		}
1216 
1217 		/* XZY: XZ < XY */
1218 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1219 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1220 			       engine->name,
1221 			       slot[Z] - slot[X],
1222 			       slot[Y] - slot[X]);
1223 			err = -EINVAL;
1224 		}
1225 
1226 err:
1227 		memset32(&slot[0], -1, 4);
1228 		wmb();
1229 
1230 		engine->props.timeslice_duration_ms = timeslice;
1231 		st_engine_heartbeat_enable(engine);
1232 		for (i = 0; i < 3; i++)
1233 			i915_request_put(rq[i]);
1234 		if (igt_flush_test(gt->i915))
1235 			err = -EIO;
1236 		if (err)
1237 			return err;
1238 	}
1239 
1240 	return 0;
1241 }
1242 
1243 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1244 {
1245 	struct i915_request *rq;
1246 
1247 	rq = intel_engine_create_kernel_request(engine);
1248 	if (IS_ERR(rq))
1249 		return rq;
1250 
1251 	i915_request_get(rq);
1252 	i915_request_add(rq);
1253 
1254 	return rq;
1255 }
1256 
1257 static long slice_timeout(struct intel_engine_cs *engine)
1258 {
1259 	long timeout;
1260 
1261 	/* Enough time for a timeslice to kick in, and kick out */
1262 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1263 
1264 	/* Enough time for the nop request to complete */
1265 	timeout += HZ / 5;
1266 
1267 	return timeout + 1;
1268 }
1269 
1270 static int live_timeslice_queue(void *arg)
1271 {
1272 	struct intel_gt *gt = arg;
1273 	struct drm_i915_gem_object *obj;
1274 	struct intel_engine_cs *engine;
1275 	enum intel_engine_id id;
1276 	struct i915_vma *vma;
1277 	void *vaddr;
1278 	int err = 0;
1279 
1280 	/*
1281 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1282 	 * timeslicing between them disabled, we *do* enable timeslicing
1283 	 * if the queue demands it. (Normally, we do not submit if
1284 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1285 	 * eject ELSP[0] in favour of the queue.)
1286 	 */
1287 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1288 		return 0;
1289 
1290 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1291 	if (IS_ERR(obj))
1292 		return PTR_ERR(obj);
1293 
1294 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1295 	if (IS_ERR(vma)) {
1296 		err = PTR_ERR(vma);
1297 		goto err_obj;
1298 	}
1299 
1300 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1301 	if (IS_ERR(vaddr)) {
1302 		err = PTR_ERR(vaddr);
1303 		goto err_obj;
1304 	}
1305 
1306 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1307 	if (err)
1308 		goto err_map;
1309 
1310 	err = i915_vma_sync(vma);
1311 	if (err)
1312 		goto err_pin;
1313 
1314 	for_each_engine(engine, gt, id) {
1315 		struct i915_sched_attr attr = {
1316 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1317 		};
1318 		struct i915_request *rq, *nop;
1319 
1320 		if (!intel_engine_has_preemption(engine))
1321 			continue;
1322 
1323 		st_engine_heartbeat_disable(engine);
1324 		memset(vaddr, 0, PAGE_SIZE);
1325 
1326 		/* ELSP[0]: semaphore wait */
1327 		rq = semaphore_queue(engine, vma, 0);
1328 		if (IS_ERR(rq)) {
1329 			err = PTR_ERR(rq);
1330 			goto err_heartbeat;
1331 		}
1332 		engine->schedule(rq, &attr);
1333 		err = wait_for_submit(engine, rq, HZ / 2);
1334 		if (err) {
1335 			pr_err("%s: Timed out trying to submit semaphores\n",
1336 			       engine->name);
1337 			goto err_rq;
1338 		}
1339 
1340 		/* ELSP[1]: nop request */
1341 		nop = nop_request(engine);
1342 		if (IS_ERR(nop)) {
1343 			err = PTR_ERR(nop);
1344 			goto err_rq;
1345 		}
1346 		err = wait_for_submit(engine, nop, HZ / 2);
1347 		i915_request_put(nop);
1348 		if (err) {
1349 			pr_err("%s: Timed out trying to submit nop\n",
1350 			       engine->name);
1351 			goto err_rq;
1352 		}
1353 
1354 		GEM_BUG_ON(i915_request_completed(rq));
1355 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1356 
		/* Queue: semaphore signal, at the same priority as the semaphore */
1358 		err = release_queue(engine, vma, 1, effective_prio(rq));
1359 		if (err)
1360 			goto err_rq;
1361 
1362 		/* Wait until we ack the release_queue and start timeslicing */
1363 		do {
1364 			cond_resched();
1365 			intel_engine_flush_submission(engine);
1366 		} while (READ_ONCE(engine->execlists.pending[0]));
1367 
1368 		/* Timeslice every jiffy, so within 2 we should signal */
1369 		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1370 			struct drm_printer p =
1371 				drm_info_printer(gt->i915->drm.dev);
1372 
1373 			pr_err("%s: Failed to timeslice into queue\n",
1374 			       engine->name);
1375 			intel_engine_dump(engine, &p,
1376 					  "%s\n", engine->name);
1377 
1378 			memset(vaddr, 0xff, PAGE_SIZE);
1379 			err = -EIO;
1380 		}
1381 err_rq:
1382 		i915_request_put(rq);
1383 err_heartbeat:
1384 		st_engine_heartbeat_enable(engine);
1385 		if (err)
1386 			break;
1387 	}
1388 
1389 err_pin:
1390 	i915_vma_unpin(vma);
1391 err_map:
1392 	i915_gem_object_unpin_map(obj);
1393 err_obj:
1394 	i915_gem_object_put(obj);
1395 	return err;
1396 }
1397 
1398 static int live_timeslice_nopreempt(void *arg)
1399 {
1400 	struct intel_gt *gt = arg;
1401 	struct intel_engine_cs *engine;
1402 	enum intel_engine_id id;
1403 	struct igt_spinner spin;
1404 	int err = 0;
1405 
1406 	/*
1407 	 * We should not timeslice into a request that is marked with
1408 	 * I915_REQUEST_NOPREEMPT.
1409 	 */
1410 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1411 		return 0;
1412 
1413 	if (igt_spinner_init(&spin, gt))
1414 		return -ENOMEM;
1415 
1416 	for_each_engine(engine, gt, id) {
1417 		struct intel_context *ce;
1418 		struct i915_request *rq;
1419 		unsigned long timeslice;
1420 
1421 		if (!intel_engine_has_preemption(engine))
1422 			continue;
1423 
1424 		ce = intel_context_create(engine);
1425 		if (IS_ERR(ce)) {
1426 			err = PTR_ERR(ce);
1427 			break;
1428 		}
1429 
1430 		st_engine_heartbeat_disable(engine);
1431 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1432 
1433 		/* Create an unpreemptible spinner */
1434 
1435 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1436 		intel_context_put(ce);
1437 		if (IS_ERR(rq)) {
1438 			err = PTR_ERR(rq);
1439 			goto out_heartbeat;
1440 		}
1441 
1442 		i915_request_get(rq);
1443 		i915_request_add(rq);
1444 
1445 		if (!igt_wait_for_spinner(&spin, rq)) {
1446 			i915_request_put(rq);
1447 			err = -ETIME;
1448 			goto out_spin;
1449 		}
1450 
1451 		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1452 		i915_request_put(rq);
1453 
1454 		/* Followed by a maximum priority barrier (heartbeat) */
1455 
1456 		ce = intel_context_create(engine);
1457 		if (IS_ERR(ce)) {
1458 			err = PTR_ERR(ce);
1459 			goto out_spin;
1460 		}
1461 
1462 		rq = intel_context_create_request(ce);
1463 		intel_context_put(ce);
1464 		if (IS_ERR(rq)) {
1465 			err = PTR_ERR(rq);
1466 			goto out_spin;
1467 		}
1468 
1469 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1470 		i915_request_get(rq);
1471 		i915_request_add(rq);
1472 
1473 		/*
1474 		 * Wait until the barrier is in ELSP, and we know timeslicing
1475 		 * will have been activated.
1476 		 */
1477 		if (wait_for_submit(engine, rq, HZ / 2)) {
1478 			i915_request_put(rq);
1479 			err = -ETIME;
1480 			goto out_spin;
1481 		}
1482 
1483 		/*
1484 		 * Since the ELSP[0] request is unpreemptible, it should not
1485 		 * allow the maximum priority barrier through. Wait long
1486 		 * enough to see if it is timesliced in by mistake.
1487 		 */
1488 		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1489 			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1490 			       engine->name);
1491 			err = -EINVAL;
1492 		}
1493 		i915_request_put(rq);
1494 
1495 out_spin:
1496 		igt_spinner_end(&spin);
1497 out_heartbeat:
1498 		xchg(&engine->props.timeslice_duration_ms, timeslice);
1499 		st_engine_heartbeat_enable(engine);
1500 		if (err)
1501 			break;
1502 
1503 		if (igt_flush_test(gt->i915)) {
1504 			err = -EIO;
1505 			break;
1506 		}
1507 	}
1508 
1509 	igt_spinner_fini(&spin);
1510 	return err;
1511 }
1512 
1513 static int live_busywait_preempt(void *arg)
1514 {
1515 	struct intel_gt *gt = arg;
1516 	struct i915_gem_context *ctx_hi, *ctx_lo;
1517 	struct intel_engine_cs *engine;
1518 	struct drm_i915_gem_object *obj;
1519 	struct i915_vma *vma;
1520 	enum intel_engine_id id;
1521 	int err = -ENOMEM;
1522 	u32 *map;
1523 
1524 	/*
1525 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1526 	 * preempt the busywaits used to synchronise between rings.
1527 	 */
1528 
1529 	ctx_hi = kernel_context(gt->i915);
1530 	if (!ctx_hi)
1531 		return -ENOMEM;
1532 	ctx_hi->sched.priority =
1533 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1534 
1535 	ctx_lo = kernel_context(gt->i915);
1536 	if (!ctx_lo)
1537 		goto err_ctx_hi;
1538 	ctx_lo->sched.priority =
1539 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1540 
1541 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1542 	if (IS_ERR(obj)) {
1543 		err = PTR_ERR(obj);
1544 		goto err_ctx_lo;
1545 	}
1546 
1547 	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1548 	if (IS_ERR(map)) {
1549 		err = PTR_ERR(map);
1550 		goto err_obj;
1551 	}
1552 
1553 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1554 	if (IS_ERR(vma)) {
1555 		err = PTR_ERR(vma);
1556 		goto err_map;
1557 	}
1558 
1559 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1560 	if (err)
1561 		goto err_map;
1562 
1563 	err = i915_vma_sync(vma);
1564 	if (err)
1565 		goto err_vma;
1566 
1567 	for_each_engine(engine, gt, id) {
1568 		struct i915_request *lo, *hi;
1569 		struct igt_live_test t;
1570 		u32 *cs;
1571 
1572 		if (!intel_engine_has_preemption(engine))
1573 			continue;
1574 
1575 		if (!intel_engine_can_store_dword(engine))
1576 			continue;
1577 
1578 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1579 			err = -EIO;
1580 			goto err_vma;
1581 		}
1582 
1583 		/*
1584 		 * We create two requests. The low priority request
1585 		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses an MI_STORE_DWORD_IMM to update the semaphore value,
		 * allowing the first request to complete. If preemption
1589 		 * fails, we hang instead.
1590 		 */
1591 
1592 		lo = igt_request_alloc(ctx_lo, engine);
1593 		if (IS_ERR(lo)) {
1594 			err = PTR_ERR(lo);
1595 			goto err_vma;
1596 		}
1597 
1598 		cs = intel_ring_begin(lo, 8);
1599 		if (IS_ERR(cs)) {
1600 			err = PTR_ERR(cs);
1601 			i915_request_add(lo);
1602 			goto err_vma;
1603 		}
1604 
1605 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1606 		*cs++ = i915_ggtt_offset(vma);
1607 		*cs++ = 0;
1608 		*cs++ = 1;
1609 
1610 		/* XXX Do we need a flush + invalidate here? */
1611 
1612 		*cs++ = MI_SEMAPHORE_WAIT |
1613 			MI_SEMAPHORE_GLOBAL_GTT |
1614 			MI_SEMAPHORE_POLL |
1615 			MI_SEMAPHORE_SAD_EQ_SDD;
1616 		*cs++ = 0;
1617 		*cs++ = i915_ggtt_offset(vma);
1618 		*cs++ = 0;
1619 
1620 		intel_ring_advance(lo, cs);
1621 
1622 		i915_request_get(lo);
1623 		i915_request_add(lo);
1624 
1625 		if (wait_for(READ_ONCE(*map), 10)) {
1626 			i915_request_put(lo);
1627 			err = -ETIMEDOUT;
1628 			goto err_vma;
1629 		}
1630 
1631 		/* Low priority request should be busywaiting now */
1632 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1633 			i915_request_put(lo);
1634 			pr_err("%s: Busywaiting request did not!\n",
1635 			       engine->name);
1636 			err = -EIO;
1637 			goto err_vma;
1638 		}
1639 
1640 		hi = igt_request_alloc(ctx_hi, engine);
1641 		if (IS_ERR(hi)) {
1642 			err = PTR_ERR(hi);
1643 			i915_request_put(lo);
1644 			goto err_vma;
1645 		}
1646 
1647 		cs = intel_ring_begin(hi, 4);
1648 		if (IS_ERR(cs)) {
1649 			err = PTR_ERR(cs);
1650 			i915_request_add(hi);
1651 			i915_request_put(lo);
1652 			goto err_vma;
1653 		}
1654 
1655 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1656 		*cs++ = i915_ggtt_offset(vma);
1657 		*cs++ = 0;
1658 		*cs++ = 0;
1659 
1660 		intel_ring_advance(hi, cs);
1661 		i915_request_add(hi);
1662 
1663 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1664 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1665 
1666 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1667 			       engine->name);
1668 
1669 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1670 			GEM_TRACE_DUMP();
1671 
1672 			i915_request_put(lo);
1673 			intel_gt_set_wedged(gt);
1674 			err = -EIO;
1675 			goto err_vma;
1676 		}
1677 		GEM_BUG_ON(READ_ONCE(*map));
1678 		i915_request_put(lo);
1679 
1680 		if (igt_live_test_end(&t)) {
1681 			err = -EIO;
1682 			goto err_vma;
1683 		}
1684 	}
1685 
1686 	err = 0;
1687 err_vma:
1688 	i915_vma_unpin(vma);
1689 err_map:
1690 	i915_gem_object_unpin_map(obj);
1691 err_obj:
1692 	i915_gem_object_put(obj);
1693 err_ctx_lo:
1694 	kernel_context_close(ctx_lo);
1695 err_ctx_hi:
1696 	kernel_context_close(ctx_hi);
1697 	return err;
1698 }
1699 
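/*
 * Convenience wrapper for the tests still written against a GEM context:
 * look up the legacy engine instance within ctx and build the spinner
 * request on that intel_context.
 */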
1700 static struct i915_request *
1701 spinner_create_request(struct igt_spinner *spin,
1702 		       struct i915_gem_context *ctx,
1703 		       struct intel_engine_cs *engine,
1704 		       u32 arb)
1705 {
1706 	struct intel_context *ce;
1707 	struct i915_request *rq;
1708 
1709 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1710 	if (IS_ERR(ce))
1711 		return ERR_CAST(ce);
1712 
1713 	rq = igt_spinner_create_request(spin, ce, arb);
1714 	intel_context_put(ce);
1715 	return rq;
1716 }
1717 
1718 static int live_preempt(void *arg)
1719 {
1720 	struct intel_gt *gt = arg;
1721 	struct i915_gem_context *ctx_hi, *ctx_lo;
1722 	struct igt_spinner spin_hi, spin_lo;
1723 	struct intel_engine_cs *engine;
1724 	enum intel_engine_id id;
1725 	int err = -ENOMEM;
1726 
1727 	if (igt_spinner_init(&spin_hi, gt))
1728 		return -ENOMEM;
1729 
1730 	if (igt_spinner_init(&spin_lo, gt))
1731 		goto err_spin_hi;
1732 
1733 	ctx_hi = kernel_context(gt->i915);
1734 	if (!ctx_hi)
1735 		goto err_spin_lo;
1736 	ctx_hi->sched.priority =
1737 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1738 
1739 	ctx_lo = kernel_context(gt->i915);
1740 	if (!ctx_lo)
1741 		goto err_ctx_hi;
1742 	ctx_lo->sched.priority =
1743 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1744 
1745 	for_each_engine(engine, gt, id) {
1746 		struct igt_live_test t;
1747 		struct i915_request *rq;
1748 
1749 		if (!intel_engine_has_preemption(engine))
1750 			continue;
1751 
1752 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1753 			err = -EIO;
1754 			goto err_ctx_lo;
1755 		}
1756 
1757 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1758 					    MI_ARB_CHECK);
1759 		if (IS_ERR(rq)) {
1760 			err = PTR_ERR(rq);
1761 			goto err_ctx_lo;
1762 		}
1763 
1764 		i915_request_add(rq);
1765 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1766 			GEM_TRACE("lo spinner failed to start\n");
1767 			GEM_TRACE_DUMP();
1768 			intel_gt_set_wedged(gt);
1769 			err = -EIO;
1770 			goto err_ctx_lo;
1771 		}
1772 
1773 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1774 					    MI_ARB_CHECK);
1775 		if (IS_ERR(rq)) {
1776 			igt_spinner_end(&spin_lo);
1777 			err = PTR_ERR(rq);
1778 			goto err_ctx_lo;
1779 		}
1780 
1781 		i915_request_add(rq);
1782 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1783 			GEM_TRACE("hi spinner failed to start\n");
1784 			GEM_TRACE_DUMP();
1785 			intel_gt_set_wedged(gt);
1786 			err = -EIO;
1787 			goto err_ctx_lo;
1788 		}
1789 
1790 		igt_spinner_end(&spin_hi);
1791 		igt_spinner_end(&spin_lo);
1792 
1793 		if (igt_live_test_end(&t)) {
1794 			err = -EIO;
1795 			goto err_ctx_lo;
1796 		}
1797 	}
1798 
1799 	err = 0;
1800 err_ctx_lo:
1801 	kernel_context_close(ctx_lo);
1802 err_ctx_hi:
1803 	kernel_context_close(ctx_hi);
1804 err_spin_lo:
1805 	igt_spinner_fini(&spin_lo);
1806 err_spin_hi:
1807 	igt_spinner_fini(&spin_hi);
1808 	return err;
1809 }
1810 
1811 static int live_late_preempt(void *arg)
1812 {
1813 	struct intel_gt *gt = arg;
1814 	struct i915_gem_context *ctx_hi, *ctx_lo;
1815 	struct igt_spinner spin_hi, spin_lo;
1816 	struct intel_engine_cs *engine;
1817 	struct i915_sched_attr attr = {};
1818 	enum intel_engine_id id;
1819 	int err = -ENOMEM;
1820 
1821 	if (igt_spinner_init(&spin_hi, gt))
1822 		return -ENOMEM;
1823 
1824 	if (igt_spinner_init(&spin_lo, gt))
1825 		goto err_spin_hi;
1826 
1827 	ctx_hi = kernel_context(gt->i915);
1828 	if (!ctx_hi)
1829 		goto err_spin_lo;
1830 
1831 	ctx_lo = kernel_context(gt->i915);
1832 	if (!ctx_lo)
1833 		goto err_ctx_hi;
1834 
1835 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1836 	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1837 
1838 	for_each_engine(engine, gt, id) {
1839 		struct igt_live_test t;
1840 		struct i915_request *rq;
1841 
1842 		if (!intel_engine_has_preemption(engine))
1843 			continue;
1844 
1845 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1846 			err = -EIO;
1847 			goto err_ctx_lo;
1848 		}
1849 
1850 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1851 					    MI_ARB_CHECK);
1852 		if (IS_ERR(rq)) {
1853 			err = PTR_ERR(rq);
1854 			goto err_ctx_lo;
1855 		}
1856 
1857 		i915_request_add(rq);
1858 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1859 			pr_err("First context failed to start\n");
1860 			goto err_wedged;
1861 		}
1862 
1863 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1864 					    MI_NOOP);
1865 		if (IS_ERR(rq)) {
1866 			igt_spinner_end(&spin_lo);
1867 			err = PTR_ERR(rq);
1868 			goto err_ctx_lo;
1869 		}
1870 
1871 		i915_request_add(rq);
1872 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1873 			pr_err("Second context overtook first?\n");
1874 			goto err_wedged;
1875 		}
1876 
1877 		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1878 		engine->schedule(rq, &attr);
1879 
1880 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1881 			pr_err("High priority context failed to preempt the low priority context\n");
1882 			GEM_TRACE_DUMP();
1883 			goto err_wedged;
1884 		}
1885 
1886 		igt_spinner_end(&spin_hi);
1887 		igt_spinner_end(&spin_lo);
1888 
1889 		if (igt_live_test_end(&t)) {
1890 			err = -EIO;
1891 			goto err_ctx_lo;
1892 		}
1893 	}
1894 
1895 	err = 0;
1896 err_ctx_lo:
1897 	kernel_context_close(ctx_lo);
1898 err_ctx_hi:
1899 	kernel_context_close(ctx_hi);
1900 err_spin_lo:
1901 	igt_spinner_fini(&spin_lo);
1902 err_spin_hi:
1903 	igt_spinner_fini(&spin_hi);
1904 	return err;
1905 
1906 err_wedged:
1907 	igt_spinner_end(&spin_hi);
1908 	igt_spinner_end(&spin_lo);
1909 	intel_gt_set_wedged(gt);
1910 	err = -EIO;
1911 	goto err_ctx_lo;
1912 }
1913 
1914 struct preempt_client {
1915 	struct igt_spinner spin;
1916 	struct i915_gem_context *ctx;
1917 };
1918 
1919 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1920 {
1921 	c->ctx = kernel_context(gt->i915);
1922 	if (!c->ctx)
1923 		return -ENOMEM;
1924 
1925 	if (igt_spinner_init(&c->spin, gt))
1926 		goto err_ctx;
1927 
1928 	return 0;
1929 
1930 err_ctx:
1931 	kernel_context_close(c->ctx);
1932 	return -ENOMEM;
1933 }
1934 
1935 static void preempt_client_fini(struct preempt_client *c)
1936 {
1937 	igt_spinner_fini(&c->spin);
1938 	kernel_context_close(c->ctx);
1939 }
1940 
1941 static int live_nopreempt(void *arg)
1942 {
1943 	struct intel_gt *gt = arg;
1944 	struct intel_engine_cs *engine;
1945 	struct preempt_client a, b;
1946 	enum intel_engine_id id;
1947 	int err = -ENOMEM;
1948 
1949 	/*
1950 	 * Verify that we can disable preemption for an individual request
	 * that may be under observation and so must not be interrupted.
1952 	 */
1953 
1954 	if (preempt_client_init(gt, &a))
1955 		return -ENOMEM;
1956 	if (preempt_client_init(gt, &b))
1957 		goto err_client_a;
1958 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1959 
1960 	for_each_engine(engine, gt, id) {
1961 		struct i915_request *rq_a, *rq_b;
1962 
1963 		if (!intel_engine_has_preemption(engine))
1964 			continue;
1965 
1966 		engine->execlists.preempt_hang.count = 0;
1967 
1968 		rq_a = spinner_create_request(&a.spin,
1969 					      a.ctx, engine,
1970 					      MI_ARB_CHECK);
1971 		if (IS_ERR(rq_a)) {
1972 			err = PTR_ERR(rq_a);
1973 			goto err_client_b;
1974 		}
1975 
1976 		/* Low priority client, but unpreemptable! */
1977 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1978 
1979 		i915_request_add(rq_a);
1980 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1981 			pr_err("First client failed to start\n");
1982 			goto err_wedged;
1983 		}
1984 
1985 		rq_b = spinner_create_request(&b.spin,
1986 					      b.ctx, engine,
1987 					      MI_ARB_CHECK);
1988 		if (IS_ERR(rq_b)) {
1989 			err = PTR_ERR(rq_b);
1990 			goto err_client_b;
1991 		}
1992 
1993 		i915_request_add(rq_b);
1994 
1995 		/* B is much more important than A! (But A is unpreemptable.) */
1996 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1997 
1998 		/* Wait long enough for preemption and timeslicing */
1999 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
2000 			pr_err("Second client started too early!\n");
2001 			goto err_wedged;
2002 		}
2003 
2004 		igt_spinner_end(&a.spin);
2005 
2006 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2007 			pr_err("Second client failed to start\n");
2008 			goto err_wedged;
2009 		}
2010 
2011 		igt_spinner_end(&b.spin);
2012 
2013 		if (engine->execlists.preempt_hang.count) {
2014 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2015 			       engine->execlists.preempt_hang.count);
2016 			err = -EINVAL;
2017 			goto err_wedged;
2018 		}
2019 
2020 		if (igt_flush_test(gt->i915))
2021 			goto err_wedged;
2022 	}
2023 
2024 	err = 0;
2025 err_client_b:
2026 	preempt_client_fini(&b);
2027 err_client_a:
2028 	preempt_client_fini(&a);
2029 	return err;
2030 
2031 err_wedged:
2032 	igt_spinner_end(&b.spin);
2033 	igt_spinner_end(&a.spin);
2034 	intel_gt_set_wedged(gt);
2035 	err = -EIO;
2036 	goto err_client_b;
2037 }
2038 
2039 struct live_preempt_cancel {
2040 	struct intel_engine_cs *engine;
2041 	struct preempt_client a, b;
2042 };
2043 
2044 static int __cancel_active0(struct live_preempt_cancel *arg)
2045 {
2046 	struct i915_request *rq;
2047 	struct igt_live_test t;
2048 	int err;
2049 
2050 	/* Preempt cancel of ELSP0 */
2051 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2052 	if (igt_live_test_begin(&t, arg->engine->i915,
2053 				__func__, arg->engine->name))
2054 		return -EIO;
2055 
2056 	rq = spinner_create_request(&arg->a.spin,
2057 				    arg->a.ctx, arg->engine,
2058 				    MI_ARB_CHECK);
2059 	if (IS_ERR(rq))
2060 		return PTR_ERR(rq);
2061 
2062 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2063 	i915_request_get(rq);
2064 	i915_request_add(rq);
2065 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2066 		err = -EIO;
2067 		goto out;
2068 	}
2069 
2070 	intel_context_set_banned(rq->context);
2071 	err = intel_engine_pulse(arg->engine);
2072 	if (err)
2073 		goto out;
2074 
2075 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2076 	if (err) {
2077 		pr_err("Cancelled inflight0 request did not reset\n");
2078 		goto out;
2079 	}
2080 
2081 out:
2082 	i915_request_put(rq);
2083 	if (igt_live_test_end(&t))
2084 		err = -EIO;
2085 	return err;
2086 }
2087 
2088 static int __cancel_active1(struct live_preempt_cancel *arg)
2089 {
2090 	struct i915_request *rq[2] = {};
2091 	struct igt_live_test t;
2092 	int err;
2093 
2094 	/* Preempt cancel of ELSP1 */
2095 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2096 	if (igt_live_test_begin(&t, arg->engine->i915,
2097 				__func__, arg->engine->name))
2098 		return -EIO;
2099 
2100 	rq[0] = spinner_create_request(&arg->a.spin,
2101 				       arg->a.ctx, arg->engine,
2102 				       MI_NOOP); /* no preemption */
2103 	if (IS_ERR(rq[0]))
2104 		return PTR_ERR(rq[0]);
2105 
2106 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2107 	i915_request_get(rq[0]);
2108 	i915_request_add(rq[0]);
2109 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2110 		err = -EIO;
2111 		goto out;
2112 	}
2113 
2114 	rq[1] = spinner_create_request(&arg->b.spin,
2115 				       arg->b.ctx, arg->engine,
2116 				       MI_ARB_CHECK);
2117 	if (IS_ERR(rq[1])) {
2118 		err = PTR_ERR(rq[1]);
2119 		goto out;
2120 	}
2121 
2122 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2123 	i915_request_get(rq[1]);
2124 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2125 	i915_request_add(rq[1]);
2126 	if (err)
2127 		goto out;
2128 
2129 	intel_context_set_banned(rq[1]->context);
2130 	err = intel_engine_pulse(arg->engine);
2131 	if (err)
2132 		goto out;
2133 
2134 	igt_spinner_end(&arg->a.spin);
2135 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2136 	if (err)
2137 		goto out;
2138 
2139 	if (rq[0]->fence.error != 0) {
2140 		pr_err("Normal inflight0 request did not complete\n");
2141 		err = -EINVAL;
2142 		goto out;
2143 	}
2144 
2145 	if (rq[1]->fence.error != -EIO) {
2146 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2147 		err = -EINVAL;
2148 		goto out;
2149 	}
2150 
2151 out:
2152 	i915_request_put(rq[1]);
2153 	i915_request_put(rq[0]);
2154 	if (igt_live_test_end(&t))
2155 		err = -EIO;
2156 	return err;
2157 }
2158 
2159 static int __cancel_queued(struct live_preempt_cancel *arg)
2160 {
2161 	struct i915_request *rq[3] = {};
2162 	struct igt_live_test t;
2163 	int err;
2164 
2165 	/* Full ELSP and one in the wings */
2166 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2167 	if (igt_live_test_begin(&t, arg->engine->i915,
2168 				__func__, arg->engine->name))
2169 		return -EIO;
2170 
2171 	rq[0] = spinner_create_request(&arg->a.spin,
2172 				       arg->a.ctx, arg->engine,
2173 				       MI_ARB_CHECK);
2174 	if (IS_ERR(rq[0]))
2175 		return PTR_ERR(rq[0]);
2176 
2177 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2178 	i915_request_get(rq[0]);
2179 	i915_request_add(rq[0]);
2180 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2181 		err = -EIO;
2182 		goto out;
2183 	}
2184 
2185 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2186 	if (IS_ERR(rq[1])) {
2187 		err = PTR_ERR(rq[1]);
2188 		goto out;
2189 	}
2190 
2191 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2192 	i915_request_get(rq[1]);
2193 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2194 	i915_request_add(rq[1]);
2195 	if (err)
2196 		goto out;
2197 
2198 	rq[2] = spinner_create_request(&arg->b.spin,
2199 				       arg->a.ctx, arg->engine,
2200 				       MI_ARB_CHECK);
2201 	if (IS_ERR(rq[2])) {
2202 		err = PTR_ERR(rq[2]);
2203 		goto out;
2204 	}
2205 
2206 	i915_request_get(rq[2]);
2207 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2208 	i915_request_add(rq[2]);
2209 	if (err)
2210 		goto out;
2211 
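	/*
	 * rq[2] reuses a.ctx, so banning its context also condemns the
	 * inflight rq[0]; only rq[1], submitted on the other context, should
	 * survive and complete normally.
	 */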
2212 	intel_context_set_banned(rq[2]->context);
2213 	err = intel_engine_pulse(arg->engine);
2214 	if (err)
2215 		goto out;
2216 
2217 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2218 	if (err)
2219 		goto out;
2220 
2221 	if (rq[0]->fence.error != -EIO) {
2222 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2223 		err = -EINVAL;
2224 		goto out;
2225 	}
2226 
2227 	if (rq[1]->fence.error != 0) {
2228 		pr_err("Normal inflight1 request did not complete\n");
2229 		err = -EINVAL;
2230 		goto out;
2231 	}
2232 
2233 	if (rq[2]->fence.error != -EIO) {
2234 		pr_err("Cancelled queued request did not report -EIO\n");
2235 		err = -EINVAL;
2236 		goto out;
2237 	}
2238 
2239 out:
2240 	i915_request_put(rq[2]);
2241 	i915_request_put(rq[1]);
2242 	i915_request_put(rq[0]);
2243 	if (igt_live_test_end(&t))
2244 		err = -EIO;
2245 	return err;
2246 }
2247 
2248 static int __cancel_hostile(struct live_preempt_cancel *arg)
2249 {
2250 	struct i915_request *rq;
2251 	int err;
2252 
2253 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2254 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2255 		return 0;
2256 
2257 	if (!intel_has_reset_engine(arg->engine->gt))
2258 		return 0;
2259 
2260 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2261 	rq = spinner_create_request(&arg->a.spin,
2262 				    arg->a.ctx, arg->engine,
2263 				    MI_NOOP); /* preemption disabled */
2264 	if (IS_ERR(rq))
2265 		return PTR_ERR(rq);
2266 
2267 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2268 	i915_request_get(rq);
2269 	i915_request_add(rq);
2270 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2271 		err = -EIO;
2272 		goto out;
2273 	}
2274 
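	/*
	 * The spinner offers no arbitration point, so the banned context
	 * cannot simply be preempted out; removing it requires the preempt
	 * timeout to expire and force an engine reset.
	 */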
2275 	intel_context_set_banned(rq->context);
2276 	err = intel_engine_pulse(arg->engine); /* force reset */
2277 	if (err)
2278 		goto out;
2279 
2280 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2281 	if (err) {
2282 		pr_err("Cancelled inflight0 request did not reset\n");
2283 		goto out;
2284 	}
2285 
2286 out:
2287 	i915_request_put(rq);
2288 	if (igt_flush_test(arg->engine->i915))
2289 		err = -EIO;
2290 	return err;
2291 }
2292 
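/*
 * Presumed selftest fault-injection knobs: bias engine->reset_timeout so
 * that the preempt reset forced in __cancel_fail() is expected to fail,
 * leaving the heartbeat as the only means of recovering the engine.
 */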
2293 static void force_reset_timeout(struct intel_engine_cs *engine)
2294 {
2295 	engine->reset_timeout.probability = 999;
2296 	atomic_set(&engine->reset_timeout.times, -1);
2297 }
2298 
2299 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2300 {
2301 	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2302 }
2303 
2304 static int __cancel_fail(struct live_preempt_cancel *arg)
2305 {
2306 	struct intel_engine_cs *engine = arg->engine;
2307 	struct i915_request *rq;
2308 	int err;
2309 
2310 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2311 		return 0;
2312 
2313 	if (!intel_has_reset_engine(engine->gt))
2314 		return 0;
2315 
2316 	GEM_TRACE("%s(%s)\n", __func__, engine->name);
2317 	rq = spinner_create_request(&arg->a.spin,
2318 				    arg->a.ctx, engine,
2319 				    MI_NOOP); /* preemption disabled */
2320 	if (IS_ERR(rq))
2321 		return PTR_ERR(rq);
2322 
2323 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2324 	i915_request_get(rq);
2325 	i915_request_add(rq);
2326 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2327 		err = -EIO;
2328 		goto out;
2329 	}
2330 
2331 	intel_context_set_banned(rq->context);
2332 
2333 	err = intel_engine_pulse(engine);
2334 	if (err)
2335 		goto out;
2336 
2337 	force_reset_timeout(engine);
2338 
2339 	/* force preempt reset [failure] */
2340 	while (!engine->execlists.pending[0])
2341 		intel_engine_flush_submission(engine);
2342 	del_timer_sync(&engine->execlists.preempt);
2343 	intel_engine_flush_submission(engine);
2344 
2345 	cancel_reset_timeout(engine);
2346 
2347 	/* after failure, require heartbeats to reset device */
2348 	intel_engine_set_heartbeat(engine, 1);
2349 	err = wait_for_reset(engine, rq, HZ / 2);
2350 	intel_engine_set_heartbeat(engine,
2351 				   engine->defaults.heartbeat_interval_ms);
2352 	if (err) {
2353 		pr_err("Cancelled inflight0 request did not reset\n");
2354 		goto out;
2355 	}
2356 
2357 out:
2358 	i915_request_put(rq);
2359 	if (igt_flush_test(engine->i915))
2360 		err = -EIO;
2361 	return err;
2362 }
2363 
2364 static int live_preempt_cancel(void *arg)
2365 {
2366 	struct intel_gt *gt = arg;
2367 	struct live_preempt_cancel data;
2368 	enum intel_engine_id id;
2369 	int err = -ENOMEM;
2370 
2371 	/*
2372 	 * To cancel an inflight context, we need to first remove it from the
2373 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2374 	 */
2375 
2376 	if (preempt_client_init(gt, &data.a))
2377 		return -ENOMEM;
2378 	if (preempt_client_init(gt, &data.b))
2379 		goto err_client_a;
2380 
2381 	for_each_engine(data.engine, gt, id) {
2382 		if (!intel_engine_has_preemption(data.engine))
2383 			continue;
2384 
2385 		err = __cancel_active0(&data);
2386 		if (err)
2387 			goto err_wedged;
2388 
2389 		err = __cancel_active1(&data);
2390 		if (err)
2391 			goto err_wedged;
2392 
2393 		err = __cancel_queued(&data);
2394 		if (err)
2395 			goto err_wedged;
2396 
2397 		err = __cancel_hostile(&data);
2398 		if (err)
2399 			goto err_wedged;
2400 
2401 		err = __cancel_fail(&data);
2402 		if (err)
2403 			goto err_wedged;
2404 	}
2405 
2406 	err = 0;
2407 err_client_b:
2408 	preempt_client_fini(&data.b);
2409 err_client_a:
2410 	preempt_client_fini(&data.a);
2411 	return err;
2412 
2413 err_wedged:
2414 	GEM_TRACE_DUMP();
2415 	igt_spinner_end(&data.b.spin);
2416 	igt_spinner_end(&data.a.spin);
2417 	intel_gt_set_wedged(gt);
2418 	goto err_client_b;
2419 }
2420 
2421 static int live_suppress_self_preempt(void *arg)
2422 {
2423 	struct intel_gt *gt = arg;
2424 	struct intel_engine_cs *engine;
2425 	struct i915_sched_attr attr = {
2426 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2427 	};
2428 	struct preempt_client a, b;
2429 	enum intel_engine_id id;
2430 	int err = -ENOMEM;
2431 
2432 	/*
2433 	 * Verify that if a preemption request does not cause a change in
2434 	 * the current execution order, the preempt-to-idle injection is
2435 	 * skipped and that we do not accidentally apply it after the CS
2436 	 * completion event.
2437 	 */
2438 
2439 	if (intel_uc_uses_guc_submission(&gt->uc))
2440 		return 0; /* presume black box */
2441 
2442 	if (intel_vgpu_active(gt->i915))
2443 		return 0; /* GVT forces single port & request submission */
2444 
2445 	if (preempt_client_init(gt, &a))
2446 		return -ENOMEM;
2447 	if (preempt_client_init(gt, &b))
2448 		goto err_client_a;
2449 
2450 	for_each_engine(engine, gt, id) {
2451 		struct i915_request *rq_a, *rq_b;
2452 		int depth;
2453 
2454 		if (!intel_engine_has_preemption(engine))
2455 			continue;
2456 
2457 		if (igt_flush_test(gt->i915))
2458 			goto err_wedged;
2459 
2460 		st_engine_heartbeat_disable(engine);
2461 		engine->execlists.preempt_hang.count = 0;
2462 
2463 		rq_a = spinner_create_request(&a.spin,
2464 					      a.ctx, engine,
2465 					      MI_NOOP);
2466 		if (IS_ERR(rq_a)) {
2467 			err = PTR_ERR(rq_a);
2468 			st_engine_heartbeat_enable(engine);
2469 			goto err_client_b;
2470 		}
2471 
2472 		i915_request_add(rq_a);
2473 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2474 			pr_err("First client failed to start\n");
2475 			st_engine_heartbeat_enable(engine);
2476 			goto err_wedged;
2477 		}
2478 
2479 		/* Keep postponing the timer to avoid premature slicing */
2480 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2481 		for (depth = 0; depth < 8; depth++) {
2482 			rq_b = spinner_create_request(&b.spin,
2483 						      b.ctx, engine,
2484 						      MI_NOOP);
2485 			if (IS_ERR(rq_b)) {
2486 				err = PTR_ERR(rq_b);
2487 				st_engine_heartbeat_enable(engine);
2488 				goto err_client_b;
2489 			}
2490 			i915_request_add(rq_b);
2491 
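			/*
			 * Bump the priority of the already-running request;
			 * as the execution order does not change, this must
			 * not inject a preempt-to-idle cycle.
			 */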
2492 			GEM_BUG_ON(i915_request_completed(rq_a));
2493 			engine->schedule(rq_a, &attr);
2494 			igt_spinner_end(&a.spin);
2495 
2496 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2497 				pr_err("Second client failed to start\n");
2498 				st_engine_heartbeat_enable(engine);
2499 				goto err_wedged;
2500 			}
2501 
2502 			swap(a, b);
2503 			rq_a = rq_b;
2504 		}
2505 		igt_spinner_end(&a.spin);
2506 
2507 		if (engine->execlists.preempt_hang.count) {
2508 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2509 			       engine->name,
2510 			       engine->execlists.preempt_hang.count,
2511 			       depth);
2512 			st_engine_heartbeat_enable(engine);
2513 			err = -EINVAL;
2514 			goto err_client_b;
2515 		}
2516 
2517 		st_engine_heartbeat_enable(engine);
2518 		if (igt_flush_test(gt->i915))
2519 			goto err_wedged;
2520 	}
2521 
2522 	err = 0;
2523 err_client_b:
2524 	preempt_client_fini(&b);
2525 err_client_a:
2526 	preempt_client_fini(&a);
2527 	return err;
2528 
2529 err_wedged:
2530 	igt_spinner_end(&b.spin);
2531 	igt_spinner_end(&a.spin);
2532 	intel_gt_set_wedged(gt);
2533 	err = -EIO;
2534 	goto err_client_b;
2535 }
2536 
2537 static int live_chain_preempt(void *arg)
2538 {
2539 	struct intel_gt *gt = arg;
2540 	struct intel_engine_cs *engine;
2541 	struct preempt_client hi, lo;
2542 	enum intel_engine_id id;
2543 	int err = -ENOMEM;
2544 
2545 	/*
2546 	 * Build a chain AB...BA between two contexts (A, B) and request
2547 	 * preemption of the last request. It should then complete before
2548 	 * the previously submitted spinner in B.
2549 	 */
2550 
2551 	if (preempt_client_init(gt, &hi))
2552 		return -ENOMEM;
2553 
2554 	if (preempt_client_init(gt, &lo))
2555 		goto err_client_hi;
2556 
2557 	for_each_engine(engine, gt, id) {
2558 		struct i915_sched_attr attr = {
2559 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2560 		};
2561 		struct igt_live_test t;
2562 		struct i915_request *rq;
2563 		int ring_size, count, i;
2564 
2565 		if (!intel_engine_has_preemption(engine))
2566 			continue;
2567 
2568 		rq = spinner_create_request(&lo.spin,
2569 					    lo.ctx, engine,
2570 					    MI_ARB_CHECK);
2571 		if (IS_ERR(rq))
2572 			goto err_wedged;
2573 
2574 		i915_request_get(rq);
2575 		i915_request_add(rq);
2576 
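		/*
		 * Estimate how many requests fit in the ring from the size
		 * of this first request, so we know how deep a chain we can
		 * submit without stalling for ring space.
		 */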
2577 		ring_size = rq->wa_tail - rq->head;
2578 		if (ring_size < 0)
2579 			ring_size += rq->ring->size;
2580 		ring_size = rq->ring->size / ring_size;
2581 		pr_debug("%s(%s): Using maximum of %d requests\n",
2582 			 __func__, engine->name, ring_size);
2583 
2584 		igt_spinner_end(&lo.spin);
2585 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2586 			pr_err("Timed out waiting to flush %s\n", engine->name);
2587 			i915_request_put(rq);
2588 			goto err_wedged;
2589 		}
2590 		i915_request_put(rq);
2591 
2592 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2593 			err = -EIO;
2594 			goto err_wedged;
2595 		}
2596 
2597 		for_each_prime_number_from(count, 1, ring_size) {
2598 			rq = spinner_create_request(&hi.spin,
2599 						    hi.ctx, engine,
2600 						    MI_ARB_CHECK);
2601 			if (IS_ERR(rq))
2602 				goto err_wedged;
2603 			i915_request_add(rq);
2604 			if (!igt_wait_for_spinner(&hi.spin, rq))
2605 				goto err_wedged;
2606 
2607 			rq = spinner_create_request(&lo.spin,
2608 						    lo.ctx, engine,
2609 						    MI_ARB_CHECK);
2610 			if (IS_ERR(rq))
2611 				goto err_wedged;
2612 			i915_request_add(rq);
2613 
2614 			for (i = 0; i < count; i++) {
2615 				rq = igt_request_alloc(lo.ctx, engine);
2616 				if (IS_ERR(rq))
2617 					goto err_wedged;
2618 				i915_request_add(rq);
2619 			}
2620 
2621 			rq = igt_request_alloc(hi.ctx, engine);
2622 			if (IS_ERR(rq))
2623 				goto err_wedged;
2624 
2625 			i915_request_get(rq);
2626 			i915_request_add(rq);
2627 			engine->schedule(rq, &attr);
2628 
2629 			igt_spinner_end(&hi.spin);
2630 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2631 				struct drm_printer p =
2632 					drm_info_printer(gt->i915->drm.dev);
2633 
2634 				pr_err("Failed to preempt over chain of %d\n",
2635 				       count);
2636 				intel_engine_dump(engine, &p,
2637 						  "%s\n", engine->name);
2638 				i915_request_put(rq);
2639 				goto err_wedged;
2640 			}
2641 			igt_spinner_end(&lo.spin);
2642 			i915_request_put(rq);
2643 
2644 			rq = igt_request_alloc(lo.ctx, engine);
2645 			if (IS_ERR(rq))
2646 				goto err_wedged;
2647 
2648 			i915_request_get(rq);
2649 			i915_request_add(rq);
2650 
2651 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2652 				struct drm_printer p =
2653 					drm_info_printer(gt->i915->drm.dev);
2654 
2655 				pr_err("Failed to flush low priority chain of %d requests\n",
2656 				       count);
2657 				intel_engine_dump(engine, &p,
2658 						  "%s\n", engine->name);
2659 
2660 				i915_request_put(rq);
2661 				goto err_wedged;
2662 			}
2663 			i915_request_put(rq);
2664 		}
2665 
2666 		if (igt_live_test_end(&t)) {
2667 			err = -EIO;
2668 			goto err_wedged;
2669 		}
2670 	}
2671 
2672 	err = 0;
2673 err_client_lo:
2674 	preempt_client_fini(&lo);
2675 err_client_hi:
2676 	preempt_client_fini(&hi);
2677 	return err;
2678 
2679 err_wedged:
2680 	igt_spinner_end(&hi.spin);
2681 	igt_spinner_end(&lo.spin);
2682 	intel_gt_set_wedged(gt);
2683 	err = -EIO;
2684 	goto err_client_lo;
2685 }
2686 
2687 static int create_gang(struct intel_engine_cs *engine,
2688 		       struct i915_request **prev)
2689 {
2690 	struct drm_i915_gem_object *obj;
2691 	struct intel_context *ce;
2692 	struct i915_request *rq;
2693 	struct i915_vma *vma;
2694 	u32 *cs;
2695 	int err;
2696 
2697 	ce = intel_context_create(engine);
2698 	if (IS_ERR(ce))
2699 		return PTR_ERR(ce);
2700 
2701 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2702 	if (IS_ERR(obj)) {
2703 		err = PTR_ERR(obj);
2704 		goto err_ce;
2705 	}
2706 
2707 	vma = i915_vma_instance(obj, ce->vm, NULL);
2708 	if (IS_ERR(vma)) {
2709 		err = PTR_ERR(vma);
2710 		goto err_obj;
2711 	}
2712 
2713 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2714 	if (err)
2715 		goto err_obj;
2716 
2717 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2718 	if (IS_ERR(cs)) {
2719 		err = PTR_ERR(cs);
2720 		goto err_obj;
2721 	}
2722 
2723 	/* Semaphore target: spin until zero */
2724 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2725 
2726 	*cs++ = MI_SEMAPHORE_WAIT |
2727 		MI_SEMAPHORE_POLL |
2728 		MI_SEMAPHORE_SAD_EQ_SDD;
2729 	*cs++ = 0;
2730 	*cs++ = lower_32_bits(vma->node.start);
2731 	*cs++ = upper_32_bits(vma->node.start);
2732 
2733 	if (*prev) {
2734 		u64 offset = (*prev)->batch->node.start;
2735 
2736 		/* Terminate the spinner in the next lower priority batch. */
2737 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2738 		*cs++ = lower_32_bits(offset);
2739 		*cs++ = upper_32_bits(offset);
2740 		*cs++ = 0;
2741 	}
2742 
2743 	*cs++ = MI_BATCH_BUFFER_END;
2744 	i915_gem_object_flush_map(obj);
2745 	i915_gem_object_unpin_map(obj);
2746 
2747 	rq = intel_context_create_request(ce);
2748 	if (IS_ERR(rq)) {
2749 		err = PTR_ERR(rq);
2750 		goto err_obj;
2751 	}
2752 
2753 	rq->batch = i915_vma_get(vma);
2754 	i915_request_get(rq);
2755 
2756 	i915_vma_lock(vma);
2757 	err = i915_request_await_object(rq, vma->obj, false);
2758 	if (!err)
2759 		err = i915_vma_move_to_active(vma, rq, 0);
2760 	if (!err)
2761 		err = rq->engine->emit_bb_start(rq,
2762 						vma->node.start,
2763 						PAGE_SIZE, 0);
2764 	i915_vma_unlock(vma);
2765 	i915_request_add(rq);
2766 	if (err)
2767 		goto err_rq;
2768 
2769 	i915_gem_object_put(obj);
2770 	intel_context_put(ce);
2771 
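	/*
	 * Borrow the mock.link (free for live requests) to chain the gang
	 * together, so the caller can walk from the newest (highest
	 * priority) request back down to the oldest.
	 */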
2772 	rq->mock.link.next = &(*prev)->mock.link;
2773 	*prev = rq;
2774 	return 0;
2775 
2776 err_rq:
2777 	i915_vma_put(rq->batch);
2778 	i915_request_put(rq);
2779 err_obj:
2780 	i915_gem_object_put(obj);
2781 err_ce:
2782 	intel_context_put(ce);
2783 	return err;
2784 }
2785 
2786 static int __live_preempt_ring(struct intel_engine_cs *engine,
2787 			       struct igt_spinner *spin,
2788 			       int queue_sz, int ring_sz)
2789 {
2790 	struct intel_context *ce[2] = {};
2791 	struct i915_request *rq;
2792 	struct igt_live_test t;
2793 	int err = 0;
2794 	int n;
2795 
2796 	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2797 		return -EIO;
2798 
2799 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2800 		struct intel_context *tmp;
2801 
2802 		tmp = intel_context_create(engine);
2803 		if (IS_ERR(tmp)) {
2804 			err = PTR_ERR(tmp);
2805 			goto err_ce;
2806 		}
2807 
2808 		tmp->ring = __intel_context_ring_size(ring_sz);
2809 
2810 		err = intel_context_pin(tmp);
2811 		if (err) {
2812 			intel_context_put(tmp);
2813 			goto err_ce;
2814 		}
2815 
2816 		memset32(tmp->ring->vaddr,
2817 			 0xdeadbeef, /* trigger a hang if executed */
2818 			 tmp->ring->vma->size / sizeof(u32));
2819 
2820 		ce[n] = tmp;
2821 	}
2822 
2823 	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2824 	if (IS_ERR(rq)) {
2825 		err = PTR_ERR(rq);
2826 		goto err_ce;
2827 	}
2828 
2829 	i915_request_get(rq);
2830 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2831 	i915_request_add(rq);
2832 
2833 	if (!igt_wait_for_spinner(spin, rq)) {
2834 		intel_gt_set_wedged(engine->gt);
2835 		i915_request_put(rq);
2836 		err = -ETIME;
2837 		goto err_ce;
2838 	}
2839 
2840 	/* Fill the ring until we cause a wrap */
2841 	n = 0;
2842 	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2843 		struct i915_request *tmp;
2844 
2845 		tmp = intel_context_create_request(ce[0]);
2846 		if (IS_ERR(tmp)) {
2847 			err = PTR_ERR(tmp);
2848 			i915_request_put(rq);
2849 			goto err_ce;
2850 		}
2851 
2852 		i915_request_add(tmp);
2853 		intel_engine_flush_submission(engine);
2854 		n++;
2855 	}
2856 	intel_engine_flush_submission(engine);
2857 	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2858 		 engine->name, queue_sz, n,
2859 		 ce[0]->ring->size,
2860 		 ce[0]->ring->tail,
2861 		 ce[0]->ring->emit,
2862 		 rq->tail);
2863 	i915_request_put(rq);
2864 
2865 	/* Create a second request to preempt the first ring */
2866 	rq = intel_context_create_request(ce[1]);
2867 	if (IS_ERR(rq)) {
2868 		err = PTR_ERR(rq);
2869 		goto err_ce;
2870 	}
2871 
2872 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2873 	i915_request_get(rq);
2874 	i915_request_add(rq);
2875 
2876 	err = wait_for_submit(engine, rq, HZ / 2);
2877 	i915_request_put(rq);
2878 	if (err) {
2879 		pr_err("%s: preemption request was not submitted\n",
2880 		       engine->name);
2881 		err = -ETIME;
2882 	}
2883 
2884 	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2885 		 engine->name,
2886 		 ce[0]->ring->tail, ce[0]->ring->emit,
2887 		 ce[1]->ring->tail, ce[1]->ring->emit);
2888 
2889 err_ce:
2890 	intel_engine_flush_submission(engine);
2891 	igt_spinner_end(spin);
2892 	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2893 		if (IS_ERR_OR_NULL(ce[n]))
2894 			break;
2895 
2896 		intel_context_unpin(ce[n]);
2897 		intel_context_put(ce[n]);
2898 	}
2899 	if (igt_live_test_end(&t))
2900 		err = -EIO;
2901 	return err;
2902 }
2903 
2904 static int live_preempt_ring(void *arg)
2905 {
2906 	struct intel_gt *gt = arg;
2907 	struct intel_engine_cs *engine;
2908 	struct igt_spinner spin;
2909 	enum intel_engine_id id;
2910 	int err = 0;
2911 
2912 	/*
2913 	 * Check that we rollback large chunks of a ring in order to do a
2914 	 * preemption event. Similar to live_unlite_ring, but looking at
2915 	 * ring size rather than the impact of intel_ring_direction().
2916 	 */
2917 
2918 	if (igt_spinner_init(&spin, gt))
2919 		return -ENOMEM;
2920 
2921 	for_each_engine(engine, gt, id) {
2922 		int n;
2923 
2924 		if (!intel_engine_has_preemption(engine))
2925 			continue;
2926 
2927 		if (!intel_engine_can_store_dword(engine))
2928 			continue;
2929 
2930 		st_engine_heartbeat_disable(engine);
2931 
2932 		for (n = 0; n <= 3; n++) {
2933 			err = __live_preempt_ring(engine, &spin,
2934 						  n * SZ_4K / 4, SZ_4K);
2935 			if (err)
2936 				break;
2937 		}
2938 
2939 		st_engine_heartbeat_enable(engine);
2940 		if (err)
2941 			break;
2942 	}
2943 
2944 	igt_spinner_fini(&spin);
2945 	return err;
2946 }
2947 
2948 static int live_preempt_gang(void *arg)
2949 {
2950 	struct intel_gt *gt = arg;
2951 	struct intel_engine_cs *engine;
2952 	enum intel_engine_id id;
2953 
2954 	/*
2955 	 * Build as long a chain of preempters as we can, with each
2956 	 * request higher priority than the last. Once we are ready, we release
2957 	 * the last batch which then percolates down the chain, each releasing
2958 	 * the next oldest in turn. The intent is to simply push as hard as we
2959 	 * can with the number of preemptions, trying to exceed narrow HW
2960 	 * limits. At a minimum, we insist that we can sort all the user
2961 	 * high priority levels into execution order.
2962 	 */
2963 
2964 	for_each_engine(engine, gt, id) {
2965 		struct i915_request *rq = NULL;
2966 		struct igt_live_test t;
2967 		IGT_TIMEOUT(end_time);
2968 		int prio = 0;
2969 		int err = 0;
2970 		u32 *cs;
2971 
2972 		if (!intel_engine_has_preemption(engine))
2973 			continue;
2974 
2975 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2976 			return -EIO;
2977 
2978 		do {
2979 			struct i915_sched_attr attr = {
2980 				.priority = I915_USER_PRIORITY(prio++),
2981 			};
2982 
2983 			err = create_gang(engine, &rq);
2984 			if (err)
2985 				break;
2986 
2987 			/* Submit each spinner at increasing priority */
2988 			engine->schedule(rq, &attr);
2989 		} while (prio <= I915_PRIORITY_MAX &&
2990 			 !__igt_timeout(end_time, NULL));
2991 		pr_debug("%s: Preempt chain of %d requests\n",
2992 			 engine->name, prio);
2993 
2994 		/*
2995 		 * Release the last spinner (by clearing its semaphore): it is
2996 		 * the highest priority and should execute first. When it
2997 		 * completes, it terminates the next lowest spinner, and so on
2998 		 * until there are no more spinners and the gang is complete.
2999 		 */
3000 		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
3001 		if (!IS_ERR(cs)) {
3002 			*cs = 0;
3003 			i915_gem_object_unpin_map(rq->batch->obj);
3004 		} else {
3005 			err = PTR_ERR(cs);
3006 			intel_gt_set_wedged(gt);
3007 		}
3008 
3009 		while (rq) { /* wait for each rq from highest to lowest prio */
3010 			struct i915_request *n = list_next_entry(rq, mock.link);
3011 
3012 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3013 				struct drm_printer p =
3014 					drm_info_printer(engine->i915->drm.dev);
3015 
3016 				pr_err("Failed to flush chain of %d requests, at %d\n",
3017 				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
3018 				intel_engine_dump(engine, &p,
3019 						  "%s\n", engine->name);
3020 
3021 				err = -ETIME;
3022 			}
3023 
3024 			i915_vma_put(rq->batch);
3025 			i915_request_put(rq);
3026 			rq = n;
3027 		}
3028 
3029 		if (igt_live_test_end(&t))
3030 			err = -EIO;
3031 		if (err)
3032 			return err;
3033 	}
3034 
3035 	return 0;
3036 }
3037 
3038 static struct i915_vma *
3039 create_gpr_user(struct intel_engine_cs *engine,
3040 		struct i915_vma *result,
3041 		unsigned int offset)
3042 {
3043 	struct drm_i915_gem_object *obj;
3044 	struct i915_vma *vma;
3045 	u32 *cs;
3046 	int err;
3047 	int i;
3048 
3049 	obj = i915_gem_object_create_internal(engine->i915, 4096);
3050 	if (IS_ERR(obj))
3051 		return ERR_CAST(obj);
3052 
3053 	vma = i915_vma_instance(obj, result->vm, NULL);
3054 	if (IS_ERR(vma)) {
3055 		i915_gem_object_put(obj);
3056 		return vma;
3057 	}
3058 
3059 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3060 	if (err) {
3061 		i915_vma_put(vma);
3062 		return ERR_PTR(err);
3063 	}
3064 
3065 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
3066 	if (IS_ERR(cs)) {
3067 		i915_vma_put(vma);
3068 		return ERR_CAST(cs);
3069 	}
3070 
3071 	/* All GPRs are zero for new contexts. We load GPR(0) with the constant 1 */
3072 	*cs++ = MI_LOAD_REGISTER_IMM(1);
3073 	*cs++ = CS_GPR(engine, 0);
3074 	*cs++ = 1;
3075 
3076 	for (i = 1; i < NUM_GPR; i++) {
3077 		u64 addr;
3078 
3079 		/*
3080 		 * Perform: GPR[i]++
3081 		 *
3082 		 * As we read and write into the context saved GPR[i], if
3083 		 * we restart this batch buffer from an earlier point, we
3084 		 * will repeat the increment and store a value > 1.
3085 		 */
3086 		*cs++ = MI_MATH(4);
3087 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3088 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3089 		*cs++ = MI_MATH_ADD;
3090 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3091 
3092 		addr = result->node.start + offset + i * sizeof(*cs);
3093 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3094 		*cs++ = CS_GPR(engine, 2 * i);
3095 		*cs++ = lower_32_bits(addr);
3096 		*cs++ = upper_32_bits(addr);
3097 
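		/*
		 * Wait for the global semaphore to reach i, i.e. for
		 * preempt_user() to have issued at least i preempting
		 * requests, pacing this batch across the preemption events.
		 */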
3098 		*cs++ = MI_SEMAPHORE_WAIT |
3099 			MI_SEMAPHORE_POLL |
3100 			MI_SEMAPHORE_SAD_GTE_SDD;
3101 		*cs++ = i;
3102 		*cs++ = lower_32_bits(result->node.start);
3103 		*cs++ = upper_32_bits(result->node.start);
3104 	}
3105 
3106 	*cs++ = MI_BATCH_BUFFER_END;
3107 	i915_gem_object_flush_map(obj);
3108 	i915_gem_object_unpin_map(obj);
3109 
3110 	return vma;
3111 }
3112 
3113 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3114 {
3115 	struct drm_i915_gem_object *obj;
3116 	struct i915_vma *vma;
3117 	int err;
3118 
3119 	obj = i915_gem_object_create_internal(gt->i915, sz);
3120 	if (IS_ERR(obj))
3121 		return ERR_CAST(obj);
3122 
3123 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3124 	if (IS_ERR(vma)) {
3125 		i915_gem_object_put(obj);
3126 		return vma;
3127 	}
3128 
3129 	err = i915_ggtt_pin(vma, NULL, 0, 0);
3130 	if (err) {
3131 		i915_vma_put(vma);
3132 		return ERR_PTR(err);
3133 	}
3134 
3135 	return vma;
3136 }
3137 
3138 static struct i915_request *
3139 create_gpr_client(struct intel_engine_cs *engine,
3140 		  struct i915_vma *global,
3141 		  unsigned int offset)
3142 {
3143 	struct i915_vma *batch, *vma;
3144 	struct intel_context *ce;
3145 	struct i915_request *rq;
3146 	int err;
3147 
3148 	ce = intel_context_create(engine);
3149 	if (IS_ERR(ce))
3150 		return ERR_CAST(ce);
3151 
3152 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3153 	if (IS_ERR(vma)) {
3154 		err = PTR_ERR(vma);
3155 		goto out_ce;
3156 	}
3157 
3158 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3159 	if (err)
3160 		goto out_ce;
3161 
3162 	batch = create_gpr_user(engine, vma, offset);
3163 	if (IS_ERR(batch)) {
3164 		err = PTR_ERR(batch);
3165 		goto out_vma;
3166 	}
3167 
3168 	rq = intel_context_create_request(ce);
3169 	if (IS_ERR(rq)) {
3170 		err = PTR_ERR(rq);
3171 		goto out_batch;
3172 	}
3173 
3174 	i915_vma_lock(vma);
3175 	err = i915_request_await_object(rq, vma->obj, false);
3176 	if (!err)
3177 		err = i915_vma_move_to_active(vma, rq, 0);
3178 	i915_vma_unlock(vma);
3179 
3180 	i915_vma_lock(batch);
3181 	if (!err)
3182 		err = i915_request_await_object(rq, batch->obj, false);
3183 	if (!err)
3184 		err = i915_vma_move_to_active(batch, rq, 0);
3185 	if (!err)
3186 		err = rq->engine->emit_bb_start(rq,
3187 						batch->node.start,
3188 						PAGE_SIZE, 0);
3189 	i915_vma_unlock(batch);
3190 	i915_vma_unpin(batch);
3191 
3192 	if (!err)
3193 		i915_request_get(rq);
3194 	i915_request_add(rq);
3195 
3196 out_batch:
3197 	i915_vma_put(batch);
3198 out_vma:
3199 	i915_vma_unpin(vma);
3200 out_ce:
3201 	intel_context_put(ce);
3202 	return err ? ERR_PTR(err) : rq;
3203 }
3204 
3205 static int preempt_user(struct intel_engine_cs *engine,
3206 			struct i915_vma *global,
3207 			int id)
3208 {
3209 	struct i915_sched_attr attr = {
3210 		.priority = I915_PRIORITY_MAX
3211 	};
3212 	struct i915_request *rq;
3213 	int err = 0;
3214 	u32 *cs;
3215 
3216 	rq = intel_engine_create_kernel_request(engine);
3217 	if (IS_ERR(rq))
3218 		return PTR_ERR(rq);
3219 
3220 	cs = intel_ring_begin(rq, 4);
3221 	if (IS_ERR(cs)) {
3222 		i915_request_add(rq);
3223 		return PTR_ERR(cs);
3224 	}
3225 
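	/*
	 * Write the step id into the global semaphore from a max-priority
	 * request, preempting whichever GPR client is currently running.
	 */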
3226 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3227 	*cs++ = i915_ggtt_offset(global);
3228 	*cs++ = 0;
3229 	*cs++ = id;
3230 
3231 	intel_ring_advance(rq, cs);
3232 
3233 	i915_request_get(rq);
3234 	i915_request_add(rq);
3235 
3236 	engine->schedule(rq, &attr);
3237 
3238 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3239 		err = -ETIME;
3240 	i915_request_put(rq);
3241 
3242 	return err;
3243 }
3244 
3245 static int live_preempt_user(void *arg)
3246 {
3247 	struct intel_gt *gt = arg;
3248 	struct intel_engine_cs *engine;
3249 	struct i915_vma *global;
3250 	enum intel_engine_id id;
3251 	u32 *result;
3252 	int err = 0;
3253 
3254 	/*
3255 	 * In our other tests, we look at preemption in carefully
3256 	 * controlled conditions in the ringbuffer. Since most of the
3257 	 * time is spent in user batches, most of our preemptions naturally
3258 	 * occur there. We want to verify that when we preempt inside a batch
3259 	 * we continue on from the current instruction and do not roll back
3260 	 * to the start, or another earlier arbitration point.
3261 	 *
3262 	 * To verify this, we create a batch which is a mixture of
3263 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3264 	 * a few preempting contexts thrown into the mix, we look for any
3265 	 * repeated instructions (which show up as incorrect values).
3266 	 */
3267 
3268 	global = create_global(gt, 4096);
3269 	if (IS_ERR(global))
3270 		return PTR_ERR(global);
3271 
3272 	result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3273 	if (IS_ERR(result)) {
3274 		i915_vma_unpin_and_release(&global, 0);
3275 		return PTR_ERR(result);
3276 	}
3277 
3278 	for_each_engine(engine, gt, id) {
3279 		struct i915_request *client[3] = {};
3280 		struct igt_live_test t;
3281 		int i;
3282 
3283 		if (!intel_engine_has_preemption(engine))
3284 			continue;
3285 
3286 		if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3287 			continue; /* we need per-context GPR */
3288 
3289 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3290 			err = -EIO;
3291 			break;
3292 		}
3293 
3294 		memset(result, 0, 4096);
3295 
3296 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3297 			struct i915_request *rq;
3298 
3299 			rq = create_gpr_client(engine, global,
3300 					       NUM_GPR * i * sizeof(u32));
3301 			if (IS_ERR(rq)) {
3302 				err = PTR_ERR(rq);
3303 				goto end_test;
3304 			}
3305 
3306 			client[i] = rq;
3307 		}
3308 
3309 		/* Continuously preempt the set of 3 running contexts */
3310 		for (i = 1; i <= NUM_GPR; i++) {
3311 			err = preempt_user(engine, global, i);
3312 			if (err)
3313 				goto end_test;
3314 		}
3315 
3316 		if (READ_ONCE(result[0]) != NUM_GPR) {
3317 			pr_err("%s: Failed to release semaphore\n",
3318 			       engine->name);
3319 			err = -EIO;
3320 			goto end_test;
3321 		}
3322 
3323 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3324 			int gpr;
3325 
3326 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3327 				err = -ETIME;
3328 				goto end_test;
3329 			}
3330 
3331 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3332 				if (result[NUM_GPR * i + gpr] != 1) {
3333 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3334 					       engine->name,
3335 					       i, gpr, result[NUM_GPR * i + gpr]);
3336 					err = -EINVAL;
3337 					goto end_test;
3338 				}
3339 			}
3340 		}
3341 
3342 end_test:
3343 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3344 			if (!client[i])
3345 				break;
3346 
3347 			i915_request_put(client[i]);
3348 		}
3349 
3350 		/* Flush the semaphores on error */
3351 		smp_store_mb(result[0], -1);
3352 		if (igt_live_test_end(&t))
3353 			err = -EIO;
3354 		if (err)
3355 			break;
3356 	}
3357 
3358 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3359 	return err;
3360 }
3361 
3362 static int live_preempt_timeout(void *arg)
3363 {
3364 	struct intel_gt *gt = arg;
3365 	struct i915_gem_context *ctx_hi, *ctx_lo;
3366 	struct igt_spinner spin_lo;
3367 	struct intel_engine_cs *engine;
3368 	enum intel_engine_id id;
3369 	int err = -ENOMEM;
3370 
3371 	/*
3372 	 * Check that we force preemption to occur by cancelling the previous
3373 	 * context if it refuses to yield the GPU.
3374 	 */
3375 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3376 		return 0;
3377 
3378 	if (!intel_has_reset_engine(gt))
3379 		return 0;
3380 
3381 	if (igt_spinner_init(&spin_lo, gt))
3382 		return -ENOMEM;
3383 
3384 	ctx_hi = kernel_context(gt->i915);
3385 	if (!ctx_hi)
3386 		goto err_spin_lo;
3387 	ctx_hi->sched.priority =
3388 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3389 
3390 	ctx_lo = kernel_context(gt->i915);
3391 	if (!ctx_lo)
3392 		goto err_ctx_hi;
3393 	ctx_lo->sched.priority =
3394 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3395 
3396 	for_each_engine(engine, gt, id) {
3397 		unsigned long saved_timeout;
3398 		struct i915_request *rq;
3399 
3400 		if (!intel_engine_has_preemption(engine))
3401 			continue;
3402 
3403 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3404 					    MI_NOOP); /* preemption disabled */
3405 		if (IS_ERR(rq)) {
3406 			err = PTR_ERR(rq);
3407 			goto err_ctx_lo;
3408 		}
3409 
3410 		i915_request_add(rq);
3411 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3412 			intel_gt_set_wedged(gt);
3413 			err = -EIO;
3414 			goto err_ctx_lo;
3415 		}
3416 
3417 		rq = igt_request_alloc(ctx_hi, engine);
3418 		if (IS_ERR(rq)) {
3419 			igt_spinner_end(&spin_lo);
3420 			err = PTR_ERR(rq);
3421 			goto err_ctx_lo;
3422 		}
3423 
3424 		/* Flush the previous CS ack before changing timeouts */
3425 		while (READ_ONCE(engine->execlists.pending[0]))
3426 			cpu_relax();
3427 
3428 		saved_timeout = engine->props.preempt_timeout_ms;
3429 		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3430 
3431 		i915_request_get(rq);
3432 		i915_request_add(rq);
3433 
3434 		intel_engine_flush_submission(engine);
3435 		engine->props.preempt_timeout_ms = saved_timeout;
3436 
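		/*
		 * With a 1ms preempt timeout, the non-preemptible spinner
		 * should be cancelled almost immediately, letting the high
		 * priority request complete well within the wait below.
		 */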
3437 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3438 			intel_gt_set_wedged(gt);
3439 			i915_request_put(rq);
3440 			err = -ETIME;
3441 			goto err_ctx_lo;
3442 		}
3443 
3444 		igt_spinner_end(&spin_lo);
3445 		i915_request_put(rq);
3446 	}
3447 
3448 	err = 0;
3449 err_ctx_lo:
3450 	kernel_context_close(ctx_lo);
3451 err_ctx_hi:
3452 	kernel_context_close(ctx_hi);
3453 err_spin_lo:
3454 	igt_spinner_fini(&spin_lo);
3455 	return err;
3456 }
3457 
3458 static int random_range(struct rnd_state *rnd, int min, int max)
3459 {
3460 	return i915_prandom_u32_max_state(max - min, rnd) + min;
3461 }
3462 
3463 static int random_priority(struct rnd_state *rnd)
3464 {
3465 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3466 }
3467 
3468 struct preempt_smoke {
3469 	struct intel_gt *gt;
3470 	struct i915_gem_context **contexts;
3471 	struct intel_engine_cs *engine;
3472 	struct drm_i915_gem_object *batch;
3473 	unsigned int ncontext;
3474 	struct rnd_state prng;
3475 	unsigned long count;
3476 };
3477 
3478 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3479 {
3480 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3481 							  &smoke->prng)];
3482 }
3483 
3484 static int smoke_submit(struct preempt_smoke *smoke,
3485 			struct i915_gem_context *ctx, int prio,
3486 			struct drm_i915_gem_object *batch)
3487 {
3488 	struct i915_request *rq;
3489 	struct i915_vma *vma = NULL;
3490 	int err = 0;
3491 
3492 	if (batch) {
3493 		struct i915_address_space *vm;
3494 
3495 		vm = i915_gem_context_get_vm_rcu(ctx);
3496 		vma = i915_vma_instance(batch, vm, NULL);
3497 		i915_vm_put(vm);
3498 		if (IS_ERR(vma))
3499 			return PTR_ERR(vma);
3500 
3501 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3502 		if (err)
3503 			return err;
3504 	}
3505 
3506 	ctx->sched.priority = prio;
3507 
3508 	rq = igt_request_alloc(ctx, smoke->engine);
3509 	if (IS_ERR(rq)) {
3510 		err = PTR_ERR(rq);
3511 		goto unpin;
3512 	}
3513 
3514 	if (vma) {
3515 		i915_vma_lock(vma);
3516 		err = i915_request_await_object(rq, vma->obj, false);
3517 		if (!err)
3518 			err = i915_vma_move_to_active(vma, rq, 0);
3519 		if (!err)
3520 			err = rq->engine->emit_bb_start(rq,
3521 							vma->node.start,
3522 							PAGE_SIZE, 0);
3523 		i915_vma_unlock(vma);
3524 	}
3525 
3526 	i915_request_add(rq);
3527 
3528 unpin:
3529 	if (vma)
3530 		i915_vma_unpin(vma);
3531 
3532 	return err;
3533 }
3534 
3535 static int smoke_crescendo_thread(void *arg)
3536 {
3537 	struct preempt_smoke *smoke = arg;
3538 	IGT_TIMEOUT(end_time);
3539 	unsigned long count;
3540 
3541 	count = 0;
3542 	do {
3543 		struct i915_gem_context *ctx = smoke_context(smoke);
3544 		int err;
3545 
3546 		err = smoke_submit(smoke,
3547 				   ctx, count % I915_PRIORITY_MAX,
3548 				   smoke->batch);
3549 		if (err)
3550 			return err;
3551 
3552 		count++;
3553 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3554 
3555 	smoke->count = count;
3556 	return 0;
3557 }
3558 
3559 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3560 #define BATCH BIT(0)
3561 {
3562 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3563 	struct preempt_smoke arg[I915_NUM_ENGINES];
3564 	struct intel_engine_cs *engine;
3565 	enum intel_engine_id id;
3566 	unsigned long count;
3567 	int err = 0;
3568 
3569 	for_each_engine(engine, smoke->gt, id) {
3570 		arg[id] = *smoke;
3571 		arg[id].engine = engine;
3572 		if (!(flags & BATCH))
3573 			arg[id].batch = NULL;
3574 		arg[id].count = 0;
3575 
3576 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3577 				      "igt/smoke:%d", id);
3578 		if (IS_ERR(tsk[id])) {
3579 			err = PTR_ERR(tsk[id]);
3580 			break;
3581 		}
3582 		get_task_struct(tsk[id]);
3583 	}
3584 
3585 	yield(); /* start all threads before we kthread_stop() */
3586 
3587 	count = 0;
3588 	for_each_engine(engine, smoke->gt, id) {
3589 		int status;
3590 
3591 		if (IS_ERR_OR_NULL(tsk[id]))
3592 			continue;
3593 
3594 		status = kthread_stop(tsk[id]);
3595 		if (status && !err)
3596 			err = status;
3597 
3598 		count += arg[id].count;
3599 
3600 		put_task_struct(tsk[id]);
3601 	}
3602 
3603 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3604 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3605 	return err;
3606 }
3607 
3608 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3609 {
3610 	enum intel_engine_id id;
3611 	IGT_TIMEOUT(end_time);
3612 	unsigned long count;
3613 
3614 	count = 0;
3615 	do {
3616 		for_each_engine(smoke->engine, smoke->gt, id) {
3617 			struct i915_gem_context *ctx = smoke_context(smoke);
3618 			int err;
3619 
3620 			err = smoke_submit(smoke,
3621 					   ctx, random_priority(&smoke->prng),
3622 					   flags & BATCH ? smoke->batch : NULL);
3623 			if (err)
3624 				return err;
3625 
3626 			count++;
3627 		}
3628 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3629 
3630 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3631 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3632 	return 0;
3633 }
3634 
3635 static int live_preempt_smoke(void *arg)
3636 {
3637 	struct preempt_smoke smoke = {
3638 		.gt = arg,
3639 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3640 		.ncontext = 256,
3641 	};
3642 	const unsigned int phase[] = { 0, BATCH };
3643 	struct igt_live_test t;
3644 	int err = -ENOMEM;
3645 	u32 *cs;
3646 	int n;
3647 
3648 	smoke.contexts = kmalloc_array(smoke.ncontext,
3649 				       sizeof(*smoke.contexts),
3650 				       GFP_KERNEL);
3651 	if (!smoke.contexts)
3652 		return -ENOMEM;
3653 
3654 	smoke.batch =
3655 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3656 	if (IS_ERR(smoke.batch)) {
3657 		err = PTR_ERR(smoke.batch);
3658 		goto err_free;
3659 	}
3660 
3661 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3662 	if (IS_ERR(cs)) {
3663 		err = PTR_ERR(cs);
3664 		goto err_batch;
3665 	}
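	/*
	 * The smoke batch is a page of MI_ARB_CHECK, i.e. nothing but
	 * arbitration points, so it may be preempted at any instruction.
	 */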
3666 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3667 		cs[n] = MI_ARB_CHECK;
3668 	cs[n] = MI_BATCH_BUFFER_END;
3669 	i915_gem_object_flush_map(smoke.batch);
3670 	i915_gem_object_unpin_map(smoke.batch);
3671 
3672 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3673 		err = -EIO;
3674 		goto err_batch;
3675 	}
3676 
3677 	for (n = 0; n < smoke.ncontext; n++) {
3678 		smoke.contexts[n] = kernel_context(smoke.gt->i915);
3679 		if (!smoke.contexts[n])
3680 			goto err_ctx;
3681 	}
3682 
3683 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3684 		err = smoke_crescendo(&smoke, phase[n]);
3685 		if (err)
3686 			goto err_ctx;
3687 
3688 		err = smoke_random(&smoke, phase[n]);
3689 		if (err)
3690 			goto err_ctx;
3691 	}
3692 
3693 err_ctx:
3694 	if (igt_live_test_end(&t))
3695 		err = -EIO;
3696 
3697 	for (n = 0; n < smoke.ncontext; n++) {
3698 		if (!smoke.contexts[n])
3699 			break;
3700 		kernel_context_close(smoke.contexts[n]);
3701 	}
3702 
3703 err_batch:
3704 	i915_gem_object_put(smoke.batch);
3705 err_free:
3706 	kfree(smoke.contexts);
3707 
3708 	return err;
3709 }
3710 
3711 static int nop_virtual_engine(struct intel_gt *gt,
3712 			      struct intel_engine_cs **siblings,
3713 			      unsigned int nsibling,
3714 			      unsigned int nctx,
3715 			      unsigned int flags)
3716 #define CHAIN BIT(0)
3717 {
3718 	IGT_TIMEOUT(end_time);
3719 	struct i915_request *request[16] = {};
3720 	struct intel_context *ve[16];
3721 	unsigned long n, prime, nc;
3722 	struct igt_live_test t;
3723 	ktime_t times[2] = {};
3724 	int err;
3725 
3726 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3727 
3728 	for (n = 0; n < nctx; n++) {
3729 		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3730 		if (IS_ERR(ve[n])) {
3731 			err = PTR_ERR(ve[n]);
3732 			nctx = n;
3733 			goto out;
3734 		}
3735 
3736 		err = intel_context_pin(ve[n]);
3737 		if (err) {
3738 			intel_context_put(ve[n]);
3739 			nctx = n;
3740 			goto out;
3741 		}
3742 	}
3743 
3744 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3745 	if (err)
3746 		goto out;
3747 
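	/*
	 * Submit 'prime' nop requests on each virtual engine: with CHAIN we
	 * queue all the requests for one context before moving on to the
	 * next, otherwise we interleave the contexts request by request.
	 */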
3748 	for_each_prime_number_from(prime, 1, 8192) {
3749 		times[1] = ktime_get_raw();
3750 
3751 		if (flags & CHAIN) {
3752 			for (nc = 0; nc < nctx; nc++) {
3753 				for (n = 0; n < prime; n++) {
3754 					struct i915_request *rq;
3755 
3756 					rq = i915_request_create(ve[nc]);
3757 					if (IS_ERR(rq)) {
3758 						err = PTR_ERR(rq);
3759 						goto out;
3760 					}
3761 
3762 					if (request[nc])
3763 						i915_request_put(request[nc]);
3764 					request[nc] = i915_request_get(rq);
3765 					i915_request_add(rq);
3766 				}
3767 			}
3768 		} else {
3769 			for (n = 0; n < prime; n++) {
3770 				for (nc = 0; nc < nctx; nc++) {
3771 					struct i915_request *rq;
3772 
3773 					rq = i915_request_create(ve[nc]);
3774 					if (IS_ERR(rq)) {
3775 						err = PTR_ERR(rq);
3776 						goto out;
3777 					}
3778 
3779 					if (request[nc])
3780 						i915_request_put(request[nc]);
3781 					request[nc] = i915_request_get(rq);
3782 					i915_request_add(rq);
3783 				}
3784 			}
3785 		}
3786 
3787 		for (nc = 0; nc < nctx; nc++) {
3788 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3789 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3790 				       __func__, ve[0]->engine->name,
3791 				       request[nc]->fence.context,
3792 				       request[nc]->fence.seqno);
3793 
3794 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3795 					  __func__, ve[0]->engine->name,
3796 					  request[nc]->fence.context,
3797 					  request[nc]->fence.seqno);
3798 				GEM_TRACE_DUMP();
3799 				intel_gt_set_wedged(gt);
3800 				break;
3801 			}
3802 		}
3803 
3804 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3805 		if (prime == 1)
3806 			times[0] = times[1];
3807 
3808 		for (nc = 0; nc < nctx; nc++) {
3809 			i915_request_put(request[nc]);
3810 			request[nc] = NULL;
3811 		}
3812 
3813 		if (__igt_timeout(end_time, NULL))
3814 			break;
3815 	}
3816 
3817 	err = igt_live_test_end(&t);
3818 	if (err)
3819 		goto out;
3820 
3821 	pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3822 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3823 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3824 
3825 out:
3826 	if (igt_flush_test(gt->i915))
3827 		err = -EIO;
3828 
3829 	for (nc = 0; nc < nctx; nc++) {
3830 		i915_request_put(request[nc]);
3831 		intel_context_unpin(ve[nc]);
3832 		intel_context_put(ve[nc]);
3833 	}
3834 	return err;
3835 }
3836 
3837 static unsigned int
3838 __select_siblings(struct intel_gt *gt,
3839 		  unsigned int class,
3840 		  struct intel_engine_cs **siblings,
3841 		  bool (*filter)(const struct intel_engine_cs *))
3842 {
3843 	unsigned int n = 0;
3844 	unsigned int inst;
3845 
3846 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3847 		if (!gt->engine_class[class][inst])
3848 			continue;
3849 
3850 		if (filter && !filter(gt->engine_class[class][inst]))
3851 			continue;
3852 
3853 		siblings[n++] = gt->engine_class[class][inst];
3854 	}
3855 
3856 	return n;
3857 }
3858 
3859 static unsigned int
3860 select_siblings(struct intel_gt *gt,
3861 		unsigned int class,
3862 		struct intel_engine_cs **siblings)
3863 {
3864 	return __select_siblings(gt, class, siblings, NULL);
3865 }
3866 
3867 static int live_virtual_engine(void *arg)
3868 {
3869 	struct intel_gt *gt = arg;
3870 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3871 	struct intel_engine_cs *engine;
3872 	enum intel_engine_id id;
3873 	unsigned int class;
3874 	int err;
3875 
3876 	if (intel_uc_uses_guc_submission(&gt->uc))
3877 		return 0;
3878 
3879 	for_each_engine(engine, gt, id) {
3880 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3881 		if (err) {
3882 			pr_err("Failed to wrap engine %s: err=%d\n",
3883 			       engine->name, err);
3884 			return err;
3885 		}
3886 	}
3887 
3888 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3889 		int nsibling, n;
3890 
3891 		nsibling = select_siblings(gt, class, siblings);
3892 		if (nsibling < 2)
3893 			continue;
3894 
3895 		for (n = 1; n <= nsibling + 1; n++) {
3896 			err = nop_virtual_engine(gt, siblings, nsibling,
3897 						 n, 0);
3898 			if (err)
3899 				return err;
3900 		}
3901 
3902 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3903 		if (err)
3904 			return err;
3905 	}
3906 
3907 	return 0;
3908 }
3909 
3910 static int mask_virtual_engine(struct intel_gt *gt,
3911 			       struct intel_engine_cs **siblings,
3912 			       unsigned int nsibling)
3913 {
3914 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3915 	struct intel_context *ve;
3916 	struct igt_live_test t;
3917 	unsigned int n;
3918 	int err;
3919 
3920 	/*
3921 	 * Check that by setting the execution mask on a request, we can
3922 	 * restrict it to our desired engine within the virtual engine.
3923 	 */
3924 
3925 	ve = intel_execlists_create_virtual(siblings, nsibling);
3926 	if (IS_ERR(ve)) {
3927 		err = PTR_ERR(ve);
3928 		goto out_close;
3929 	}
3930 
3931 	err = intel_context_pin(ve);
3932 	if (err)
3933 		goto out_put;
3934 
3935 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3936 	if (err)
3937 		goto out_unpin;
3938 
3939 	for (n = 0; n < nsibling; n++) {
3940 		request[n] = i915_request_create(ve);
3941 		if (IS_ERR(request[n])) {
3942 			err = PTR_ERR(request[n]);
3943 			nsibling = n;
3944 			goto out;
3945 		}
3946 
3947 		/* Reverse order as it's more likely to be unnatural */
3948 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3949 
3950 		i915_request_get(request[n]);
3951 		i915_request_add(request[n]);
3952 	}
3953 
3954 	for (n = 0; n < nsibling; n++) {
3955 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3956 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3957 			       __func__, ve->engine->name,
3958 			       request[n]->fence.context,
3959 			       request[n]->fence.seqno);
3960 
3961 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3962 				  __func__, ve->engine->name,
3963 				  request[n]->fence.context,
3964 				  request[n]->fence.seqno);
3965 			GEM_TRACE_DUMP();
3966 			intel_gt_set_wedged(gt);
3967 			err = -EIO;
3968 			goto out;
3969 		}
3970 
3971 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3972 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3973 			       request[n]->engine->name,
3974 			       siblings[nsibling - n - 1]->name);
3975 			err = -EINVAL;
3976 			goto out;
3977 		}
3978 	}
3979 
3980 	err = igt_live_test_end(&t);
3981 out:
3982 	if (igt_flush_test(gt->i915))
3983 		err = -EIO;
3984 
3985 	for (n = 0; n < nsibling; n++)
3986 		i915_request_put(request[n]);
3987 
3988 out_unpin:
3989 	intel_context_unpin(ve);
3990 out_put:
3991 	intel_context_put(ve);
3992 out_close:
3993 	return err;
3994 }
3995 
3996 static int live_virtual_mask(void *arg)
3997 {
3998 	struct intel_gt *gt = arg;
3999 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4000 	unsigned int class;
4001 	int err;
4002 
4003 	if (intel_uc_uses_guc_submission(&gt->uc))
4004 		return 0;
4005 
4006 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4007 		unsigned int nsibling;
4008 
4009 		nsibling = select_siblings(gt, class, siblings);
4010 		if (nsibling < 2)
4011 			continue;
4012 
4013 		err = mask_virtual_engine(gt, siblings, nsibling);
4014 		if (err)
4015 			return err;
4016 	}
4017 
4018 	return 0;
4019 }
4020 
4021 static int slicein_virtual_engine(struct intel_gt *gt,
4022 				  struct intel_engine_cs **siblings,
4023 				  unsigned int nsibling)
4024 {
4025 	const long timeout = slice_timeout(siblings[0]);
4026 	struct intel_context *ce;
4027 	struct i915_request *rq;
4028 	struct igt_spinner spin;
4029 	unsigned int n;
4030 	int err = 0;
4031 
4032 	/*
4033 	 * Virtual requests must take part in timeslicing on the target engines.
4034 	 */
4035 
4036 	if (igt_spinner_init(&spin, gt))
4037 		return -ENOMEM;
4038 
4039 	for (n = 0; n < nsibling; n++) {
4040 		ce = intel_context_create(siblings[n]);
4041 		if (IS_ERR(ce)) {
4042 			err = PTR_ERR(ce);
4043 			goto out;
4044 		}
4045 
4046 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4047 		intel_context_put(ce);
4048 		if (IS_ERR(rq)) {
4049 			err = PTR_ERR(rq);
4050 			goto out;
4051 		}
4052 
4053 		i915_request_add(rq);
4054 	}
4055 
4056 	ce = intel_execlists_create_virtual(siblings, nsibling);
4057 	if (IS_ERR(ce)) {
4058 		err = PTR_ERR(ce);
4059 		goto out;
4060 	}
4061 
4062 	rq = intel_context_create_request(ce);
4063 	intel_context_put(ce);
4064 	if (IS_ERR(rq)) {
4065 		err = PTR_ERR(rq);
4066 		goto out;
4067 	}
4068 
4069 	i915_request_get(rq);
4070 	i915_request_add(rq);
4071 	if (i915_request_wait(rq, 0, timeout) < 0) {
4072 		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4073 			      __func__, rq->engine->name);
4074 		GEM_TRACE_DUMP();
4075 		intel_gt_set_wedged(gt);
4076 		err = -EIO;
4077 	}
4078 	i915_request_put(rq);
4079 
4080 out:
4081 	igt_spinner_end(&spin);
4082 	if (igt_flush_test(gt->i915))
4083 		err = -EIO;
4084 	igt_spinner_fini(&spin);
4085 	return err;
4086 }
4087 
4088 static int sliceout_virtual_engine(struct intel_gt *gt,
4089 				   struct intel_engine_cs **siblings,
4090 				   unsigned int nsibling)
4091 {
4092 	const long timeout = slice_timeout(siblings[0]);
4093 	struct intel_context *ce;
4094 	struct i915_request *rq;
4095 	struct igt_spinner spin;
4096 	unsigned int n;
4097 	int err = 0;
4098 
4099 	/*
4100 	 * Virtual requests must allow others a fair timeslice.
4101 	 */
4102 
4103 	if (igt_spinner_init(&spin, gt))
4104 		return -ENOMEM;
4105 
4106 	/* XXX We do not handle oversubscription and fairness with normal rq */
4107 	for (n = 0; n < nsibling; n++) {
4108 		ce = intel_execlists_create_virtual(siblings, nsibling);
4109 		if (IS_ERR(ce)) {
4110 			err = PTR_ERR(ce);
4111 			goto out;
4112 		}
4113 
4114 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4115 		intel_context_put(ce);
4116 		if (IS_ERR(rq)) {
4117 			err = PTR_ERR(rq);
4118 			goto out;
4119 		}
4120 
4121 		i915_request_add(rq);
4122 	}
4123 
4124 	for (n = 0; !err && n < nsibling; n++) {
4125 		ce = intel_context_create(siblings[n]);
4126 		if (IS_ERR(ce)) {
4127 			err = PTR_ERR(ce);
4128 			goto out;
4129 		}
4130 
4131 		rq = intel_context_create_request(ce);
4132 		intel_context_put(ce);
4133 		if (IS_ERR(rq)) {
4134 			err = PTR_ERR(rq);
4135 			goto out;
4136 		}
4137 
4138 		i915_request_get(rq);
4139 		i915_request_add(rq);
4140 		if (i915_request_wait(rq, 0, timeout) < 0) {
4141 			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4142 				      __func__, siblings[n]->name);
4143 			GEM_TRACE_DUMP();
4144 			intel_gt_set_wedged(gt);
4145 			err = -EIO;
4146 		}
4147 		i915_request_put(rq);
4148 	}
4149 
4150 out:
4151 	igt_spinner_end(&spin);
4152 	if (igt_flush_test(gt->i915))
4153 		err = -EIO;
4154 	igt_spinner_fini(&spin);
4155 	return err;
4156 }
4157 
4158 static int live_virtual_slice(void *arg)
4159 {
4160 	struct intel_gt *gt = arg;
4161 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4162 	unsigned int class;
4163 	int err;
4164 
4165 	if (intel_uc_uses_guc_submission(&gt->uc))
4166 		return 0;
4167 
4168 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4169 		unsigned int nsibling;
4170 
4171 		nsibling = __select_siblings(gt, class, siblings,
4172 					     intel_engine_has_timeslices);
4173 		if (nsibling < 2)
4174 			continue;
4175 
4176 		err = slicein_virtual_engine(gt, siblings, nsibling);
4177 		if (err)
4178 			return err;
4179 
4180 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4181 		if (err)
4182 			return err;
4183 	}
4184 
4185 	return 0;
4186 }
4187 
4188 static int preserved_virtual_engine(struct intel_gt *gt,
4189 				    struct intel_engine_cs **siblings,
4190 				    unsigned int nsibling)
4191 {
4192 	struct i915_request *last = NULL;
4193 	struct intel_context *ve;
4194 	struct i915_vma *scratch;
4195 	struct igt_live_test t;
4196 	unsigned int n;
4197 	int err = 0;
4198 	u32 *cs;
4199 
4200 	scratch = __vm_create_scratch_for_read(&siblings[0]->gt->ggtt->vm,
4201 					       PAGE_SIZE);
4202 	if (IS_ERR(scratch))
4203 		return PTR_ERR(scratch);
4204 
4205 	err = i915_vma_sync(scratch);
4206 	if (err)
4207 		goto out_scratch;
4208 
4209 	ve = intel_execlists_create_virtual(siblings, nsibling);
4210 	if (IS_ERR(ve)) {
4211 		err = PTR_ERR(ve);
4212 		goto out_scratch;
4213 	}
4214 
4215 	err = intel_context_pin(ve);
4216 	if (err)
4217 		goto out_put;
4218 
4219 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4220 	if (err)
4221 		goto out_unpin;
4222 
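	/*
	 * Each request saves the GPR written by its predecessor (which ran
	 * on a different sibling) into the scratch page, then writes the
	 * next value; the readback below is only correct if the GPR state
	 * follows the context from engine to engine.
	 */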
4223 	for (n = 0; n < NUM_GPR_DW; n++) {
4224 		struct intel_engine_cs *engine = siblings[n % nsibling];
4225 		struct i915_request *rq;
4226 
4227 		rq = i915_request_create(ve);
4228 		if (IS_ERR(rq)) {
4229 			err = PTR_ERR(rq);
4230 			goto out_end;
4231 		}
4232 
4233 		i915_request_put(last);
4234 		last = i915_request_get(rq);
4235 
4236 		cs = intel_ring_begin(rq, 8);
4237 		if (IS_ERR(cs)) {
4238 			i915_request_add(rq);
4239 			err = PTR_ERR(cs);
4240 			goto out_end;
4241 		}
4242 
4243 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4244 		*cs++ = CS_GPR(engine, n);
4245 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4246 		*cs++ = 0;
4247 
4248 		*cs++ = MI_LOAD_REGISTER_IMM(1);
4249 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4250 		*cs++ = n + 1;
4251 
4252 		*cs++ = MI_NOOP;
4253 		intel_ring_advance(rq, cs);
4254 
4255 		/* Restrict this request to run on a particular engine */
4256 		rq->execution_mask = engine->mask;
4257 		i915_request_add(rq);
4258 	}
4259 
4260 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
4261 		err = -ETIME;
4262 		goto out_end;
4263 	}
4264 
4265 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4266 	if (IS_ERR(cs)) {
4267 		err = PTR_ERR(cs);
4268 		goto out_end;
4269 	}
4270 
4271 	for (n = 0; n < NUM_GPR_DW; n++) {
4272 		if (cs[n] != n) {
4273 			pr_err("Incorrect value %d found for GPR[%d]\n",
4274 			       cs[n], n);
4275 			err = -EINVAL;
4276 			break;
4277 		}
4278 	}
4279 
4280 	i915_gem_object_unpin_map(scratch->obj);
4281 
4282 out_end:
4283 	if (igt_live_test_end(&t))
4284 		err = -EIO;
4285 	i915_request_put(last);
4286 out_unpin:
4287 	intel_context_unpin(ve);
4288 out_put:
4289 	intel_context_put(ve);
4290 out_scratch:
4291 	i915_vma_unpin_and_release(&scratch, 0);
4292 	return err;
4293 }
4294 
4295 static int live_virtual_preserved(void *arg)
4296 {
4297 	struct intel_gt *gt = arg;
4298 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4299 	unsigned int class;
4300 
4301 	/*
4302 	 * Check that the context image retains non-privileged (user) registers
4303 	 * from one engine to the next. For this we check that the CS_GPR
4304 	 * registers are preserved.
4305 	 */
4306 
4307 	if (intel_uc_uses_guc_submission(&gt->uc))
4308 		return 0;
4309 
4310 	/* As we use the CS_GPR, we cannot run before they existed on all engines. */
4311 	if (INTEL_GEN(gt->i915) < 9)
4312 		return 0;
4313 
4314 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4315 		int nsibling, err;
4316 
4317 		nsibling = select_siblings(gt, class, siblings);
4318 		if (nsibling < 2)
4319 			continue;
4320 
4321 		err = preserved_virtual_engine(gt, siblings, nsibling);
4322 		if (err)
4323 			return err;
4324 	}
4325 
4326 	return 0;
4327 }
4328 
4329 static int bond_virtual_engine(struct intel_gt *gt,
4330 			       unsigned int class,
4331 			       struct intel_engine_cs **siblings,
4332 			       unsigned int nsibling,
4333 			       unsigned int flags)
4334 #define BOND_SCHEDULE BIT(0)
4335 {
4336 	struct intel_engine_cs *master;
4337 	struct i915_request *rq[16];
4338 	enum intel_engine_id id;
4339 	struct igt_spinner spin;
4340 	unsigned long n;
4341 	int err;
4342 
4343 	/*
4344 	 * A set of bonded requests is intended to be run concurrently
4345 	 * across a number of engines. We use one request per engine
4346 	 * and a magic fence to schedule each of the bonded requests
4347 	 * at the same time. A consequence of our current scheduler is that
4348 	 * we only move requests to the HW ready queue when the request
4349 	 * becomes ready, that is when all of its prerequisite fences have
4350 	 * been signaled. As one of those fences is the master submit fence,
4351 	 * there is a delay on all secondary fences as the HW may be
4352 	 * currently busy. Equally, as all the requests are independent,
4353 	 * they may have other fences that delay individual request
4354 	 * submission to HW. Ergo, we do not guarantee that all requests are
4355 	 * immediately submitted to HW at the same time, just that if the
4356 	 * rules are abided by, they are ready at the same time as the
4357 	 * first is submitted. Userspace can embed semaphores in its batch
4358 	 * to ensure parallel execution of its phases as it requires.
4359 	 * Naturally, it has been suggested that the scheduler should instead
4360 	 * take care of parallel execution, even across preemption events on
4361 	 * different HW. (The proper answer is of course "lalalala".)
4362 	 *
4363 	 * With the submit-fence, we have identified three possible phases
4364 	 * of synchronisation depending on the master fence: queued (not
4365 	 * ready), executing, and signaled. The first two are quite simple
4366 	 * and checked below. However, the signaled master fence handling is
4367 	 * contentious. Currently we do not distinguish between a signaled
4368 	 * fence and an expired fence, as once signaled it no longer conveys
4369 	 * any information about the previous execution. It may even have been
4370 	 * freed and may no longer exist by the time we check. Ergo we currently
4371 	 * do not apply the bonding constraint for an already signaled fence,
4372 	 * as our expectation is that it should not constrain the secondaries
4373 	 * and is outside of the scope of the bonded request API (i.e. all
4374 	 * userspace requests are meant to be running in parallel). As
4375 	 * it imposes no constraint, and is effectively a no-op, we do not
4376 	 * check below as normal execution flows are checked extensively above.
4377 	 *
4378 	 * XXX Is the degenerate handling of signaled submit fences the
4379 	 * expected behaviour for userspace?
4380 	 */
4381 
4382 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4383 
4384 	if (igt_spinner_init(&spin, gt))
4385 		return -ENOMEM;
4386 
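	/*
	 * The rq[] array is walked up to the first error pointer during
	 * cleanup, so keep unused slots poisoned with ERR_PTR sentinels.
	 */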
4387 	err = 0;
4388 	rq[0] = ERR_PTR(-ENOMEM);
4389 	for_each_engine(master, gt, id) {
4390 		struct i915_sw_fence fence = {};
4391 		struct intel_context *ce;
4392 
4393 		if (master->class == class)
4394 			continue;
4395 
4396 		ce = intel_context_create(master);
4397 		if (IS_ERR(ce)) {
4398 			err = PTR_ERR(ce);
4399 			goto out;
4400 		}
4401 
4402 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4403 
4404 		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4405 		intel_context_put(ce);
4406 		if (IS_ERR(rq[0])) {
4407 			err = PTR_ERR(rq[0]);
4408 			goto out;
4409 		}
4410 		i915_request_get(rq[0]);
4411 
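
		/*
		 * Exercise two phases of the master submit fence: with
		 * BOND_SCHEDULE the master is held back by an onstack fence
		 * and so is still queued (not ready) while the bonds are
		 * constructed; without it the master spins on the HW and the
		 * bonds are attached to an already executing fence.
		 */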
4412 		if (flags & BOND_SCHEDULE) {
4413 			onstack_fence_init(&fence);
4414 			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4415 							       &fence,
4416 							       GFP_KERNEL);
4417 		}
4418 
4419 		i915_request_add(rq[0]);
4420 		if (err < 0)
4421 			goto out;
4422 
4423 		if (!(flags & BOND_SCHEDULE) &&
4424 		    !igt_wait_for_spinner(&spin, rq[0])) {
4425 			err = -EIO;
4426 			goto out;
4427 		}
4428 
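		/*
		 * For each sibling, create a fresh virtual engine, bond it to
		 * the master so that the submit fence steers execution onto
		 * siblings[n], and queue a request that only becomes ready
		 * once the master request starts executing.
		 */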
4429 		for (n = 0; n < nsibling; n++) {
4430 			struct intel_context *ve;
4431 
4432 			ve = intel_execlists_create_virtual(siblings, nsibling);
4433 			if (IS_ERR(ve)) {
4434 				err = PTR_ERR(ve);
4435 				onstack_fence_fini(&fence);
4436 				goto out;
4437 			}
4438 
4439 			err = intel_virtual_engine_attach_bond(ve->engine,
4440 							       master,
4441 							       siblings[n]);
4442 			if (err) {
4443 				intel_context_put(ve);
4444 				onstack_fence_fini(&fence);
4445 				goto out;
4446 			}
4447 
4448 			err = intel_context_pin(ve);
4449 			intel_context_put(ve);
4450 			if (err) {
4451 				onstack_fence_fini(&fence);
4452 				goto out;
4453 			}
4454 
4455 			rq[n + 1] = i915_request_create(ve);
4456 			intel_context_unpin(ve);
4457 			if (IS_ERR(rq[n + 1])) {
4458 				err = PTR_ERR(rq[n + 1]);
4459 				onstack_fence_fini(&fence);
4460 				goto out;
4461 			}
4462 			i915_request_get(rq[n + 1]);
4463 
4464 			err = i915_request_await_execution(rq[n + 1],
4465 							   &rq[0]->fence,
4466 							   ve->engine->bond_execute);
4467 			i915_request_add(rq[n + 1]);
4468 			if (err < 0) {
4469 				onstack_fence_fini(&fence);
4470 				goto out;
4471 			}
4472 		}
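
		/*
		 * Release the master (signal the onstack fence, stop the
		 * spinner), then verify that every bonded request ran, and
		 * ran on its designated sibling.
		 */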
4473 		onstack_fence_fini(&fence);
4474 		intel_engine_flush_submission(master);
4475 		igt_spinner_end(&spin);
4476 
4477 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4478 			pr_err("Master request did not execute (on %s)!\n",
4479 			       rq[0]->engine->name);
4480 			err = -EIO;
4481 			goto out;
4482 		}
4483 
4484 		for (n = 0; n < nsibling; n++) {
4485 			if (i915_request_wait(rq[n + 1], 0,
4486 					      MAX_SCHEDULE_TIMEOUT) < 0) {
4487 				err = -EIO;
4488 				goto out;
4489 			}
4490 
4491 			if (rq[n + 1]->engine != siblings[n]) {
4492 				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4493 				       siblings[n]->name,
4494 				       rq[n + 1]->engine->name,
4495 				       rq[0]->engine->name);
4496 				err = -EINVAL;
4497 				goto out;
4498 			}
4499 		}
4500 
4501 		for (n = 0; !IS_ERR(rq[n]); n++)
4502 			i915_request_put(rq[n]);
4503 		rq[0] = ERR_PTR(-ENOMEM);
4504 	}
4505 
4506 out:
4507 	for (n = 0; !IS_ERR(rq[n]); n++)
4508 		i915_request_put(rq[n]);
4509 	if (igt_flush_test(gt->i915))
4510 		err = -EIO;
4511 
4512 	igt_spinner_fini(&spin);
4513 	return err;
4514 }
4515 
4516 static int live_virtual_bond(void *arg)
4517 {
4518 	static const struct phase {
4519 		const char *name;
4520 		unsigned int flags;
4521 	} phases[] = {
4522 		{ "", 0 },
4523 		{ "schedule", BOND_SCHEDULE },
4524 		{ },
4525 	};
4526 	struct intel_gt *gt = arg;
4527 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4528 	unsigned int class;
4529 	int err;
4530 
4531 	if (intel_uc_uses_guc_submission(&gt->uc))
4532 		return 0;
4533 
4534 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4535 		const struct phase *p;
4536 		int nsibling;
4537 
4538 		nsibling = select_siblings(gt, class, siblings);
4539 		if (nsibling < 2)
4540 			continue;
4541 
4542 		for (p = phases; p->name; p++) {
4543 			err = bond_virtual_engine(gt,
4544 						  class, siblings, nsibling,
4545 						  p->flags);
4546 			if (err) {
4547 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4548 				       __func__, p->name, class, nsibling, err);
4549 				return err;
4550 			}
4551 		}
4552 	}
4553 
4554 	return 0;
4555 }
4556 
4557 static int reset_virtual_engine(struct intel_gt *gt,
4558 				struct intel_engine_cs **siblings,
4559 				unsigned int nsibling)
4560 {
4561 	struct intel_engine_cs *engine;
4562 	struct intel_context *ve;
4563 	struct igt_spinner spin;
4564 	struct i915_request *rq;
4565 	unsigned int n;
4566 	int err = 0;
4567 
4568 	/*
4569 	 * In order to support offline error capture for fast preempt reset,
4570 	 * we need to decouple the guilty request and ensure that it and its
4571 	 * descendants are not executed while the capture is in progress.
4572 	 */
4573 
4574 	if (igt_spinner_init(&spin, gt))
4575 		return -ENOMEM;
4576 
4577 	ve = intel_execlists_create_virtual(siblings, nsibling);
4578 	if (IS_ERR(ve)) {
4579 		err = PTR_ERR(ve);
4580 		goto out_spin;
4581 	}
4582 
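	/*
	 * Disable the heartbeats on all siblings so that the background
	 * pulse does not preempt or reset our spinner behind our back.
	 */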
4583 	for (n = 0; n < nsibling; n++)
4584 		st_engine_heartbeat_disable(siblings[n]);
4585 
4586 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4587 	if (IS_ERR(rq)) {
4588 		err = PTR_ERR(rq);
4589 		goto out_heartbeat;
4590 	}
4591 	i915_request_add(rq);
4592 
4593 	if (!igt_wait_for_spinner(&spin, rq)) {
4594 		intel_gt_set_wedged(gt);
4595 		err = -ETIME;
4596 		goto out_heartbeat;
4597 	}
4598 
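	/*
	 * The virtual request is now running on one of the physical
	 * siblings; note which engine it landed on so we can reset just
	 * that engine.
	 */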
4599 	engine = rq->engine;
4600 	GEM_BUG_ON(engine == ve->engine);
4601 
4602 	/* Take ownership of the reset and tasklet */
4603 	local_bh_disable();
4604 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4605 			     &gt->reset.flags)) {
4606 		local_bh_enable();
4607 		intel_gt_set_wedged(gt);
4608 		err = -EBUSY;
4609 		goto out_heartbeat;
4610 	}
4611 	tasklet_disable(&engine->execlists.tasklet);
4612 
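	/*
	 * Run the submission tasklet by hand (it cannot run concurrently
	 * while disabled) to flush any pending events and confirm that the
	 * spinner is the request currently active on the engine.
	 */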
4613 	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4614 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4615 
4616 	/* Fake a failed preemption event and unwind the incomplete requests */
4617 	spin_lock_irq(&engine->active.lock);
4618 	__unwind_incomplete_requests(engine);
4619 	spin_unlock_irq(&engine->active.lock);
4620 	GEM_BUG_ON(rq->engine != engine);
4621 
4622 	/* Reset the engine while keeping our active request on hold */
4623 	execlists_hold(engine, rq);
4624 	GEM_BUG_ON(!i915_request_on_hold(rq));
4625 
4626 	__intel_engine_reset_bh(engine, NULL);
4627 	GEM_BUG_ON(rq->fence.error != -EIO);
4628 
4629 	/* Release our grasp on the engine, letting CS flow again */
4630 	tasklet_enable(&engine->execlists.tasklet);
4631 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4632 	local_bh_enable();
4633 
4634 	/* Check that we do not resubmit the held request */
4635 	i915_request_get(rq);
4636 	if (i915_request_wait(rq, 0, HZ / 5) >= 0) {
4637 		pr_err("%s: on hold request completed!\n",
4638 		       engine->name);
4639 		intel_gt_set_wedged(gt);
4640 		err = -EIO;
4641 		goto out_rq;
4642 	}
4643 	GEM_BUG_ON(!i915_request_on_hold(rq));
4644 
4645 	/* But is resubmitted on release */
4646 	execlists_unhold(engine, rq);
4647 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4648 		pr_err("%s: held request did not complete!\n",
4649 		       engine->name);
4650 		intel_gt_set_wedged(gt);
4651 		err = -ETIME;
4652 	}
4653 
4654 out_rq:
4655 	i915_request_put(rq);
4656 out_heartbeat:
4657 	for (n = 0; n < nsibling; n++)
4658 		st_engine_heartbeat_enable(siblings[n]);
4659 
4660 	intel_context_put(ve);
4661 out_spin:
4662 	igt_spinner_fini(&spin);
4663 	return err;
4664 }
4665 
4666 static int live_virtual_reset(void *arg)
4667 {
4668 	struct intel_gt *gt = arg;
4669 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4670 	unsigned int class;
4671 
4672 	/*
4673 	 * Check that we handle a reset event within a virtual engine.
4674 	 * Only the physical engine is reset, but we have to check the flow
4675 	 * of the virtual requests around the reset, and make sure the guilty
4676 	 * request is not forgotten.
4677 	 */
4678 
4679 	if (intel_uc_uses_guc_submission(&gt->uc))
4680 		return 0;
4681 
4682 	if (!intel_has_reset_engine(gt))
4683 		return 0;
4684 
4685 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4686 		int nsibling, err;
4687 
4688 		nsibling = select_siblings(gt, class, siblings);
4689 		if (nsibling < 2)
4690 			continue;
4691 
4692 		err = reset_virtual_engine(gt, siblings, nsibling);
4693 		if (err)
4694 			return err;
4695 	}
4696 
4697 	return 0;
4698 }
4699 
4700 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4701 {
4702 	static const struct i915_subtest tests[] = {
4703 		SUBTEST(live_sanitycheck),
4704 		SUBTEST(live_unlite_switch),
4705 		SUBTEST(live_unlite_preempt),
4706 		SUBTEST(live_unlite_ring),
4707 		SUBTEST(live_pin_rewind),
4708 		SUBTEST(live_hold_reset),
4709 		SUBTEST(live_error_interrupt),
4710 		SUBTEST(live_timeslice_preempt),
4711 		SUBTEST(live_timeslice_rewind),
4712 		SUBTEST(live_timeslice_queue),
4713 		SUBTEST(live_timeslice_nopreempt),
4714 		SUBTEST(live_busywait_preempt),
4715 		SUBTEST(live_preempt),
4716 		SUBTEST(live_late_preempt),
4717 		SUBTEST(live_nopreempt),
4718 		SUBTEST(live_preempt_cancel),
4719 		SUBTEST(live_suppress_self_preempt),
4720 		SUBTEST(live_chain_preempt),
4721 		SUBTEST(live_preempt_ring),
4722 		SUBTEST(live_preempt_gang),
4723 		SUBTEST(live_preempt_timeout),
4724 		SUBTEST(live_preempt_user),
4725 		SUBTEST(live_preempt_smoke),
4726 		SUBTEST(live_virtual_engine),
4727 		SUBTEST(live_virtual_mask),
4728 		SUBTEST(live_virtual_preserved),
4729 		SUBTEST(live_virtual_slice),
4730 		SUBTEST(live_virtual_bond),
4731 		SUBTEST(live_virtual_reset),
4732 	};
4733 
4734 	if (!HAS_EXECLISTS(i915))
4735 		return 0;
4736 
4737 	if (intel_gt_is_wedged(&i915->gt))
4738 		return 0;
4739 
4740 	return intel_gt_live_subtests(tests, &i915->gt);
4741 }
4742