1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
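/*
 * The command streamer's general purpose registers (GPRs) start at
 * mmio_base + 0x600; each GPR is 64b wide, i.e. 2 dwords, so CS_GPR()
 * addresses them in dword strides and NUM_GPR_DW spans the whole
 * register file.
 */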
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR 16
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
26 
27 static struct i915_vma *create_scratch(struct intel_gt *gt)
28 {
29 	struct drm_i915_gem_object *obj;
30 	struct i915_vma *vma;
31 	int err;
32 
33 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
34 	if (IS_ERR(obj))
35 		return ERR_CAST(obj);
36 
37 	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
38 
39 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
40 	if (IS_ERR(vma)) {
41 		i915_gem_object_put(obj);
42 		return vma;
43 	}
44 
45 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
46 	if (err) {
47 		i915_gem_object_put(obj);
48 		return ERR_PTR(err);
49 	}
50 
51 	return vma;
52 }
53 
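/*
 * Many of these tests construct very precise ELSP[] contents. Park the
 * heartbeat (keeping the engine awake with an explicit pm wakeref instead)
 * so that background heartbeat pulses cannot preempt or reorder the test
 * requests; engine_heartbeat_enable() restores the saved interval.
 */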
54 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
55 				     unsigned long *saved)
56 {
57 	*saved = engine->props.heartbeat_interval_ms;
58 	engine->props.heartbeat_interval_ms = 0;
59 
60 	intel_engine_pm_get(engine);
61 	intel_engine_park_heartbeat(engine);
62 }
63 
64 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
65 				    unsigned long saved)
66 {
67 	intel_engine_pm_put(engine);
68 
69 	engine->props.heartbeat_interval_ms = saved;
70 }
71 
72 static bool is_active(struct i915_request *rq)
73 {
74 	if (i915_request_is_active(rq))
75 		return true;
76 
77 	if (i915_request_on_hold(rq))
78 		return true;
79 
80 	if (i915_request_started(rq))
81 		return true;
82 
83 	return false;
84 }
85 
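/*
 * Poll (with cond_resched) until the request has been accepted by the HW:
 * nothing left in execlists.pending[] and the request is active, started or
 * on hold. Returns -ETIME if submission is not acknowledged within @timeout,
 * or 0 immediately if the request has already completed.
 */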
86 static int wait_for_submit(struct intel_engine_cs *engine,
87 			   struct i915_request *rq,
88 			   unsigned long timeout)
89 {
90 	timeout += jiffies;
91 	do {
92 		bool done = time_after(jiffies, timeout);
93 
94 		if (i915_request_completed(rq)) /* that was quick! */
95 			return 0;
96 
		/* Wait until the HW has acknowledged the submission (or err) */

98 		intel_engine_flush_submission(engine);
99 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
100 			return 0;
101 
102 		if (done)
103 			return -ETIME;
104 
105 		cond_resched();
106 	} while (1);
107 }
108 
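/*
 * Wait for the expected engine reset to catch up with the cancelled request:
 * poll until the submission queue is idle and the request has either
 * completed or been marked with a fence error, then check for the -EIO
 * signature of a reset and allow the request a moment to complete.
 */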
109 static int wait_for_reset(struct intel_engine_cs *engine,
110 			  struct i915_request *rq,
111 			  unsigned long timeout)
112 {
113 	timeout += jiffies;
114 
115 	do {
116 		cond_resched();
117 		intel_engine_flush_submission(engine);
118 
119 		if (READ_ONCE(engine->execlists.pending[0]))
120 			continue;
121 
122 		if (i915_request_completed(rq))
123 			break;
124 
125 		if (READ_ONCE(rq->fence.error))
126 			break;
127 	} while (time_before(jiffies, timeout));
128 
129 	flush_scheduled_work();
130 
131 	if (rq->fence.error != -EIO) {
132 		pr_err("%s: hanging request %llx:%lld not reset\n",
133 		       engine->name,
134 		       rq->fence.context,
135 		       rq->fence.seqno);
136 		return -EINVAL;
137 	}
138 
	/* Give the request a jiffy to complete after flushing the worker */
140 	if (i915_request_wait(rq, 0,
141 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
142 		pr_err("%s: hanging request %llx:%lld did not complete\n",
143 		       engine->name,
144 		       rq->fence.context,
145 		       rq->fence.seqno);
146 		return -ETIME;
147 	}
148 
149 	return 0;
150 }
151 
152 static int live_sanitycheck(void *arg)
153 {
154 	struct intel_gt *gt = arg;
155 	struct intel_engine_cs *engine;
156 	enum intel_engine_id id;
157 	struct igt_spinner spin;
158 	int err = 0;
159 
160 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
161 		return 0;
162 
163 	if (igt_spinner_init(&spin, gt))
164 		return -ENOMEM;
165 
166 	for_each_engine(engine, gt, id) {
167 		struct intel_context *ce;
168 		struct i915_request *rq;
169 
170 		ce = intel_context_create(engine);
171 		if (IS_ERR(ce)) {
172 			err = PTR_ERR(ce);
173 			break;
174 		}
175 
176 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
177 		if (IS_ERR(rq)) {
178 			err = PTR_ERR(rq);
179 			goto out_ctx;
180 		}
181 
182 		i915_request_add(rq);
183 		if (!igt_wait_for_spinner(&spin, rq)) {
184 			GEM_TRACE("spinner failed to start\n");
185 			GEM_TRACE_DUMP();
186 			intel_gt_set_wedged(gt);
187 			err = -EIO;
188 			goto out_ctx;
189 		}
190 
191 		igt_spinner_end(&spin);
192 		if (igt_flush_test(gt->i915)) {
193 			err = -EIO;
194 			goto out_ctx;
195 		}
196 
197 out_ctx:
198 		intel_context_put(ce);
199 		if (err)
200 			break;
201 	}
202 
203 	igt_spinner_fini(&spin);
204 	return err;
205 }
206 
207 static int live_unlite_restore(struct intel_gt *gt, int prio)
208 {
209 	struct intel_engine_cs *engine;
210 	enum intel_engine_id id;
211 	struct igt_spinner spin;
212 	int err = -ENOMEM;
213 
214 	/*
215 	 * Check that we can correctly context switch between 2 instances
216 	 * on the same engine from the same parent context.
217 	 */
218 
219 	if (igt_spinner_init(&spin, gt))
220 		return err;
221 
222 	err = 0;
223 	for_each_engine(engine, gt, id) {
224 		struct intel_context *ce[2] = {};
225 		struct i915_request *rq[2];
226 		struct igt_live_test t;
227 		unsigned long saved;
228 		int n;
229 
230 		if (prio && !intel_engine_has_preemption(engine))
231 			continue;
232 
233 		if (!intel_engine_can_store_dword(engine))
234 			continue;
235 
236 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
237 			err = -EIO;
238 			break;
239 		}
240 		engine_heartbeat_disable(engine, &saved);
241 
242 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
243 			struct intel_context *tmp;
244 
245 			tmp = intel_context_create(engine);
246 			if (IS_ERR(tmp)) {
247 				err = PTR_ERR(tmp);
248 				goto err_ce;
249 			}
250 
251 			err = intel_context_pin(tmp);
252 			if (err) {
253 				intel_context_put(tmp);
254 				goto err_ce;
255 			}
256 
257 			/*
258 			 * Setup the pair of contexts such that if we
259 			 * lite-restore using the RING_TAIL from ce[1] it
260 			 * will execute garbage from ce[0]->ring.
261 			 */
262 			memset(tmp->ring->vaddr,
263 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
264 			       tmp->ring->vma->size);
265 
266 			ce[n] = tmp;
267 		}
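		/*
		 * Advance ce[1]'s RING_HEAD/RING_TAIL to the middle of its
		 * ring so that ce[1]'s tail lies beyond the requests emitted
		 * into ce[0]; a bogus lite-restore of ce[0] using ce[1]'s
		 * RING_TAIL would then run into the poison written above.
		 */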
268 		GEM_BUG_ON(!ce[1]->ring->size);
269 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
270 		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
271 
272 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
273 		if (IS_ERR(rq[0])) {
274 			err = PTR_ERR(rq[0]);
275 			goto err_ce;
276 		}
277 
278 		i915_request_get(rq[0]);
279 		i915_request_add(rq[0]);
280 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
281 
		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			err = -ETIME;
			goto err_ce;
		}
286 
287 		rq[1] = i915_request_create(ce[1]);
288 		if (IS_ERR(rq[1])) {
289 			err = PTR_ERR(rq[1]);
290 			i915_request_put(rq[0]);
291 			goto err_ce;
292 		}
293 
294 		if (!prio) {
295 			/*
296 			 * Ensure we do the switch to ce[1] on completion.
297 			 *
298 			 * rq[0] is already submitted, so this should reduce
299 			 * to a no-op (a wait on a request on the same engine
300 			 * uses the submit fence, not the completion fence),
301 			 * but it will install a dependency on rq[1] for rq[0]
302 			 * that will prevent the pair being reordered by
303 			 * timeslicing.
304 			 */
305 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
306 		}
307 
308 		i915_request_get(rq[1]);
309 		i915_request_add(rq[1]);
310 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
311 		i915_request_put(rq[0]);
312 
313 		if (prio) {
314 			struct i915_sched_attr attr = {
315 				.priority = prio,
316 			};
317 
318 			/* Alternatively preempt the spinner with ce[1] */
319 			engine->schedule(rq[1], &attr);
320 		}
321 
322 		/* And switch back to ce[0] for good measure */
323 		rq[0] = i915_request_create(ce[0]);
324 		if (IS_ERR(rq[0])) {
325 			err = PTR_ERR(rq[0]);
326 			i915_request_put(rq[1]);
327 			goto err_ce;
328 		}
329 
330 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
331 		i915_request_get(rq[0]);
332 		i915_request_add(rq[0]);
333 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
334 		i915_request_put(rq[1]);
335 		i915_request_put(rq[0]);
336 
337 err_ce:
338 		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
339 		igt_spinner_end(&spin);
340 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
341 			if (IS_ERR_OR_NULL(ce[n]))
342 				break;
343 
344 			intel_context_unpin(ce[n]);
345 			intel_context_put(ce[n]);
346 		}
347 
348 		engine_heartbeat_enable(engine, saved);
349 		if (igt_live_test_end(&t))
350 			err = -EIO;
351 		if (err)
352 			break;
353 	}
354 
355 	igt_spinner_fini(&spin);
356 	return err;
357 }
358 
359 static int live_unlite_switch(void *arg)
360 {
361 	return live_unlite_restore(arg, 0);
362 }
363 
364 static int live_unlite_preempt(void *arg)
365 {
366 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
367 }
368 
369 static int live_pin_rewind(void *arg)
370 {
371 	struct intel_gt *gt = arg;
372 	struct intel_engine_cs *engine;
373 	enum intel_engine_id id;
374 	int err = 0;
375 
376 	/*
377 	 * We have to be careful not to trust intel_ring too much, for example
378 	 * ring->head is updated upon retire which is out of sync with pinning
379 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
380 	 * or else we risk writing an older, stale value.
381 	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
383 	 */
384 
385 	for_each_engine(engine, gt, id) {
386 		struct intel_context *ce;
387 		struct i915_request *rq;
388 		struct intel_ring *ring;
389 		struct igt_live_test t;
390 
391 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
392 			err = -EIO;
393 			break;
394 		}
395 
396 		ce = intel_context_create(engine);
397 		if (IS_ERR(ce)) {
398 			err = PTR_ERR(ce);
399 			break;
400 		}
401 
402 		err = intel_context_pin(ce);
403 		if (err) {
404 			intel_context_put(ce);
405 			break;
406 		}
407 
408 		/* Keep the context awake while we play games */
409 		err = i915_active_acquire(&ce->active);
410 		if (err) {
411 			intel_context_unpin(ce);
412 			intel_context_put(ce);
413 			break;
414 		}
415 		ring = ce->ring;
416 
417 		/* Poison the ring, and offset the next request from HEAD */
418 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
419 		ring->emit = ring->size / 2;
420 		ring->tail = ring->emit;
421 		GEM_BUG_ON(ring->head);
422 
423 		intel_context_unpin(ce);
424 
425 		/* Submit a simple nop request */
426 		GEM_BUG_ON(intel_context_is_pinned(ce));
427 		rq = intel_context_create_request(ce);
428 		i915_active_release(&ce->active); /* e.g. async retire */
429 		intel_context_put(ce);
430 		if (IS_ERR(rq)) {
431 			err = PTR_ERR(rq);
432 			break;
433 		}
434 		GEM_BUG_ON(!rq->head);
435 		i915_request_add(rq);
436 
437 		/* Expect not to hang! */
438 		if (igt_live_test_end(&t)) {
439 			err = -EIO;
440 			break;
441 		}
442 	}
443 
444 	return err;
445 }
446 
447 static int live_hold_reset(void *arg)
448 {
449 	struct intel_gt *gt = arg;
450 	struct intel_engine_cs *engine;
451 	enum intel_engine_id id;
452 	struct igt_spinner spin;
453 	int err = 0;
454 
455 	/*
456 	 * In order to support offline error capture for fast preempt reset,
457 	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
459 	 */
460 
461 	if (!intel_has_reset_engine(gt))
462 		return 0;
463 
464 	if (igt_spinner_init(&spin, gt))
465 		return -ENOMEM;
466 
467 	for_each_engine(engine, gt, id) {
468 		struct intel_context *ce;
469 		unsigned long heartbeat;
470 		struct i915_request *rq;
471 
472 		ce = intel_context_create(engine);
473 		if (IS_ERR(ce)) {
474 			err = PTR_ERR(ce);
475 			break;
476 		}
477 
478 		engine_heartbeat_disable(engine, &heartbeat);
479 
480 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
481 		if (IS_ERR(rq)) {
482 			err = PTR_ERR(rq);
483 			goto out;
484 		}
485 		i915_request_add(rq);
486 
487 		if (!igt_wait_for_spinner(&spin, rq)) {
488 			intel_gt_set_wedged(gt);
489 			err = -ETIME;
490 			goto out;
491 		}
492 
493 		/* We have our request executing, now remove it and reset */
494 
495 		if (test_and_set_bit(I915_RESET_ENGINE + id,
496 				     &gt->reset.flags)) {
497 			intel_gt_set_wedged(gt);
498 			err = -EBUSY;
499 			goto out;
500 		}
501 		tasklet_disable(&engine->execlists.tasklet);
502 
503 		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
504 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
505 
506 		i915_request_get(rq);
507 		execlists_hold(engine, rq);
508 		GEM_BUG_ON(!i915_request_on_hold(rq));
509 
510 		intel_engine_reset(engine, NULL);
511 		GEM_BUG_ON(rq->fence.error != -EIO);
512 
513 		tasklet_enable(&engine->execlists.tasklet);
514 		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
515 				      &gt->reset.flags);
516 
517 		/* Check that we do not resubmit the held request */
518 		if (!i915_request_wait(rq, 0, HZ / 5)) {
519 			pr_err("%s: on hold request completed!\n",
520 			       engine->name);
521 			i915_request_put(rq);
522 			err = -EIO;
523 			goto out;
524 		}
525 		GEM_BUG_ON(!i915_request_on_hold(rq));
526 
527 		/* But is resubmitted on release */
528 		execlists_unhold(engine, rq);
529 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
530 			pr_err("%s: held request did not complete!\n",
531 			       engine->name);
532 			intel_gt_set_wedged(gt);
533 			err = -ETIME;
534 		}
535 		i915_request_put(rq);
536 
537 out:
538 		engine_heartbeat_enable(engine, heartbeat);
539 		intel_context_put(ce);
540 		if (err)
541 			break;
542 	}
543 
544 	igt_spinner_fini(&spin);
545 	return err;
546 }
547 
548 static const char *error_repr(int err)
549 {
550 	return err ? "bad" : "good";
551 }
552 
553 static int live_error_interrupt(void *arg)
554 {
555 	static const struct error_phase {
556 		enum { GOOD = 0, BAD = -EIO } error[2];
557 	} phases[] = {
558 		{ { BAD,  GOOD } },
559 		{ { BAD,  BAD  } },
560 		{ { BAD,  GOOD } },
561 		{ { GOOD, GOOD } }, /* sentinel */
562 	};
563 	struct intel_gt *gt = arg;
564 	struct intel_engine_cs *engine;
565 	enum intel_engine_id id;
566 
567 	/*
568 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
569 	 * of invalid commands in user batches that will cause a GPU hang.
570 	 * This is a faster mechanism than using hangcheck/heartbeats, but
571 	 * only detects problems the HW knows about -- it will not warn when
572 	 * we kill the HW!
573 	 *
574 	 * To verify our detection and reset, we throw some invalid commands
575 	 * at the HW and wait for the interrupt.
576 	 */
577 
578 	if (!intel_has_reset_engine(gt))
579 		return 0;
580 
581 	for_each_engine(engine, gt, id) {
582 		const struct error_phase *p;
583 		unsigned long heartbeat;
584 		int err = 0;
585 
586 		engine_heartbeat_disable(engine, &heartbeat);
587 
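		/*
		 * Walk the table of phases: a BAD slot emits an illegal
		 * instruction (0xdeadbeef) that should raise the CS error
		 * interrupt and trigger an engine reset, while a GOOD slot
		 * emits harmless MI_NOOPs. The all-GOOD entry is the
		 * terminating sentinel.
		 */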
588 		for (p = phases; p->error[0] != GOOD; p++) {
589 			struct i915_request *client[ARRAY_SIZE(phases->error)];
590 			u32 *cs;
591 			int i;
592 
			memset(client, 0, sizeof(client));
594 			for (i = 0; i < ARRAY_SIZE(client); i++) {
595 				struct intel_context *ce;
596 				struct i915_request *rq;
597 
598 				ce = intel_context_create(engine);
599 				if (IS_ERR(ce)) {
600 					err = PTR_ERR(ce);
601 					goto out;
602 				}
603 
604 				rq = intel_context_create_request(ce);
605 				intel_context_put(ce);
606 				if (IS_ERR(rq)) {
607 					err = PTR_ERR(rq);
608 					goto out;
609 				}
610 
611 				if (rq->engine->emit_init_breadcrumb) {
612 					err = rq->engine->emit_init_breadcrumb(rq);
613 					if (err) {
614 						i915_request_add(rq);
615 						goto out;
616 					}
617 				}
618 
619 				cs = intel_ring_begin(rq, 2);
620 				if (IS_ERR(cs)) {
621 					i915_request_add(rq);
622 					err = PTR_ERR(cs);
623 					goto out;
624 				}
625 
626 				if (p->error[i]) {
627 					*cs++ = 0xdeadbeef;
628 					*cs++ = 0xdeadbeef;
629 				} else {
630 					*cs++ = MI_NOOP;
631 					*cs++ = MI_NOOP;
632 				}
633 
634 				client[i] = i915_request_get(rq);
635 				i915_request_add(rq);
636 			}
637 
638 			err = wait_for_submit(engine, client[0], HZ / 2);
639 			if (err) {
640 				pr_err("%s: first request did not start within time!\n",
641 				       engine->name);
642 				err = -ETIME;
643 				goto out;
644 			}
645 
646 			for (i = 0; i < ARRAY_SIZE(client); i++) {
647 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
648 					pr_debug("%s: %s request incomplete!\n",
649 						 engine->name,
650 						 error_repr(p->error[i]));
651 
652 				if (!i915_request_started(client[i])) {
653 					pr_err("%s: %s request not started!\n",
654 					       engine->name,
655 					       error_repr(p->error[i]));
656 					err = -ETIME;
657 					goto out;
658 				}
659 
660 				/* Kick the tasklet to process the error */
661 				intel_engine_flush_submission(engine);
662 				if (client[i]->fence.error != p->error[i]) {
663 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
664 					       engine->name,
665 					       error_repr(p->error[i]),
666 					       i915_request_completed(client[i]) ? "completed" : "running",
667 					       client[i]->fence.error);
668 					err = -EINVAL;
669 					goto out;
670 				}
671 			}
672 
673 out:
674 			for (i = 0; i < ARRAY_SIZE(client); i++)
675 				if (client[i])
676 					i915_request_put(client[i]);
677 			if (err) {
678 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
679 				       engine->name, p - phases,
680 				       p->error[0], p->error[1]);
681 				break;
682 			}
683 		}
684 
685 		engine_heartbeat_enable(engine, heartbeat);
686 		if (err) {
687 			intel_gt_set_wedged(gt);
688 			return err;
689 		}
690 	}
691 
692 	return 0;
693 }
694 
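/*
 * Emit a busy-wait on dword @idx of the semaphore page followed by a write
 * releasing dword @idx - 1, so that each link in the chain spins until it is
 * signalled and then releases the request queued before it. Arbitration is
 * enabled across the wait so the spinning request remains preemptible.
 */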
695 static int
696 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
697 {
698 	u32 *cs;
699 
700 	cs = intel_ring_begin(rq, 10);
701 	if (IS_ERR(cs))
702 		return PTR_ERR(cs);
703 
704 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
705 
706 	*cs++ = MI_SEMAPHORE_WAIT |
707 		MI_SEMAPHORE_GLOBAL_GTT |
708 		MI_SEMAPHORE_POLL |
709 		MI_SEMAPHORE_SAD_NEQ_SDD;
710 	*cs++ = 0;
711 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
712 	*cs++ = 0;
713 
714 	if (idx > 0) {
715 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
716 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
717 		*cs++ = 0;
718 		*cs++ = 1;
719 	} else {
720 		*cs++ = MI_NOOP;
721 		*cs++ = MI_NOOP;
722 		*cs++ = MI_NOOP;
723 		*cs++ = MI_NOOP;
724 	}
725 
726 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
727 
728 	intel_ring_advance(rq, cs);
729 	return 0;
730 }
731 
732 static struct i915_request *
733 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
734 {
735 	struct intel_context *ce;
736 	struct i915_request *rq;
737 	int err;
738 
739 	ce = intel_context_create(engine);
740 	if (IS_ERR(ce))
741 		return ERR_CAST(ce);
742 
743 	rq = intel_context_create_request(ce);
744 	if (IS_ERR(rq))
745 		goto out_ce;
746 
747 	err = 0;
748 	if (rq->engine->emit_init_breadcrumb)
749 		err = rq->engine->emit_init_breadcrumb(rq);
750 	if (err == 0)
751 		err = emit_semaphore_chain(rq, vma, idx);
752 	if (err == 0)
753 		i915_request_get(rq);
754 	i915_request_add(rq);
755 	if (err)
756 		rq = ERR_PTR(err);
757 
758 out_ce:
759 	intel_context_put(ce);
760 	return rq;
761 }
762 
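/*
 * Submit a kernel request at @prio that writes 1 into dword @idx - 1 of the
 * semaphore page, starting the cascade of releases, and kick the tasklet so
 * the scheduler acts on the new priority immediately.
 */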
763 static int
764 release_queue(struct intel_engine_cs *engine,
765 	      struct i915_vma *vma,
766 	      int idx, int prio)
767 {
768 	struct i915_sched_attr attr = {
769 		.priority = prio,
770 	};
771 	struct i915_request *rq;
772 	u32 *cs;
773 
774 	rq = intel_engine_create_kernel_request(engine);
775 	if (IS_ERR(rq))
776 		return PTR_ERR(rq);
777 
778 	cs = intel_ring_begin(rq, 4);
779 	if (IS_ERR(cs)) {
780 		i915_request_add(rq);
781 		return PTR_ERR(cs);
782 	}
783 
784 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
785 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
786 	*cs++ = 0;
787 	*cs++ = 1;
788 
789 	intel_ring_advance(rq, cs);
790 
791 	i915_request_get(rq);
792 	i915_request_add(rq);
793 
794 	local_bh_disable();
795 	engine->schedule(rq, &attr);
796 	local_bh_enable(); /* kick tasklet */
797 
798 	i915_request_put(rq);
799 
800 	return 0;
801 }
802 
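/*
 * Queue the head of a semaphore chain on @outer and then @count more waiters
 * on each engine, before releasing the tail at maximum priority. The chain
 * only unwinds if timeslicing rotates each spinning waiter onto the HW, so a
 * successful wait on the head shows that expired timeslices evict the
 * semaphore busy-waits.
 */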
803 static int
804 slice_semaphore_queue(struct intel_engine_cs *outer,
805 		      struct i915_vma *vma,
806 		      int count)
807 {
808 	struct intel_engine_cs *engine;
809 	struct i915_request *head;
810 	enum intel_engine_id id;
811 	int err, i, n = 0;
812 
813 	head = semaphore_queue(outer, vma, n++);
814 	if (IS_ERR(head))
815 		return PTR_ERR(head);
816 
817 	for_each_engine(engine, outer->gt, id) {
818 		for (i = 0; i < count; i++) {
819 			struct i915_request *rq;
820 
821 			rq = semaphore_queue(engine, vma, n++);
822 			if (IS_ERR(rq)) {
823 				err = PTR_ERR(rq);
824 				goto out;
825 			}
826 
827 			i915_request_put(rq);
828 		}
829 	}
830 
831 	err = release_queue(outer, vma, n, INT_MAX);
832 	if (err)
833 		goto out;
834 
835 	if (i915_request_wait(head, 0,
836 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
837 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
838 		       count, n);
839 		GEM_TRACE_DUMP();
840 		intel_gt_set_wedged(outer->gt);
841 		err = -EIO;
842 	}
843 
844 out:
845 	i915_request_put(head);
846 	return err;
847 }
848 
849 static int live_timeslice_preempt(void *arg)
850 {
851 	struct intel_gt *gt = arg;
852 	struct drm_i915_gem_object *obj;
853 	struct i915_vma *vma;
854 	void *vaddr;
855 	int err = 0;
856 	int count;
857 
858 	/*
859 	 * If a request takes too long, we would like to give other users
860 	 * a fair go on the GPU. In particular, users may create batches
861 	 * that wait upon external input, where that input may even be
862 	 * supplied by another GPU job. To avoid blocking forever, we
863 	 * need to preempt the current task and replace it with another
864 	 * ready task.
865 	 */
866 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
867 		return 0;
868 
869 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
870 	if (IS_ERR(obj))
871 		return PTR_ERR(obj);
872 
873 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
874 	if (IS_ERR(vma)) {
875 		err = PTR_ERR(vma);
876 		goto err_obj;
877 	}
878 
879 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
880 	if (IS_ERR(vaddr)) {
881 		err = PTR_ERR(vaddr);
882 		goto err_obj;
883 	}
884 
885 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
886 	if (err)
887 		goto err_map;
888 
889 	err = i915_vma_sync(vma);
890 	if (err)
891 		goto err_pin;
892 
893 	for_each_prime_number_from(count, 1, 16) {
894 		struct intel_engine_cs *engine;
895 		enum intel_engine_id id;
896 
897 		for_each_engine(engine, gt, id) {
898 			unsigned long saved;
899 
900 			if (!intel_engine_has_preemption(engine))
901 				continue;
902 
903 			memset(vaddr, 0, PAGE_SIZE);
904 
905 			engine_heartbeat_disable(engine, &saved);
906 			err = slice_semaphore_queue(engine, vma, count);
907 			engine_heartbeat_enable(engine, saved);
908 			if (err)
909 				goto err_pin;
910 
911 			if (igt_flush_test(gt->i915)) {
912 				err = -EIO;
913 				goto err_pin;
914 			}
915 		}
916 	}
917 
918 err_pin:
919 	i915_vma_unpin(vma);
920 err_map:
921 	i915_gem_object_unpin_map(obj);
922 err_obj:
923 	i915_gem_object_put(obj);
924 	return err;
925 }
926 
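/*
 * Build a request that (optionally after awaiting @wait) spins until
 * slot[0] of the status page reaches @idx, then records RING_TIMESTAMP
 * into slot[idx] and advances slot[0] to @idx + 1, allowing the order of
 * execution to be reconstructed from the stored timestamps.
 */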
927 static struct i915_request *
928 create_rewinder(struct intel_context *ce,
929 		struct i915_request *wait,
930 		void *slot, int idx)
931 {
932 	const u32 offset =
933 		i915_ggtt_offset(ce->engine->status_page.vma) +
934 		offset_in_page(slot);
935 	struct i915_request *rq;
936 	u32 *cs;
937 	int err;
938 
939 	rq = intel_context_create_request(ce);
940 	if (IS_ERR(rq))
941 		return rq;
942 
943 	if (wait) {
944 		err = i915_request_await_dma_fence(rq, &wait->fence);
945 		if (err)
946 			goto err;
947 	}
948 
949 	cs = intel_ring_begin(rq, 14);
950 	if (IS_ERR(cs)) {
951 		err = PTR_ERR(cs);
952 		goto err;
953 	}
954 
955 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
956 	*cs++ = MI_NOOP;
957 
958 	*cs++ = MI_SEMAPHORE_WAIT |
959 		MI_SEMAPHORE_GLOBAL_GTT |
960 		MI_SEMAPHORE_POLL |
961 		MI_SEMAPHORE_SAD_GTE_SDD;
962 	*cs++ = idx;
963 	*cs++ = offset;
964 	*cs++ = 0;
965 
966 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
967 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
968 	*cs++ = offset + idx * sizeof(u32);
969 	*cs++ = 0;
970 
971 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
972 	*cs++ = offset;
973 	*cs++ = 0;
974 	*cs++ = idx + 1;
975 
976 	intel_ring_advance(rq, cs);
977 
978 	rq->sched.attr.priority = I915_PRIORITY_MASK;
979 	err = 0;
980 err:
981 	i915_request_get(rq);
982 	i915_request_add(rq);
983 	if (err) {
984 		i915_request_put(rq);
985 		return ERR_PTR(err);
986 	}
987 
988 	return rq;
989 }
990 
991 static int live_timeslice_rewind(void *arg)
992 {
993 	struct intel_gt *gt = arg;
994 	struct intel_engine_cs *engine;
995 	enum intel_engine_id id;
996 
997 	/*
998 	 * The usual presumption on timeslice expiration is that we replace
999 	 * the active context with another. However, given a chain of
1000 	 * dependencies we may end up with replacing the context with itself,
1001 	 * but only a few of those requests, forcing us to rewind the
1002 	 * RING_TAIL of the original request.
1003 	 */
1004 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1005 		return 0;
1006 
1007 	for_each_engine(engine, gt, id) {
1008 		enum { A1, A2, B1 };
1009 		enum { X = 1, Z, Y };
1010 		struct i915_request *rq[3] = {};
1011 		struct intel_context *ce;
1012 		unsigned long heartbeat;
1013 		unsigned long timeslice;
1014 		int i, err = 0;
1015 		u32 *slot;
1016 
1017 		if (!intel_engine_has_timeslices(engine))
1018 			continue;
1019 
1020 		/*
1021 		 * A:rq1 -- semaphore wait, timestamp X
1022 		 * A:rq2 -- write timestamp Y
1023 		 *
1024 		 * B:rq1 [await A:rq1] -- write timestamp Z
1025 		 *
1026 		 * Force timeslice, release semaphore.
1027 		 *
1028 		 * Expect execution/evaluation order XZY
1029 		 */
1030 
1031 		engine_heartbeat_disable(engine, &heartbeat);
1032 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1033 
1034 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1035 
1036 		ce = intel_context_create(engine);
1037 		if (IS_ERR(ce)) {
1038 			err = PTR_ERR(ce);
1039 			goto err;
1040 		}
1041 
1042 		rq[0] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			intel_context_put(ce);
			goto err;
		}
1047 
1048 		rq[1] = create_rewinder(ce, NULL, slot, Y);
1049 		intel_context_put(ce);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			goto err;
		}
1052 
1053 		err = wait_for_submit(engine, rq[1], HZ / 2);
1054 		if (err) {
1055 			pr_err("%s: failed to submit first context\n",
1056 			       engine->name);
1057 			goto err;
1058 		}
1059 
1060 		ce = intel_context_create(engine);
1061 		if (IS_ERR(ce)) {
1062 			err = PTR_ERR(ce);
1063 			goto err;
1064 		}
1065 
1066 		rq[2] = create_rewinder(ce, rq[0], slot, Z);
1067 		intel_context_put(ce);
		if (IS_ERR(rq[2])) {
			err = PTR_ERR(rq[2]);
			goto err;
		}
1070 
1071 		err = wait_for_submit(engine, rq[2], HZ / 2);
1072 		if (err) {
1073 			pr_err("%s: failed to submit second context\n",
1074 			       engine->name);
1075 			goto err;
1076 		}
1077 
1078 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1079 		if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1080 			/* Wait for the timeslice to kick in */
1081 			del_timer(&engine->execlists.timer);
1082 			tasklet_hi_schedule(&engine->execlists.tasklet);
1083 			intel_engine_flush_submission(engine);
1084 		}
1085 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1086 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1087 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1088 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1089 
1090 		/* Release the hounds! */
1091 		slot[0] = 1;
1092 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1093 
1094 		for (i = 1; i <= 3; i++) {
1095 			unsigned long timeout = jiffies + HZ / 2;
1096 
1097 			while (!READ_ONCE(slot[i]) &&
1098 			       time_before(jiffies, timeout))
1099 				;
1100 
1101 			if (!time_before(jiffies, timeout)) {
1102 				pr_err("%s: rq[%d] timed out\n",
1103 				       engine->name, i - 1);
1104 				err = -ETIME;
1105 				goto err;
1106 			}
1107 
1108 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1109 		}
1110 
1111 		/* XZY: XZ < XY */
1112 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1113 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1114 			       engine->name,
1115 			       slot[Z] - slot[X],
1116 			       slot[Y] - slot[X]);
1117 			err = -EINVAL;
1118 		}
1119 
1120 err:
1121 		memset32(&slot[0], -1, 4);
1122 		wmb();
1123 
1124 		engine->props.timeslice_duration_ms = timeslice;
1125 		engine_heartbeat_enable(engine, heartbeat);
		for (i = 0; i < 3; i++) {
			if (!IS_ERR_OR_NULL(rq[i]))
				i915_request_put(rq[i]);
		}
1128 		if (igt_flush_test(gt->i915))
1129 			err = -EIO;
1130 		if (err)
1131 			return err;
1132 	}
1133 
1134 	return 0;
1135 }
1136 
1137 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1138 {
1139 	struct i915_request *rq;
1140 
1141 	rq = intel_engine_create_kernel_request(engine);
1142 	if (IS_ERR(rq))
1143 		return rq;
1144 
1145 	i915_request_get(rq);
1146 	i915_request_add(rq);
1147 
1148 	return rq;
1149 }
1150 
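/* Allow up to two full timeslices (in jiffies), plus a jiffy of slack. */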
1151 static long timeslice_threshold(const struct intel_engine_cs *engine)
1152 {
1153 	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1154 }
1155 
1156 static int live_timeslice_queue(void *arg)
1157 {
1158 	struct intel_gt *gt = arg;
1159 	struct drm_i915_gem_object *obj;
1160 	struct intel_engine_cs *engine;
1161 	enum intel_engine_id id;
1162 	struct i915_vma *vma;
1163 	void *vaddr;
1164 	int err = 0;
1165 
1166 	/*
1167 	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1168 	 * timeslicing between them disabled, we *do* enable timeslicing
1169 	 * if the queue demands it. (Normally, we do not submit if
1170 	 * ELSP[1] is already occupied, so must rely on timeslicing to
1171 	 * eject ELSP[0] in favour of the queue.)
1172 	 */
1173 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1174 		return 0;
1175 
1176 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1177 	if (IS_ERR(obj))
1178 		return PTR_ERR(obj);
1179 
1180 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1181 	if (IS_ERR(vma)) {
1182 		err = PTR_ERR(vma);
1183 		goto err_obj;
1184 	}
1185 
1186 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1187 	if (IS_ERR(vaddr)) {
1188 		err = PTR_ERR(vaddr);
1189 		goto err_obj;
1190 	}
1191 
1192 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1193 	if (err)
1194 		goto err_map;
1195 
1196 	err = i915_vma_sync(vma);
1197 	if (err)
1198 		goto err_pin;
1199 
1200 	for_each_engine(engine, gt, id) {
1201 		struct i915_sched_attr attr = {
1202 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1203 		};
1204 		struct i915_request *rq, *nop;
1205 		unsigned long saved;
1206 
1207 		if (!intel_engine_has_preemption(engine))
1208 			continue;
1209 
1210 		engine_heartbeat_disable(engine, &saved);
1211 		memset(vaddr, 0, PAGE_SIZE);
1212 
1213 		/* ELSP[0]: semaphore wait */
1214 		rq = semaphore_queue(engine, vma, 0);
1215 		if (IS_ERR(rq)) {
1216 			err = PTR_ERR(rq);
1217 			goto err_heartbeat;
1218 		}
1219 		engine->schedule(rq, &attr);
1220 		err = wait_for_submit(engine, rq, HZ / 2);
1221 		if (err) {
1222 			pr_err("%s: Timed out trying to submit semaphores\n",
1223 			       engine->name);
1224 			goto err_rq;
1225 		}
1226 
1227 		/* ELSP[1]: nop request */
1228 		nop = nop_request(engine);
1229 		if (IS_ERR(nop)) {
1230 			err = PTR_ERR(nop);
1231 			goto err_rq;
1232 		}
1233 		err = wait_for_submit(engine, nop, HZ / 2);
1234 		i915_request_put(nop);
1235 		if (err) {
1236 			pr_err("%s: Timed out trying to submit nop\n",
1237 			       engine->name);
1238 			goto err_rq;
1239 		}
1240 
1241 		GEM_BUG_ON(i915_request_completed(rq));
1242 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1243 
1244 		/* Queue: semaphore signal, matching priority as semaphore */
1245 		err = release_queue(engine, vma, 1, effective_prio(rq));
1246 		if (err)
1247 			goto err_rq;
1248 
1249 		/* Wait until we ack the release_queue and start timeslicing */
1250 		do {
1251 			cond_resched();
1252 			intel_engine_flush_submission(engine);
1253 		} while (READ_ONCE(engine->execlists.pending[0]));
1254 
1255 		if (!READ_ONCE(engine->execlists.timer.expires) &&
1256 		    execlists_active(&engine->execlists) == rq &&
1257 		    !i915_request_completed(rq)) {
1258 			struct drm_printer p =
1259 				drm_info_printer(gt->i915->drm.dev);
1260 
1261 			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1262 				      engine->name);
1263 			intel_engine_dump(engine, &p,
1264 					  "%s\n", engine->name);
1265 			GEM_TRACE_DUMP();
1266 
1267 			memset(vaddr, 0xff, PAGE_SIZE);
1268 			err = -EINVAL;
1269 		}
1270 
1271 		/* Timeslice every jiffy, so within 2 we should signal */
1272 		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1273 			struct drm_printer p =
1274 				drm_info_printer(gt->i915->drm.dev);
1275 
1276 			pr_err("%s: Failed to timeslice into queue\n",
1277 			       engine->name);
1278 			intel_engine_dump(engine, &p,
1279 					  "%s\n", engine->name);
1280 
1281 			memset(vaddr, 0xff, PAGE_SIZE);
1282 			err = -EIO;
1283 		}
1284 err_rq:
1285 		i915_request_put(rq);
1286 err_heartbeat:
1287 		engine_heartbeat_enable(engine, saved);
1288 		if (err)
1289 			break;
1290 	}
1291 
1292 err_pin:
1293 	i915_vma_unpin(vma);
1294 err_map:
1295 	i915_gem_object_unpin_map(obj);
1296 err_obj:
1297 	i915_gem_object_put(obj);
1298 	return err;
1299 }
1300 
1301 static int live_busywait_preempt(void *arg)
1302 {
1303 	struct intel_gt *gt = arg;
1304 	struct i915_gem_context *ctx_hi, *ctx_lo;
1305 	struct intel_engine_cs *engine;
1306 	struct drm_i915_gem_object *obj;
1307 	struct i915_vma *vma;
1308 	enum intel_engine_id id;
1309 	int err = -ENOMEM;
1310 	u32 *map;
1311 
1312 	/*
1313 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1314 	 * preempt the busywaits used to synchronise between rings.
1315 	 */
1316 
1317 	ctx_hi = kernel_context(gt->i915);
1318 	if (!ctx_hi)
1319 		return -ENOMEM;
1320 	ctx_hi->sched.priority =
1321 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1322 
1323 	ctx_lo = kernel_context(gt->i915);
1324 	if (!ctx_lo)
1325 		goto err_ctx_hi;
1326 	ctx_lo->sched.priority =
1327 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1328 
1329 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1330 	if (IS_ERR(obj)) {
1331 		err = PTR_ERR(obj);
1332 		goto err_ctx_lo;
1333 	}
1334 
1335 	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1336 	if (IS_ERR(map)) {
1337 		err = PTR_ERR(map);
1338 		goto err_obj;
1339 	}
1340 
1341 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1342 	if (IS_ERR(vma)) {
1343 		err = PTR_ERR(vma);
1344 		goto err_map;
1345 	}
1346 
1347 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1348 	if (err)
1349 		goto err_map;
1350 
1351 	err = i915_vma_sync(vma);
1352 	if (err)
1353 		goto err_vma;
1354 
1355 	for_each_engine(engine, gt, id) {
1356 		struct i915_request *lo, *hi;
1357 		struct igt_live_test t;
1358 		u32 *cs;
1359 
1360 		if (!intel_engine_has_preemption(engine))
1361 			continue;
1362 
1363 		if (!intel_engine_can_store_dword(engine))
1364 			continue;
1365 
1366 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1367 			err = -EIO;
1368 			goto err_vma;
1369 		}
1370 
1371 		/*
1372 		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer, where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
		 * allowing the first request to complete. If preemption
1377 		 * fails, we hang instead.
1378 		 */
1379 
1380 		lo = igt_request_alloc(ctx_lo, engine);
1381 		if (IS_ERR(lo)) {
1382 			err = PTR_ERR(lo);
1383 			goto err_vma;
1384 		}
1385 
1386 		cs = intel_ring_begin(lo, 8);
1387 		if (IS_ERR(cs)) {
1388 			err = PTR_ERR(cs);
1389 			i915_request_add(lo);
1390 			goto err_vma;
1391 		}
1392 
1393 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1394 		*cs++ = i915_ggtt_offset(vma);
1395 		*cs++ = 0;
1396 		*cs++ = 1;
1397 
1398 		/* XXX Do we need a flush + invalidate here? */
1399 
1400 		*cs++ = MI_SEMAPHORE_WAIT |
1401 			MI_SEMAPHORE_GLOBAL_GTT |
1402 			MI_SEMAPHORE_POLL |
1403 			MI_SEMAPHORE_SAD_EQ_SDD;
1404 		*cs++ = 0;
1405 		*cs++ = i915_ggtt_offset(vma);
1406 		*cs++ = 0;
1407 
1408 		intel_ring_advance(lo, cs);
1409 
1410 		i915_request_get(lo);
1411 		i915_request_add(lo);
1412 
1413 		if (wait_for(READ_ONCE(*map), 10)) {
1414 			i915_request_put(lo);
1415 			err = -ETIMEDOUT;
1416 			goto err_vma;
1417 		}
1418 
1419 		/* Low priority request should be busywaiting now */
1420 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1421 			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not busywait!\n",
1423 			       engine->name);
1424 			err = -EIO;
1425 			goto err_vma;
1426 		}
1427 
1428 		hi = igt_request_alloc(ctx_hi, engine);
1429 		if (IS_ERR(hi)) {
1430 			err = PTR_ERR(hi);
1431 			i915_request_put(lo);
1432 			goto err_vma;
1433 		}
1434 
1435 		cs = intel_ring_begin(hi, 4);
1436 		if (IS_ERR(cs)) {
1437 			err = PTR_ERR(cs);
1438 			i915_request_add(hi);
1439 			i915_request_put(lo);
1440 			goto err_vma;
1441 		}
1442 
1443 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1444 		*cs++ = i915_ggtt_offset(vma);
1445 		*cs++ = 0;
1446 		*cs++ = 0;
1447 
1448 		intel_ring_advance(hi, cs);
1449 		i915_request_add(hi);
1450 
1451 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1452 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1453 
1454 			pr_err("%s: Failed to preempt semaphore busywait!\n",
1455 			       engine->name);
1456 
1457 			intel_engine_dump(engine, &p, "%s\n", engine->name);
1458 			GEM_TRACE_DUMP();
1459 
1460 			i915_request_put(lo);
1461 			intel_gt_set_wedged(gt);
1462 			err = -EIO;
1463 			goto err_vma;
1464 		}
1465 		GEM_BUG_ON(READ_ONCE(*map));
1466 		i915_request_put(lo);
1467 
1468 		if (igt_live_test_end(&t)) {
1469 			err = -EIO;
1470 			goto err_vma;
1471 		}
1472 	}
1473 
1474 	err = 0;
1475 err_vma:
1476 	i915_vma_unpin(vma);
1477 err_map:
1478 	i915_gem_object_unpin_map(obj);
1479 err_obj:
1480 	i915_gem_object_put(obj);
1481 err_ctx_lo:
1482 	kernel_context_close(ctx_lo);
1483 err_ctx_hi:
1484 	kernel_context_close(ctx_hi);
1485 	return err;
1486 }
1487 
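/*
 * Look up the intel_context for @engine within @ctx (via its legacy index)
 * and build a spinner request on it, dropping the context reference before
 * returning the request to the caller.
 */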
1488 static struct i915_request *
1489 spinner_create_request(struct igt_spinner *spin,
1490 		       struct i915_gem_context *ctx,
1491 		       struct intel_engine_cs *engine,
1492 		       u32 arb)
1493 {
1494 	struct intel_context *ce;
1495 	struct i915_request *rq;
1496 
1497 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1498 	if (IS_ERR(ce))
1499 		return ERR_CAST(ce);
1500 
1501 	rq = igt_spinner_create_request(spin, ce, arb);
1502 	intel_context_put(ce);
1503 	return rq;
1504 }
1505 
1506 static int live_preempt(void *arg)
1507 {
1508 	struct intel_gt *gt = arg;
1509 	struct i915_gem_context *ctx_hi, *ctx_lo;
1510 	struct igt_spinner spin_hi, spin_lo;
1511 	struct intel_engine_cs *engine;
1512 	enum intel_engine_id id;
1513 	int err = -ENOMEM;
1514 
1515 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1516 		return 0;
1517 
1518 	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1519 		pr_err("Logical preemption supported, but not exposed\n");
1520 
1521 	if (igt_spinner_init(&spin_hi, gt))
1522 		return -ENOMEM;
1523 
1524 	if (igt_spinner_init(&spin_lo, gt))
1525 		goto err_spin_hi;
1526 
1527 	ctx_hi = kernel_context(gt->i915);
1528 	if (!ctx_hi)
1529 		goto err_spin_lo;
1530 	ctx_hi->sched.priority =
1531 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1532 
1533 	ctx_lo = kernel_context(gt->i915);
1534 	if (!ctx_lo)
1535 		goto err_ctx_hi;
1536 	ctx_lo->sched.priority =
1537 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1538 
1539 	for_each_engine(engine, gt, id) {
1540 		struct igt_live_test t;
1541 		struct i915_request *rq;
1542 
1543 		if (!intel_engine_has_preemption(engine))
1544 			continue;
1545 
1546 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1547 			err = -EIO;
1548 			goto err_ctx_lo;
1549 		}
1550 
1551 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1552 					    MI_ARB_CHECK);
1553 		if (IS_ERR(rq)) {
1554 			err = PTR_ERR(rq);
1555 			goto err_ctx_lo;
1556 		}
1557 
1558 		i915_request_add(rq);
1559 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1560 			GEM_TRACE("lo spinner failed to start\n");
1561 			GEM_TRACE_DUMP();
1562 			intel_gt_set_wedged(gt);
1563 			err = -EIO;
1564 			goto err_ctx_lo;
1565 		}
1566 
1567 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1568 					    MI_ARB_CHECK);
1569 		if (IS_ERR(rq)) {
1570 			igt_spinner_end(&spin_lo);
1571 			err = PTR_ERR(rq);
1572 			goto err_ctx_lo;
1573 		}
1574 
1575 		i915_request_add(rq);
1576 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1577 			GEM_TRACE("hi spinner failed to start\n");
1578 			GEM_TRACE_DUMP();
1579 			intel_gt_set_wedged(gt);
1580 			err = -EIO;
1581 			goto err_ctx_lo;
1582 		}
1583 
1584 		igt_spinner_end(&spin_hi);
1585 		igt_spinner_end(&spin_lo);
1586 
1587 		if (igt_live_test_end(&t)) {
1588 			err = -EIO;
1589 			goto err_ctx_lo;
1590 		}
1591 	}
1592 
1593 	err = 0;
1594 err_ctx_lo:
1595 	kernel_context_close(ctx_lo);
1596 err_ctx_hi:
1597 	kernel_context_close(ctx_hi);
1598 err_spin_lo:
1599 	igt_spinner_fini(&spin_lo);
1600 err_spin_hi:
1601 	igt_spinner_fini(&spin_hi);
1602 	return err;
1603 }
1604 
1605 static int live_late_preempt(void *arg)
1606 {
1607 	struct intel_gt *gt = arg;
1608 	struct i915_gem_context *ctx_hi, *ctx_lo;
1609 	struct igt_spinner spin_hi, spin_lo;
1610 	struct intel_engine_cs *engine;
1611 	struct i915_sched_attr attr = {};
1612 	enum intel_engine_id id;
1613 	int err = -ENOMEM;
1614 
1615 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1616 		return 0;
1617 
1618 	if (igt_spinner_init(&spin_hi, gt))
1619 		return -ENOMEM;
1620 
1621 	if (igt_spinner_init(&spin_lo, gt))
1622 		goto err_spin_hi;
1623 
1624 	ctx_hi = kernel_context(gt->i915);
1625 	if (!ctx_hi)
1626 		goto err_spin_lo;
1627 
1628 	ctx_lo = kernel_context(gt->i915);
1629 	if (!ctx_lo)
1630 		goto err_ctx_hi;
1631 
1632 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1633 	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1634 
1635 	for_each_engine(engine, gt, id) {
1636 		struct igt_live_test t;
1637 		struct i915_request *rq;
1638 
1639 		if (!intel_engine_has_preemption(engine))
1640 			continue;
1641 
1642 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1643 			err = -EIO;
1644 			goto err_ctx_lo;
1645 		}
1646 
1647 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1648 					    MI_ARB_CHECK);
1649 		if (IS_ERR(rq)) {
1650 			err = PTR_ERR(rq);
1651 			goto err_ctx_lo;
1652 		}
1653 
1654 		i915_request_add(rq);
1655 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1656 			pr_err("First context failed to start\n");
1657 			goto err_wedged;
1658 		}
1659 
1660 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1661 					    MI_NOOP);
1662 		if (IS_ERR(rq)) {
1663 			igt_spinner_end(&spin_lo);
1664 			err = PTR_ERR(rq);
1665 			goto err_ctx_lo;
1666 		}
1667 
1668 		i915_request_add(rq);
1669 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1670 			pr_err("Second context overtook first?\n");
1671 			goto err_wedged;
1672 		}
1673 
1674 		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1675 		engine->schedule(rq, &attr);
1676 
1677 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1678 			pr_err("High priority context failed to preempt the low priority context\n");
1679 			GEM_TRACE_DUMP();
1680 			goto err_wedged;
1681 		}
1682 
1683 		igt_spinner_end(&spin_hi);
1684 		igt_spinner_end(&spin_lo);
1685 
1686 		if (igt_live_test_end(&t)) {
1687 			err = -EIO;
1688 			goto err_ctx_lo;
1689 		}
1690 	}
1691 
1692 	err = 0;
1693 err_ctx_lo:
1694 	kernel_context_close(ctx_lo);
1695 err_ctx_hi:
1696 	kernel_context_close(ctx_hi);
1697 err_spin_lo:
1698 	igt_spinner_fini(&spin_lo);
1699 err_spin_hi:
1700 	igt_spinner_fini(&spin_hi);
1701 	return err;
1702 
1703 err_wedged:
1704 	igt_spinner_end(&spin_hi);
1705 	igt_spinner_end(&spin_lo);
1706 	intel_gt_set_wedged(gt);
1707 	err = -EIO;
1708 	goto err_ctx_lo;
1709 }
1710 
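/*
 * A preempt_client couples a spinner with its own GEM context, so tests can
 * pit spinning requests from different contexts against each other at
 * different priorities.
 */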
1711 struct preempt_client {
1712 	struct igt_spinner spin;
1713 	struct i915_gem_context *ctx;
1714 };
1715 
1716 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1717 {
1718 	c->ctx = kernel_context(gt->i915);
1719 	if (!c->ctx)
1720 		return -ENOMEM;
1721 
1722 	if (igt_spinner_init(&c->spin, gt))
1723 		goto err_ctx;
1724 
1725 	return 0;
1726 
1727 err_ctx:
1728 	kernel_context_close(c->ctx);
1729 	return -ENOMEM;
1730 }
1731 
1732 static void preempt_client_fini(struct preempt_client *c)
1733 {
1734 	igt_spinner_fini(&c->spin);
1735 	kernel_context_close(c->ctx);
1736 }
1737 
1738 static int live_nopreempt(void *arg)
1739 {
1740 	struct intel_gt *gt = arg;
1741 	struct intel_engine_cs *engine;
1742 	struct preempt_client a, b;
1743 	enum intel_engine_id id;
1744 	int err = -ENOMEM;
1745 
1746 	/*
1747 	 * Verify that we can disable preemption for an individual request
	 * that may be under observation and must not be interrupted.
1749 	 */
1750 
1751 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1752 		return 0;
1753 
1754 	if (preempt_client_init(gt, &a))
1755 		return -ENOMEM;
1756 	if (preempt_client_init(gt, &b))
1757 		goto err_client_a;
1758 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1759 
1760 	for_each_engine(engine, gt, id) {
1761 		struct i915_request *rq_a, *rq_b;
1762 
1763 		if (!intel_engine_has_preemption(engine))
1764 			continue;
1765 
1766 		engine->execlists.preempt_hang.count = 0;
1767 
1768 		rq_a = spinner_create_request(&a.spin,
1769 					      a.ctx, engine,
1770 					      MI_ARB_CHECK);
1771 		if (IS_ERR(rq_a)) {
1772 			err = PTR_ERR(rq_a);
1773 			goto err_client_b;
1774 		}
1775 
1776 		/* Low priority client, but unpreemptable! */
1777 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1778 
1779 		i915_request_add(rq_a);
1780 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1781 			pr_err("First client failed to start\n");
1782 			goto err_wedged;
1783 		}
1784 
1785 		rq_b = spinner_create_request(&b.spin,
1786 					      b.ctx, engine,
1787 					      MI_ARB_CHECK);
1788 		if (IS_ERR(rq_b)) {
1789 			err = PTR_ERR(rq_b);
1790 			goto err_client_b;
1791 		}
1792 
1793 		i915_request_add(rq_b);
1794 
1795 		/* B is much more important than A! (But A is unpreemptable.) */
1796 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1797 
1798 		/* Wait long enough for preemption and timeslicing */
1799 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
1800 			pr_err("Second client started too early!\n");
1801 			goto err_wedged;
1802 		}
1803 
1804 		igt_spinner_end(&a.spin);
1805 
1806 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1807 			pr_err("Second client failed to start\n");
1808 			goto err_wedged;
1809 		}
1810 
1811 		igt_spinner_end(&b.spin);
1812 
1813 		if (engine->execlists.preempt_hang.count) {
1814 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
1815 			       engine->execlists.preempt_hang.count);
1816 			err = -EINVAL;
1817 			goto err_wedged;
1818 		}
1819 
1820 		if (igt_flush_test(gt->i915))
1821 			goto err_wedged;
1822 	}
1823 
1824 	err = 0;
1825 err_client_b:
1826 	preempt_client_fini(&b);
1827 err_client_a:
1828 	preempt_client_fini(&a);
1829 	return err;
1830 
1831 err_wedged:
1832 	igt_spinner_end(&b.spin);
1833 	igt_spinner_end(&a.spin);
1834 	intel_gt_set_wedged(gt);
1835 	err = -EIO;
1836 	goto err_client_b;
1837 }
1838 
1839 struct live_preempt_cancel {
1840 	struct intel_engine_cs *engine;
1841 	struct preempt_client a, b;
1842 };
1843 
1844 static int __cancel_active0(struct live_preempt_cancel *arg)
1845 {
1846 	struct i915_request *rq;
1847 	struct igt_live_test t;
1848 	int err;
1849 
1850 	/* Preempt cancel of ELSP0 */
1851 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1852 	if (igt_live_test_begin(&t, arg->engine->i915,
1853 				__func__, arg->engine->name))
1854 		return -EIO;
1855 
1856 	rq = spinner_create_request(&arg->a.spin,
1857 				    arg->a.ctx, arg->engine,
1858 				    MI_ARB_CHECK);
1859 	if (IS_ERR(rq))
1860 		return PTR_ERR(rq);
1861 
1862 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
1863 	i915_request_get(rq);
1864 	i915_request_add(rq);
1865 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1866 		err = -EIO;
1867 		goto out;
1868 	}
1869 
1870 	intel_context_set_banned(rq->context);
1871 	err = intel_engine_pulse(arg->engine);
1872 	if (err)
1873 		goto out;
1874 
1875 	err = wait_for_reset(arg->engine, rq, HZ / 2);
1876 	if (err) {
1877 		pr_err("Cancelled inflight0 request did not reset\n");
1878 		goto out;
1879 	}
1880 
1881 out:
1882 	i915_request_put(rq);
1883 	if (igt_live_test_end(&t))
1884 		err = -EIO;
1885 	return err;
1886 }
1887 
1888 static int __cancel_active1(struct live_preempt_cancel *arg)
1889 {
1890 	struct i915_request *rq[2] = {};
1891 	struct igt_live_test t;
1892 	int err;
1893 
1894 	/* Preempt cancel of ELSP1 */
1895 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1896 	if (igt_live_test_begin(&t, arg->engine->i915,
1897 				__func__, arg->engine->name))
1898 		return -EIO;
1899 
1900 	rq[0] = spinner_create_request(&arg->a.spin,
1901 				       arg->a.ctx, arg->engine,
1902 				       MI_NOOP); /* no preemption */
1903 	if (IS_ERR(rq[0]))
1904 		return PTR_ERR(rq[0]);
1905 
1906 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1907 	i915_request_get(rq[0]);
1908 	i915_request_add(rq[0]);
1909 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1910 		err = -EIO;
1911 		goto out;
1912 	}
1913 
1914 	rq[1] = spinner_create_request(&arg->b.spin,
1915 				       arg->b.ctx, arg->engine,
1916 				       MI_ARB_CHECK);
1917 	if (IS_ERR(rq[1])) {
1918 		err = PTR_ERR(rq[1]);
1919 		goto out;
1920 	}
1921 
1922 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1923 	i915_request_get(rq[1]);
1924 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1925 	i915_request_add(rq[1]);
1926 	if (err)
1927 		goto out;
1928 
1929 	intel_context_set_banned(rq[1]->context);
1930 	err = intel_engine_pulse(arg->engine);
1931 	if (err)
1932 		goto out;
1933 
1934 	igt_spinner_end(&arg->a.spin);
1935 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
1936 	if (err)
1937 		goto out;
1938 
1939 	if (rq[0]->fence.error != 0) {
1940 		pr_err("Normal inflight0 request did not complete\n");
1941 		err = -EINVAL;
1942 		goto out;
1943 	}
1944 
1945 	if (rq[1]->fence.error != -EIO) {
1946 		pr_err("Cancelled inflight1 request did not report -EIO\n");
1947 		err = -EINVAL;
1948 		goto out;
1949 	}
1950 
1951 out:
1952 	i915_request_put(rq[1]);
1953 	i915_request_put(rq[0]);
1954 	if (igt_live_test_end(&t))
1955 		err = -EIO;
1956 	return err;
1957 }
1958 
1959 static int __cancel_queued(struct live_preempt_cancel *arg)
1960 {
1961 	struct i915_request *rq[3] = {};
1962 	struct igt_live_test t;
1963 	int err;
1964 
1965 	/* Full ELSP and one in the wings */
1966 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1967 	if (igt_live_test_begin(&t, arg->engine->i915,
1968 				__func__, arg->engine->name))
1969 		return -EIO;
1970 
1971 	rq[0] = spinner_create_request(&arg->a.spin,
1972 				       arg->a.ctx, arg->engine,
1973 				       MI_ARB_CHECK);
1974 	if (IS_ERR(rq[0]))
1975 		return PTR_ERR(rq[0]);
1976 
1977 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1978 	i915_request_get(rq[0]);
1979 	i915_request_add(rq[0]);
1980 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1981 		err = -EIO;
1982 		goto out;
1983 	}
1984 
1985 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1986 	if (IS_ERR(rq[1])) {
1987 		err = PTR_ERR(rq[1]);
1988 		goto out;
1989 	}
1990 
1991 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1992 	i915_request_get(rq[1]);
1993 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1994 	i915_request_add(rq[1]);
1995 	if (err)
1996 		goto out;
1997 
1998 	rq[2] = spinner_create_request(&arg->b.spin,
1999 				       arg->a.ctx, arg->engine,
2000 				       MI_ARB_CHECK);
2001 	if (IS_ERR(rq[2])) {
2002 		err = PTR_ERR(rq[2]);
2003 		goto out;
2004 	}
2005 
2006 	i915_request_get(rq[2]);
2007 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2008 	i915_request_add(rq[2]);
2009 	if (err)
2010 		goto out;
2011 
2012 	intel_context_set_banned(rq[2]->context);
2013 	err = intel_engine_pulse(arg->engine);
2014 	if (err)
2015 		goto out;
2016 
2017 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2018 	if (err)
2019 		goto out;
2020 
2021 	if (rq[0]->fence.error != -EIO) {
2022 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2023 		err = -EINVAL;
2024 		goto out;
2025 	}
2026 
2027 	if (rq[1]->fence.error != 0) {
2028 		pr_err("Normal inflight1 request did not complete\n");
2029 		err = -EINVAL;
2030 		goto out;
2031 	}
2032 
2033 	if (rq[2]->fence.error != -EIO) {
2034 		pr_err("Cancelled queued request did not report -EIO\n");
2035 		err = -EINVAL;
2036 		goto out;
2037 	}
2038 
2039 out:
2040 	i915_request_put(rq[2]);
2041 	i915_request_put(rq[1]);
2042 	i915_request_put(rq[0]);
2043 	if (igt_live_test_end(&t))
2044 		err = -EIO;
2045 	return err;
2046 }
2047 
2048 static int __cancel_hostile(struct live_preempt_cancel *arg)
2049 {
2050 	struct i915_request *rq;
2051 	int err;
2052 
2053 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2054 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2055 		return 0;
2056 
2057 	if (!intel_has_reset_engine(arg->engine->gt))
2058 		return 0;
2059 
2060 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2061 	rq = spinner_create_request(&arg->a.spin,
2062 				    arg->a.ctx, arg->engine,
2063 				    MI_NOOP); /* preemption disabled */
2064 	if (IS_ERR(rq))
2065 		return PTR_ERR(rq);
2066 
2067 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2068 	i915_request_get(rq);
2069 	i915_request_add(rq);
2070 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2071 		err = -EIO;
2072 		goto out;
2073 	}
2074 
2075 	intel_context_set_banned(rq->context);
2076 	err = intel_engine_pulse(arg->engine); /* force reset */
2077 	if (err)
2078 		goto out;
2079 
2080 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2081 	if (err) {
2082 		pr_err("Cancelled inflight0 request did not reset\n");
2083 		goto out;
2084 	}
2085 
2086 out:
2087 	i915_request_put(rq);
2088 	if (igt_flush_test(arg->engine->i915))
2089 		err = -EIO;
2090 	return err;
2091 }
2092 
2093 static int live_preempt_cancel(void *arg)
2094 {
2095 	struct intel_gt *gt = arg;
2096 	struct live_preempt_cancel data;
2097 	enum intel_engine_id id;
2098 	int err = -ENOMEM;
2099 
2100 	/*
2101 	 * To cancel an inflight context, we need to first remove it from the
2102 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2103 	 */
2104 
2105 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2106 		return 0;
2107 
2108 	if (preempt_client_init(gt, &data.a))
2109 		return -ENOMEM;
2110 	if (preempt_client_init(gt, &data.b))
2111 		goto err_client_a;
2112 
2113 	for_each_engine(data.engine, gt, id) {
2114 		if (!intel_engine_has_preemption(data.engine))
2115 			continue;
2116 
2117 		err = __cancel_active0(&data);
2118 		if (err)
2119 			goto err_wedged;
2120 
2121 		err = __cancel_active1(&data);
2122 		if (err)
2123 			goto err_wedged;
2124 
2125 		err = __cancel_queued(&data);
2126 		if (err)
2127 			goto err_wedged;
2128 
2129 		err = __cancel_hostile(&data);
2130 		if (err)
2131 			goto err_wedged;
2132 	}
2133 
2134 	err = 0;
2135 err_client_b:
2136 	preempt_client_fini(&data.b);
2137 err_client_a:
2138 	preempt_client_fini(&data.a);
2139 	return err;
2140 
2141 err_wedged:
2142 	GEM_TRACE_DUMP();
2143 	igt_spinner_end(&data.b.spin);
2144 	igt_spinner_end(&data.a.spin);
2145 	intel_gt_set_wedged(gt);
2146 	goto err_client_b;
2147 }
2148 
2149 static int live_suppress_self_preempt(void *arg)
2150 {
2151 	struct intel_gt *gt = arg;
2152 	struct intel_engine_cs *engine;
2153 	struct i915_sched_attr attr = {
2154 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2155 	};
2156 	struct preempt_client a, b;
2157 	enum intel_engine_id id;
2158 	int err = -ENOMEM;
2159 
2160 	/*
2161 	 * Verify that if a preemption request does not cause a change in
2162 	 * the current execution order, the preempt-to-idle injection is
2163 	 * skipped and that we do not accidentally apply it after the CS
2164 	 * completion event.
2165 	 */
2166 
2167 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2168 		return 0;
2169 
2170 	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */
2172 
2173 	if (intel_vgpu_active(gt->i915))
2174 		return 0; /* GVT forces single port & request submission */
2175 
2176 	if (preempt_client_init(gt, &a))
2177 		return -ENOMEM;
2178 	if (preempt_client_init(gt, &b))
2179 		goto err_client_a;
2180 
2181 	for_each_engine(engine, gt, id) {
2182 		struct i915_request *rq_a, *rq_b;
2183 		int depth;
2184 
2185 		if (!intel_engine_has_preemption(engine))
2186 			continue;
2187 
2188 		if (igt_flush_test(gt->i915))
2189 			goto err_wedged;
2190 
2191 		intel_engine_pm_get(engine);
2192 		engine->execlists.preempt_hang.count = 0;
2193 
2194 		rq_a = spinner_create_request(&a.spin,
2195 					      a.ctx, engine,
2196 					      MI_NOOP);
2197 		if (IS_ERR(rq_a)) {
2198 			err = PTR_ERR(rq_a);
2199 			intel_engine_pm_put(engine);
2200 			goto err_client_b;
2201 		}
2202 
2203 		i915_request_add(rq_a);
2204 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2205 			pr_err("First client failed to start\n");
2206 			intel_engine_pm_put(engine);
2207 			goto err_wedged;
2208 		}
2209 
2210 		/* Keep postponing the timer to avoid premature slicing */
2211 		mod_timer(&engine->execlists.timer, jiffies + HZ);
2212 		for (depth = 0; depth < 8; depth++) {
2213 			rq_b = spinner_create_request(&b.spin,
2214 						      b.ctx, engine,
2215 						      MI_NOOP);
2216 			if (IS_ERR(rq_b)) {
2217 				err = PTR_ERR(rq_b);
2218 				intel_engine_pm_put(engine);
2219 				goto err_client_b;
2220 			}
2221 			i915_request_add(rq_b);
2222 
2223 			GEM_BUG_ON(i915_request_completed(rq_a));
2224 			engine->schedule(rq_a, &attr);
2225 			igt_spinner_end(&a.spin);
2226 
2227 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2228 				pr_err("Second client failed to start\n");
2229 				intel_engine_pm_put(engine);
2230 				goto err_wedged;
2231 			}
2232 
2233 			swap(a, b);
2234 			rq_a = rq_b;
2235 		}
2236 		igt_spinner_end(&a.spin);
2237 
2238 		if (engine->execlists.preempt_hang.count) {
2239 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2240 			       engine->name,
2241 			       engine->execlists.preempt_hang.count,
2242 			       depth);
2243 			intel_engine_pm_put(engine);
2244 			err = -EINVAL;
2245 			goto err_client_b;
2246 		}
2247 
2248 		intel_engine_pm_put(engine);
2249 		if (igt_flush_test(gt->i915))
2250 			goto err_wedged;
2251 	}
2252 
2253 	err = 0;
2254 err_client_b:
2255 	preempt_client_fini(&b);
2256 err_client_a:
2257 	preempt_client_fini(&a);
2258 	return err;
2259 
2260 err_wedged:
2261 	igt_spinner_end(&b.spin);
2262 	igt_spinner_end(&a.spin);
2263 	intel_gt_set_wedged(gt);
2264 	err = -EIO;
2265 	goto err_client_b;
2266 }
2267 
2268 static int __i915_sw_fence_call
2269 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
2270 {
2271 	return NOTIFY_DONE;
2272 }
2273 
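/*
 * Fabricate a request that is never submitted and can never complete.
 * Installing it as the last request on a timeline makes the following
 * request look like it has a busy predecessor, which is used below to
 * suppress the NEWCLIENT priority promotion.
 */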
2274 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2275 {
2276 	struct i915_request *rq;
2277 
2278 	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2279 	if (!rq)
2280 		return NULL;
2281 
2282 	rq->engine = engine;
2283 
2284 	spin_lock_init(&rq->lock);
2285 	INIT_LIST_HEAD(&rq->fence.cb_list);
2286 	rq->fence.lock = &rq->lock;
2287 	rq->fence.ops = &i915_fence_ops;
2288 
2289 	i915_sched_node_init(&rq->sched);
2290 
2291 	/* mark this request as permanently incomplete */
2292 	rq->fence.seqno = 1;
2293 	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2294 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
2295 	GEM_BUG_ON(i915_request_completed(rq));
2296 
2297 	i915_sw_fence_init(&rq->submit, dummy_notify);
2298 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2299 
2304 	return rq;
2305 }
2306 
2307 static void dummy_request_free(struct i915_request *dummy)
2308 {
2309 	/* We have to fake the CS interrupt to kick the next request */
2310 	i915_sw_fence_commit(&dummy->submit);
2311 
2312 	i915_request_mark_complete(dummy);
2313 	dma_fence_signal(&dummy->fence);
2314 
2315 	i915_sched_node_fini(&dummy->sched);
2316 	i915_sw_fence_fini(&dummy->submit);
2317 
2318 	dma_fence_free(&dummy->fence);
2319 }
2320 
2321 static int live_suppress_wait_preempt(void *arg)
2322 {
2323 	struct intel_gt *gt = arg;
2324 	struct preempt_client client[4];
2325 	struct i915_request *rq[ARRAY_SIZE(client)] = {};
2326 	struct intel_engine_cs *engine;
2327 	enum intel_engine_id id;
2328 	int err = -ENOMEM;
2329 	int i;
2330 
2331 	/*
2332 	 * Waiters are given a little priority nudge, but not enough
2333 	 * to actually cause any preemption. Double check that we do
2334 	 * not needlessly generate preempt-to-idle cycles.
2335 	 */
2336 
2337 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2338 		return 0;
2339 
2340 	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2341 		return -ENOMEM;
2342 	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2343 		goto err_client_0;
2344 	if (preempt_client_init(gt, &client[2])) /* head of queue */
2345 		goto err_client_1;
2346 	if (preempt_client_init(gt, &client[3])) /* bystander */
2347 		goto err_client_2;
2348 
2349 	for_each_engine(engine, gt, id) {
2350 		int depth;
2351 
2352 		if (!intel_engine_has_preemption(engine))
2353 			continue;
2354 
2355 		if (!engine->emit_init_breadcrumb)
2356 			continue;
2357 
2358 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2359 			struct i915_request *dummy;
2360 
2361 			engine->execlists.preempt_hang.count = 0;
2362 
2363 			dummy = dummy_request(engine);
2364 			if (!dummy)
2365 				goto err_client_3;
2366 
2367 			for (i = 0; i < ARRAY_SIZE(client); i++) {
2368 				struct i915_request *this;
2369 
2370 				this = spinner_create_request(&client[i].spin,
2371 							      client[i].ctx, engine,
2372 							      MI_NOOP);
2373 				if (IS_ERR(this)) {
2374 					err = PTR_ERR(this);
2375 					goto err_wedged;
2376 				}
2377 
2378 				/* Disable NEWCLIENT promotion */
2379 				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
2380 							&dummy->fence);
2381 
2382 				rq[i] = i915_request_get(this);
2383 				i915_request_add(this);
2384 			}
2385 
2386 			dummy_request_free(dummy);
2387 
2388 			GEM_BUG_ON(i915_request_completed(rq[0]));
2389 			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2390 				pr_err("%s: First client failed to start\n",
2391 				       engine->name);
2392 				goto err_wedged;
2393 			}
2394 			GEM_BUG_ON(!i915_request_started(rq[0]));
2395 
2396 			if (i915_request_wait(rq[depth],
2397 					      I915_WAIT_PRIORITY,
2398 					      1) != -ETIME) {
2399 				pr_err("%s: Waiter depth:%d completed!\n",
2400 				       engine->name, depth);
2401 				goto err_wedged;
2402 			}
2403 
2404 			for (i = 0; i < ARRAY_SIZE(client); i++) {
2405 				igt_spinner_end(&client[i].spin);
2406 				i915_request_put(rq[i]);
2407 				rq[i] = NULL;
2408 			}
2409 
2410 			if (igt_flush_test(gt->i915))
2411 				goto err_wedged;
2412 
2413 			if (engine->execlists.preempt_hang.count) {
2414 				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2415 				       engine->name,
2416 				       engine->execlists.preempt_hang.count,
2417 				       depth);
2418 				err = -EINVAL;
2419 				goto err_client_3;
2420 			}
2421 		}
2422 	}
2423 
2424 	err = 0;
2425 err_client_3:
2426 	preempt_client_fini(&client[3]);
2427 err_client_2:
2428 	preempt_client_fini(&client[2]);
2429 err_client_1:
2430 	preempt_client_fini(&client[1]);
2431 err_client_0:
2432 	preempt_client_fini(&client[0]);
2433 	return err;
2434 
2435 err_wedged:
2436 	for (i = 0; i < ARRAY_SIZE(client); i++) {
2437 		igt_spinner_end(&client[i].spin);
2438 		i915_request_put(rq[i]);
2439 	}
2440 	intel_gt_set_wedged(gt);
2441 	err = -EIO;
2442 	goto err_client_3;
2443 }
2444 
2445 static int live_chain_preempt(void *arg)
2446 {
2447 	struct intel_gt *gt = arg;
2448 	struct intel_engine_cs *engine;
2449 	struct preempt_client hi, lo;
2450 	enum intel_engine_id id;
2451 	int err = -ENOMEM;
2452 
2453 	/*
2454 	 * Build a chain AB...BA between two contexts (A, B) and request
2455 	 * preemption of the last request. It should then complete before
2456 	 * the previously submitted spinner in B.
2457 	 */
2458 
2459 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2460 		return 0;
2461 
2462 	if (preempt_client_init(gt, &hi))
2463 		return -ENOMEM;
2464 
2465 	if (preempt_client_init(gt, &lo))
2466 		goto err_client_hi;
2467 
2468 	for_each_engine(engine, gt, id) {
2469 		struct i915_sched_attr attr = {
2470 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2471 		};
2472 		struct igt_live_test t;
2473 		struct i915_request *rq;
2474 		int ring_size, count, i;
2475 
2476 		if (!intel_engine_has_preemption(engine))
2477 			continue;
2478 
2479 		rq = spinner_create_request(&lo.spin,
2480 					    lo.ctx, engine,
2481 					    MI_ARB_CHECK);
2482 		if (IS_ERR(rq))
2483 			goto err_wedged;
2484 
2485 		i915_request_get(rq);
2486 		i915_request_add(rq);
2487 
2488 		ring_size = rq->wa_tail - rq->head;
2489 		if (ring_size < 0)
2490 			ring_size += rq->ring->size;
2491 		ring_size = rq->ring->size / ring_size;
2492 		pr_debug("%s(%s): Using maximum of %d requests\n",
2493 			 __func__, engine->name, ring_size);
2494 
2495 		igt_spinner_end(&lo.spin);
2496 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2497 			pr_err("Timed out waiting to flush %s\n", engine->name);
2498 			i915_request_put(rq);
2499 			goto err_wedged;
2500 		}
2501 		i915_request_put(rq);
2502 
2503 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2504 			err = -EIO;
2505 			goto err_wedged;
2506 		}
2507 
2508 		for_each_prime_number_from(count, 1, ring_size) {
2509 			rq = spinner_create_request(&hi.spin,
2510 						    hi.ctx, engine,
2511 						    MI_ARB_CHECK);
2512 			if (IS_ERR(rq))
2513 				goto err_wedged;
2514 			i915_request_add(rq);
2515 			if (!igt_wait_for_spinner(&hi.spin, rq))
2516 				goto err_wedged;
2517 
2518 			rq = spinner_create_request(&lo.spin,
2519 						    lo.ctx, engine,
2520 						    MI_ARB_CHECK);
2521 			if (IS_ERR(rq))
2522 				goto err_wedged;
2523 			i915_request_add(rq);
2524 
2525 			for (i = 0; i < count; i++) {
2526 				rq = igt_request_alloc(lo.ctx, engine);
2527 				if (IS_ERR(rq))
2528 					goto err_wedged;
2529 				i915_request_add(rq);
2530 			}
2531 
2532 			rq = igt_request_alloc(hi.ctx, engine);
2533 			if (IS_ERR(rq))
2534 				goto err_wedged;
2535 
2536 			i915_request_get(rq);
2537 			i915_request_add(rq);
2538 			engine->schedule(rq, &attr);
2539 
2540 			igt_spinner_end(&hi.spin);
2541 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2542 				struct drm_printer p =
2543 					drm_info_printer(gt->i915->drm.dev);
2544 
2545 				pr_err("Failed to preempt over chain of %d\n",
2546 				       count);
2547 				intel_engine_dump(engine, &p,
2548 						  "%s\n", engine->name);
2549 				i915_request_put(rq);
2550 				goto err_wedged;
2551 			}
2552 			igt_spinner_end(&lo.spin);
2553 			i915_request_put(rq);
2554 
2555 			rq = igt_request_alloc(lo.ctx, engine);
2556 			if (IS_ERR(rq))
2557 				goto err_wedged;
2558 
2559 			i915_request_get(rq);
2560 			i915_request_add(rq);
2561 
2562 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2563 				struct drm_printer p =
2564 					drm_info_printer(gt->i915->drm.dev);
2565 
2566 				pr_err("Failed to flush low priority chain of %d requests\n",
2567 				       count);
2568 				intel_engine_dump(engine, &p,
2569 						  "%s\n", engine->name);
2570 
2571 				i915_request_put(rq);
2572 				goto err_wedged;
2573 			}
2574 			i915_request_put(rq);
2575 		}
2576 
2577 		if (igt_live_test_end(&t)) {
2578 			err = -EIO;
2579 			goto err_wedged;
2580 		}
2581 	}
2582 
2583 	err = 0;
2584 err_client_lo:
2585 	preempt_client_fini(&lo);
2586 err_client_hi:
2587 	preempt_client_fini(&hi);
2588 	return err;
2589 
2590 err_wedged:
2591 	igt_spinner_end(&hi.spin);
2592 	igt_spinner_end(&lo.spin);
2593 	intel_gt_set_wedged(gt);
2594 	err = -EIO;
2595 	goto err_client_lo;
2596 }
2597 
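/*
 * Append one more batch to the gang. Each batch spins on a semaphore in
 * its own page (waiting for the first dword to become zero) and, once
 * released, clears the semaphore of the previously created, lower
 * priority batch so that the gang unwinds from newest to oldest.
 */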
2598 static int create_gang(struct intel_engine_cs *engine,
2599 		       struct i915_request **prev)
2600 {
2601 	struct drm_i915_gem_object *obj;
2602 	struct intel_context *ce;
2603 	struct i915_request *rq;
2604 	struct i915_vma *vma;
2605 	u32 *cs;
2606 	int err;
2607 
2608 	ce = intel_context_create(engine);
2609 	if (IS_ERR(ce))
2610 		return PTR_ERR(ce);
2611 
2612 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2613 	if (IS_ERR(obj)) {
2614 		err = PTR_ERR(obj);
2615 		goto err_ce;
2616 	}
2617 
2618 	vma = i915_vma_instance(obj, ce->vm, NULL);
2619 	if (IS_ERR(vma)) {
2620 		err = PTR_ERR(vma);
2621 		goto err_obj;
2622 	}
2623 
2624 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2625 	if (err)
2626 		goto err_obj;
2627 
2628 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}
2631 
2632 	/* Semaphore target: spin until zero */
2633 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2634 
2635 	*cs++ = MI_SEMAPHORE_WAIT |
2636 		MI_SEMAPHORE_POLL |
2637 		MI_SEMAPHORE_SAD_EQ_SDD;
2638 	*cs++ = 0;
2639 	*cs++ = lower_32_bits(vma->node.start);
2640 	*cs++ = upper_32_bits(vma->node.start);
2641 
2642 	if (*prev) {
2643 		u64 offset = (*prev)->batch->node.start;
2644 
2645 		/* Terminate the spinner in the next lower priority batch. */
2646 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2647 		*cs++ = lower_32_bits(offset);
2648 		*cs++ = upper_32_bits(offset);
2649 		*cs++ = 0;
2650 	}
2651 
2652 	*cs++ = MI_BATCH_BUFFER_END;
2653 	i915_gem_object_flush_map(obj);
2654 	i915_gem_object_unpin_map(obj);
2655 
2656 	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_obj;
	}
2659 
2660 	rq->batch = i915_vma_get(vma);
2661 	i915_request_get(rq);
2662 
2663 	i915_vma_lock(vma);
2664 	err = i915_request_await_object(rq, vma->obj, false);
2665 	if (!err)
2666 		err = i915_vma_move_to_active(vma, rq, 0);
2667 	if (!err)
2668 		err = rq->engine->emit_bb_start(rq,
2669 						vma->node.start,
2670 						PAGE_SIZE, 0);
2671 	i915_vma_unlock(vma);
2672 	i915_request_add(rq);
2673 	if (err)
2674 		goto err_rq;
2675 
2676 	i915_gem_object_put(obj);
2677 	intel_context_put(ce);
2678 
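	/*
	 * Link the new request in front of the previous one so that the
	 * gang can later be walked from newest (highest priority) to
	 * oldest via client_link.
	 */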
2679 	rq->client_link.next = &(*prev)->client_link;
2680 	*prev = rq;
2681 	return 0;
2682 
2683 err_rq:
2684 	i915_vma_put(rq->batch);
2685 	i915_request_put(rq);
2686 err_obj:
2687 	i915_gem_object_put(obj);
2688 err_ce:
2689 	intel_context_put(ce);
2690 	return err;
2691 }
2692 
2693 static int live_preempt_gang(void *arg)
2694 {
2695 	struct intel_gt *gt = arg;
2696 	struct intel_engine_cs *engine;
2697 	enum intel_engine_id id;
2698 
2699 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2700 		return 0;
2701 
2702 	/*
2703 	 * Build as long a chain of preempters as we can, with each
2704 	 * request higher priority than the last. Once we are ready, we release
	 * the last batch which then percolates down the chain, each releasing
2706 	 * the next oldest in turn. The intent is to simply push as hard as we
2707 	 * can with the number of preemptions, trying to exceed narrow HW
2708 	 * limits. At a minimum, we insist that we can sort all the user
2709 	 * high priority levels into execution order.
2710 	 */
2711 
2712 	for_each_engine(engine, gt, id) {
2713 		struct i915_request *rq = NULL;
2714 		struct igt_live_test t;
2715 		IGT_TIMEOUT(end_time);
2716 		int prio = 0;
2717 		int err = 0;
2718 		u32 *cs;
2719 
2720 		if (!intel_engine_has_preemption(engine))
2721 			continue;
2722 
2723 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2724 			return -EIO;
2725 
2726 		do {
2727 			struct i915_sched_attr attr = {
2728 				.priority = I915_USER_PRIORITY(prio++),
2729 			};
2730 
2731 			err = create_gang(engine, &rq);
2732 			if (err)
2733 				break;
2734 
2735 			/* Submit each spinner at increasing priority */
2736 			engine->schedule(rq, &attr);
2737 
2738 			if (prio <= I915_PRIORITY_MAX)
2739 				continue;
2740 
2741 			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2742 				break;
2743 
2744 			if (__igt_timeout(end_time, NULL))
2745 				break;
2746 		} while (1);
2747 		pr_debug("%s: Preempt chain of %d requests\n",
2748 			 engine->name, prio);
2749 
		/*
		 * The last spinner submitted is the highest priority and
		 * should execute first. When it completes, it releases the
		 * next lowest spinner, and so on until there are no more
		 * spinners and the gang is complete.
		 */
2756 		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2757 		if (!IS_ERR(cs)) {
2758 			*cs = 0;
2759 			i915_gem_object_unpin_map(rq->batch->obj);
2760 		} else {
2761 			err = PTR_ERR(cs);
2762 			intel_gt_set_wedged(gt);
2763 		}
2764 
2765 		while (rq) { /* wait for each rq from highest to lowest prio */
2766 			struct i915_request *n =
2767 				list_next_entry(rq, client_link);
2768 
2769 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2770 				struct drm_printer p =
2771 					drm_info_printer(engine->i915->drm.dev);
2772 
2773 				pr_err("Failed to flush chain of %d requests, at %d\n",
2774 				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2775 				intel_engine_dump(engine, &p,
2776 						  "%s\n", engine->name);
2777 
2778 				err = -ETIME;
2779 			}
2780 
2781 			i915_vma_put(rq->batch);
2782 			i915_request_put(rq);
2783 			rq = n;
2784 		}
2785 
2786 		if (igt_live_test_end(&t))
2787 			err = -EIO;
2788 		if (err)
2789 			return err;
2790 	}
2791 
2792 	return 0;
2793 }
2794 
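/*
 * Build a user batch that increments each context-saved GPR in turn,
 * stores the new value into this client's slot of the result buffer and
 * then waits on a semaphore in that buffer, giving us well defined points
 * at which to inject preemption mid-batch.
 */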
2795 static struct i915_vma *
2796 create_gpr_user(struct intel_engine_cs *engine,
2797 		struct i915_vma *result,
2798 		unsigned int offset)
2799 {
2800 	struct drm_i915_gem_object *obj;
2801 	struct i915_vma *vma;
2802 	u32 *cs;
2803 	int err;
2804 	int i;
2805 
2806 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2807 	if (IS_ERR(obj))
2808 		return ERR_CAST(obj);
2809 
2810 	vma = i915_vma_instance(obj, result->vm, NULL);
2811 	if (IS_ERR(vma)) {
2812 		i915_gem_object_put(obj);
2813 		return vma;
2814 	}
2815 
2816 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2817 	if (err) {
2818 		i915_vma_put(vma);
2819 		return ERR_PTR(err);
2820 	}
2821 
2822 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2823 	if (IS_ERR(cs)) {
2824 		i915_vma_put(vma);
2825 		return ERR_CAST(cs);
2826 	}
2827 
2828 	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
2829 	*cs++ = MI_LOAD_REGISTER_IMM(1);
2830 	*cs++ = CS_GPR(engine, 0);
2831 	*cs++ = 1;
2832 
2833 	for (i = 1; i < NUM_GPR; i++) {
2834 		u64 addr;
2835 
2836 		/*
2837 		 * Perform: GPR[i]++
2838 		 *
2839 		 * As we read and write into the context saved GPR[i], if
2840 		 * we restart this batch buffer from an earlier point, we
2841 		 * will repeat the increment and store a value > 1.
2842 		 */
2843 		*cs++ = MI_MATH(4);
2844 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
2845 		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
2846 		*cs++ = MI_MATH_ADD;
2847 		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
2848 
2849 		addr = result->node.start + offset + i * sizeof(*cs);
2850 		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
2851 		*cs++ = CS_GPR(engine, 2 * i);
2852 		*cs++ = lower_32_bits(addr);
2853 		*cs++ = upper_32_bits(addr);
2854 
2855 		*cs++ = MI_SEMAPHORE_WAIT |
2856 			MI_SEMAPHORE_POLL |
2857 			MI_SEMAPHORE_SAD_GTE_SDD;
2858 		*cs++ = i;
2859 		*cs++ = lower_32_bits(result->node.start);
2860 		*cs++ = upper_32_bits(result->node.start);
2861 	}
2862 
2863 	*cs++ = MI_BATCH_BUFFER_END;
2864 	i915_gem_object_flush_map(obj);
2865 	i915_gem_object_unpin_map(obj);
2866 
2867 	return vma;
2868 }
2869 
2870 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
2871 {
2872 	struct drm_i915_gem_object *obj;
2873 	struct i915_vma *vma;
2874 	int err;
2875 
2876 	obj = i915_gem_object_create_internal(gt->i915, sz);
2877 	if (IS_ERR(obj))
2878 		return ERR_CAST(obj);
2879 
2880 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
2881 	if (IS_ERR(vma)) {
2882 		i915_gem_object_put(obj);
2883 		return vma;
2884 	}
2885 
2886 	err = i915_ggtt_pin(vma, 0, 0);
2887 	if (err) {
2888 		i915_vma_put(vma);
2889 		return ERR_PTR(err);
2890 	}
2891 
2892 	return vma;
2893 }
2894 
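/*
 * Create a fresh context, bind the shared result buffer into its address
 * space and submit the GPR batch, returning the request so the caller can
 * wait on it and inspect this client's results.
 */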
2895 static struct i915_request *
2896 create_gpr_client(struct intel_engine_cs *engine,
2897 		  struct i915_vma *global,
2898 		  unsigned int offset)
2899 {
2900 	struct i915_vma *batch, *vma;
2901 	struct intel_context *ce;
2902 	struct i915_request *rq;
2903 	int err;
2904 
2905 	ce = intel_context_create(engine);
2906 	if (IS_ERR(ce))
2907 		return ERR_CAST(ce);
2908 
2909 	vma = i915_vma_instance(global->obj, ce->vm, NULL);
2910 	if (IS_ERR(vma)) {
2911 		err = PTR_ERR(vma);
2912 		goto out_ce;
2913 	}
2914 
2915 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2916 	if (err)
2917 		goto out_ce;
2918 
2919 	batch = create_gpr_user(engine, vma, offset);
2920 	if (IS_ERR(batch)) {
2921 		err = PTR_ERR(batch);
2922 		goto out_vma;
2923 	}
2924 
2925 	rq = intel_context_create_request(ce);
2926 	if (IS_ERR(rq)) {
2927 		err = PTR_ERR(rq);
2928 		goto out_batch;
2929 	}
2930 
2931 	i915_vma_lock(vma);
2932 	err = i915_request_await_object(rq, vma->obj, false);
2933 	if (!err)
2934 		err = i915_vma_move_to_active(vma, rq, 0);
2935 	i915_vma_unlock(vma);
2936 
2937 	i915_vma_lock(batch);
2938 	if (!err)
2939 		err = i915_request_await_object(rq, batch->obj, false);
2940 	if (!err)
2941 		err = i915_vma_move_to_active(batch, rq, 0);
2942 	if (!err)
2943 		err = rq->engine->emit_bb_start(rq,
2944 						batch->node.start,
2945 						PAGE_SIZE, 0);
2946 	i915_vma_unlock(batch);
2947 	i915_vma_unpin(batch);
2948 
2949 	if (!err)
2950 		i915_request_get(rq);
2951 	i915_request_add(rq);
2952 
2953 out_batch:
2954 	i915_vma_put(batch);
2955 out_vma:
2956 	i915_vma_unpin(vma);
2957 out_ce:
2958 	intel_context_put(ce);
2959 	return err ? ERR_PTR(err) : rq;
2960 }
2961 
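/*
 * Submit a maximum priority kernel request that writes the next semaphore
 * value into the global buffer, preempting whichever client batch is
 * currently executing and allowing the clients to advance one step.
 */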
2962 static int preempt_user(struct intel_engine_cs *engine,
2963 			struct i915_vma *global,
2964 			int id)
2965 {
2966 	struct i915_sched_attr attr = {
2967 		.priority = I915_PRIORITY_MAX
2968 	};
2969 	struct i915_request *rq;
2970 	int err = 0;
2971 	u32 *cs;
2972 
2973 	rq = intel_engine_create_kernel_request(engine);
2974 	if (IS_ERR(rq))
2975 		return PTR_ERR(rq);
2976 
2977 	cs = intel_ring_begin(rq, 4);
2978 	if (IS_ERR(cs)) {
2979 		i915_request_add(rq);
2980 		return PTR_ERR(cs);
2981 	}
2982 
2983 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2984 	*cs++ = i915_ggtt_offset(global);
2985 	*cs++ = 0;
2986 	*cs++ = id;
2987 
2988 	intel_ring_advance(rq, cs);
2989 
2990 	i915_request_get(rq);
2991 	i915_request_add(rq);
2992 
2993 	engine->schedule(rq, &attr);
2994 
2995 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
2996 		err = -ETIME;
2997 	i915_request_put(rq);
2998 
2999 	return err;
3000 }
3001 
3002 static int live_preempt_user(void *arg)
3003 {
3004 	struct intel_gt *gt = arg;
3005 	struct intel_engine_cs *engine;
3006 	struct i915_vma *global;
3007 	enum intel_engine_id id;
3008 	u32 *result;
3009 	int err = 0;
3010 
3011 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3012 		return 0;
3013 
3014 	/*
3015 	 * In our other tests, we look at preemption in carefully
3016 	 * controlled conditions in the ringbuffer. Since most of the
3017 	 * time is spent in user batches, most of our preemptions naturally
3018 	 * occur there. We want to verify that when we preempt inside a batch
3019 	 * we continue on from the current instruction and do not roll back
3020 	 * to the start, or another earlier arbitration point.
3021 	 *
3022 	 * To verify this, we create a batch which is a mixture of
3023 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3024 	 * a few preempting contexts thrown into the mix, we look for any
3025 	 * repeated instructions (which show up as incorrect values).
3026 	 */
3027 
3028 	global = create_global(gt, 4096);
3029 	if (IS_ERR(global))
3030 		return PTR_ERR(global);
3031 
3032 	result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3033 	if (IS_ERR(result)) {
3034 		i915_vma_unpin_and_release(&global, 0);
3035 		return PTR_ERR(result);
3036 	}
3037 
3038 	for_each_engine(engine, gt, id) {
3039 		struct i915_request *client[3] = {};
3040 		struct igt_live_test t;
3041 		int i;
3042 
3043 		if (!intel_engine_has_preemption(engine))
3044 			continue;
3045 
3046 		if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3047 			continue; /* we need per-context GPR */
3048 
3049 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3050 			err = -EIO;
3051 			break;
3052 		}
3053 
3054 		memset(result, 0, 4096);
3055 
3056 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3057 			struct i915_request *rq;
3058 
3059 			rq = create_gpr_client(engine, global,
3060 					       NUM_GPR * i * sizeof(u32));
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto end_test;
			}
3063 
3064 			client[i] = rq;
3065 		}
3066 
3067 		/* Continuously preempt the set of 3 running contexts */
3068 		for (i = 1; i <= NUM_GPR; i++) {
3069 			err = preempt_user(engine, global, i);
3070 			if (err)
3071 				goto end_test;
3072 		}
3073 
3074 		if (READ_ONCE(result[0]) != NUM_GPR) {
3075 			pr_err("%s: Failed to release semaphore\n",
3076 			       engine->name);
3077 			err = -EIO;
3078 			goto end_test;
3079 		}
3080 
3081 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3082 			int gpr;
3083 
3084 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3085 				err = -ETIME;
3086 				goto end_test;
3087 			}
3088 
3089 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3090 				if (result[NUM_GPR * i + gpr] != 1) {
3091 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3092 					       engine->name,
3093 					       i, gpr, result[NUM_GPR * i + gpr]);
3094 					err = -EINVAL;
3095 					goto end_test;
3096 				}
3097 			}
3098 		}
3099 
3100 end_test:
3101 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3102 			if (!client[i])
3103 				break;
3104 
3105 			i915_request_put(client[i]);
3106 		}
3107 
3108 		/* Flush the semaphores on error */
3109 		smp_store_mb(result[0], -1);
3110 		if (igt_live_test_end(&t))
3111 			err = -EIO;
3112 		if (err)
3113 			break;
3114 	}
3115 
3116 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3117 	return err;
3118 }
3119 
3120 static int live_preempt_timeout(void *arg)
3121 {
3122 	struct intel_gt *gt = arg;
3123 	struct i915_gem_context *ctx_hi, *ctx_lo;
3124 	struct igt_spinner spin_lo;
3125 	struct intel_engine_cs *engine;
3126 	enum intel_engine_id id;
3127 	int err = -ENOMEM;
3128 
3129 	/*
3130 	 * Check that we force preemption to occur by cancelling the previous
3131 	 * context if it refuses to yield the GPU.
3132 	 */
3133 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3134 		return 0;
3135 
3136 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3137 		return 0;
3138 
3139 	if (!intel_has_reset_engine(gt))
3140 		return 0;
3141 
3142 	if (igt_spinner_init(&spin_lo, gt))
3143 		return -ENOMEM;
3144 
3145 	ctx_hi = kernel_context(gt->i915);
3146 	if (!ctx_hi)
3147 		goto err_spin_lo;
3148 	ctx_hi->sched.priority =
3149 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3150 
3151 	ctx_lo = kernel_context(gt->i915);
3152 	if (!ctx_lo)
3153 		goto err_ctx_hi;
3154 	ctx_lo->sched.priority =
3155 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3156 
3157 	for_each_engine(engine, gt, id) {
3158 		unsigned long saved_timeout;
3159 		struct i915_request *rq;
3160 
3161 		if (!intel_engine_has_preemption(engine))
3162 			continue;
3163 
3164 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3165 					    MI_NOOP); /* preemption disabled */
3166 		if (IS_ERR(rq)) {
3167 			err = PTR_ERR(rq);
3168 			goto err_ctx_lo;
3169 		}
3170 
3171 		i915_request_add(rq);
3172 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3173 			intel_gt_set_wedged(gt);
3174 			err = -EIO;
3175 			goto err_ctx_lo;
3176 		}
3177 
3178 		rq = igt_request_alloc(ctx_hi, engine);
3179 		if (IS_ERR(rq)) {
3180 			igt_spinner_end(&spin_lo);
3181 			err = PTR_ERR(rq);
3182 			goto err_ctx_lo;
3183 		}
3184 
3185 		/* Flush the previous CS ack before changing timeouts */
3186 		while (READ_ONCE(engine->execlists.pending[0]))
3187 			cpu_relax();
3188 
3189 		saved_timeout = engine->props.preempt_timeout_ms;
		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
3191 
3192 		i915_request_get(rq);
3193 		i915_request_add(rq);
3194 
3195 		intel_engine_flush_submission(engine);
3196 		engine->props.preempt_timeout_ms = saved_timeout;
3197 
3198 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3199 			intel_gt_set_wedged(gt);
3200 			i915_request_put(rq);
3201 			err = -ETIME;
3202 			goto err_ctx_lo;
3203 		}
3204 
3205 		igt_spinner_end(&spin_lo);
3206 		i915_request_put(rq);
3207 	}
3208 
3209 	err = 0;
3210 err_ctx_lo:
3211 	kernel_context_close(ctx_lo);
3212 err_ctx_hi:
3213 	kernel_context_close(ctx_hi);
3214 err_spin_lo:
3215 	igt_spinner_fini(&spin_lo);
3216 	return err;
3217 }
3218 
3219 static int random_range(struct rnd_state *rnd, int min, int max)
3220 {
3221 	return i915_prandom_u32_max_state(max - min, rnd) + min;
3222 }
3223 
3224 static int random_priority(struct rnd_state *rnd)
3225 {
3226 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3227 }
3228 
3229 struct preempt_smoke {
3230 	struct intel_gt *gt;
3231 	struct i915_gem_context **contexts;
3232 	struct intel_engine_cs *engine;
3233 	struct drm_i915_gem_object *batch;
3234 	unsigned int ncontext;
3235 	struct rnd_state prng;
3236 	unsigned long count;
3237 };
3238 
3239 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3240 {
3241 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3242 							  &smoke->prng)];
3243 }
3244 
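/*
 * Submit a single request for the given context at the chosen priority,
 * optionally running the MI_ARB_CHECK batch so that the request can be
 * preempted while it is executing on the GPU.
 */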
3245 static int smoke_submit(struct preempt_smoke *smoke,
3246 			struct i915_gem_context *ctx, int prio,
3247 			struct drm_i915_gem_object *batch)
3248 {
3249 	struct i915_request *rq;
3250 	struct i915_vma *vma = NULL;
3251 	int err = 0;
3252 
3253 	if (batch) {
3254 		struct i915_address_space *vm;
3255 
3256 		vm = i915_gem_context_get_vm_rcu(ctx);
3257 		vma = i915_vma_instance(batch, vm, NULL);
3258 		i915_vm_put(vm);
3259 		if (IS_ERR(vma))
3260 			return PTR_ERR(vma);
3261 
3262 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3263 		if (err)
3264 			return err;
3265 	}
3266 
3267 	ctx->sched.priority = prio;
3268 
3269 	rq = igt_request_alloc(ctx, smoke->engine);
3270 	if (IS_ERR(rq)) {
3271 		err = PTR_ERR(rq);
3272 		goto unpin;
3273 	}
3274 
3275 	if (vma) {
3276 		i915_vma_lock(vma);
3277 		err = i915_request_await_object(rq, vma->obj, false);
3278 		if (!err)
3279 			err = i915_vma_move_to_active(vma, rq, 0);
3280 		if (!err)
3281 			err = rq->engine->emit_bb_start(rq,
3282 							vma->node.start,
3283 							PAGE_SIZE, 0);
3284 		i915_vma_unlock(vma);
3285 	}
3286 
3287 	i915_request_add(rq);
3288 
3289 unpin:
3290 	if (vma)
3291 		i915_vma_unpin(vma);
3292 
3293 	return err;
3294 }
3295 
3296 static int smoke_crescendo_thread(void *arg)
3297 {
3298 	struct preempt_smoke *smoke = arg;
3299 	IGT_TIMEOUT(end_time);
3300 	unsigned long count;
3301 
3302 	count = 0;
3303 	do {
3304 		struct i915_gem_context *ctx = smoke_context(smoke);
3305 		int err;
3306 
3307 		err = smoke_submit(smoke,
3308 				   ctx, count % I915_PRIORITY_MAX,
3309 				   smoke->batch);
3310 		if (err)
3311 			return err;
3312 
3313 		count++;
3314 	} while (!__igt_timeout(end_time, NULL));
3315 
3316 	smoke->count = count;
3317 	return 0;
3318 }
3319 
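/*
 * Run one submission thread per engine, each cycling its context
 * priorities upwards, so that preemption decisions are exercised
 * concurrently across all engines.
 */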
3320 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3321 #define BATCH BIT(0)
3322 {
3323 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3324 	struct preempt_smoke arg[I915_NUM_ENGINES];
3325 	struct intel_engine_cs *engine;
3326 	enum intel_engine_id id;
3327 	unsigned long count;
3328 	int err = 0;
3329 
3330 	for_each_engine(engine, smoke->gt, id) {
3331 		arg[id] = *smoke;
3332 		arg[id].engine = engine;
3333 		if (!(flags & BATCH))
3334 			arg[id].batch = NULL;
3335 		arg[id].count = 0;
3336 
		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3338 				      "igt/smoke:%d", id);
3339 		if (IS_ERR(tsk[id])) {
3340 			err = PTR_ERR(tsk[id]);
3341 			break;
3342 		}
3343 		get_task_struct(tsk[id]);
3344 	}
3345 
3346 	yield(); /* start all threads before we kthread_stop() */
3347 
3348 	count = 0;
3349 	for_each_engine(engine, smoke->gt, id) {
3350 		int status;
3351 
3352 		if (IS_ERR_OR_NULL(tsk[id]))
3353 			continue;
3354 
3355 		status = kthread_stop(tsk[id]);
3356 		if (status && !err)
3357 			err = status;
3358 
3359 		count += arg[id].count;
3360 
3361 		put_task_struct(tsk[id]);
3362 	}
3363 
3364 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3365 		count, flags,
3366 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
	return err;
3368 }
3369 
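/*
 * From a single thread, submit requests at random priorities across all
 * engines, as a counterpoint to the orderly per-engine crescendo above.
 */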
3370 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3371 {
3372 	enum intel_engine_id id;
3373 	IGT_TIMEOUT(end_time);
3374 	unsigned long count;
3375 
3376 	count = 0;
3377 	do {
3378 		for_each_engine(smoke->engine, smoke->gt, id) {
3379 			struct i915_gem_context *ctx = smoke_context(smoke);
3380 			int err;
3381 
3382 			err = smoke_submit(smoke,
3383 					   ctx, random_priority(&smoke->prng),
3384 					   flags & BATCH ? smoke->batch : NULL);
3385 			if (err)
3386 				return err;
3387 
3388 			count++;
3389 		}
3390 	} while (!__igt_timeout(end_time, NULL));
3391 
3392 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3393 		count, flags,
3394 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3395 	return 0;
3396 }
3397 
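/*
 * Flood the engines with requests at varying priorities from a large pool
 * of contexts, both in an ordered crescendo and at random, to shake out
 * races in the preemption bookkeeping.
 */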
3398 static int live_preempt_smoke(void *arg)
3399 {
3400 	struct preempt_smoke smoke = {
3401 		.gt = arg,
3402 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3403 		.ncontext = 1024,
3404 	};
3405 	const unsigned int phase[] = { 0, BATCH };
3406 	struct igt_live_test t;
3407 	int err = -ENOMEM;
3408 	u32 *cs;
3409 	int n;
3410 
3411 	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3412 		return 0;
3413 
3414 	smoke.contexts = kmalloc_array(smoke.ncontext,
3415 				       sizeof(*smoke.contexts),
3416 				       GFP_KERNEL);
3417 	if (!smoke.contexts)
3418 		return -ENOMEM;
3419 
3420 	smoke.batch =
3421 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3422 	if (IS_ERR(smoke.batch)) {
3423 		err = PTR_ERR(smoke.batch);
3424 		goto err_free;
3425 	}
3426 
3427 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3428 	if (IS_ERR(cs)) {
3429 		err = PTR_ERR(cs);
3430 		goto err_batch;
3431 	}
3432 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3433 		cs[n] = MI_ARB_CHECK;
3434 	cs[n] = MI_BATCH_BUFFER_END;
3435 	i915_gem_object_flush_map(smoke.batch);
3436 	i915_gem_object_unpin_map(smoke.batch);
3437 
3438 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3439 		err = -EIO;
3440 		goto err_batch;
3441 	}
3442 
3443 	for (n = 0; n < smoke.ncontext; n++) {
3444 		smoke.contexts[n] = kernel_context(smoke.gt->i915);
3445 		if (!smoke.contexts[n])
3446 			goto err_ctx;
3447 	}
3448 
3449 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3450 		err = smoke_crescendo(&smoke, phase[n]);
3451 		if (err)
3452 			goto err_ctx;
3453 
3454 		err = smoke_random(&smoke, phase[n]);
3455 		if (err)
3456 			goto err_ctx;
3457 	}
3458 
3459 err_ctx:
3460 	if (igt_live_test_end(&t))
3461 		err = -EIO;
3462 
3463 	for (n = 0; n < smoke.ncontext; n++) {
3464 		if (!smoke.contexts[n])
3465 			break;
3466 		kernel_context_close(smoke.contexts[n]);
3467 	}
3468 
3469 err_batch:
3470 	i915_gem_object_put(smoke.batch);
3471 err_free:
3472 	kfree(smoke.contexts);
3473 
3474 	return err;
3475 }
3476 
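/*
 * Measure the submission latency of empty requests through one or more
 * virtual engines, either interleaving the contexts (the default) or
 * submitting the whole chain for each context in turn (CHAIN).
 */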
3477 static int nop_virtual_engine(struct intel_gt *gt,
3478 			      struct intel_engine_cs **siblings,
3479 			      unsigned int nsibling,
3480 			      unsigned int nctx,
3481 			      unsigned int flags)
3482 #define CHAIN BIT(0)
3483 {
3484 	IGT_TIMEOUT(end_time);
3485 	struct i915_request *request[16] = {};
3486 	struct intel_context *ve[16];
3487 	unsigned long n, prime, nc;
3488 	struct igt_live_test t;
3489 	ktime_t times[2] = {};
3490 	int err;
3491 
3492 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3493 
3494 	for (n = 0; n < nctx; n++) {
3495 		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3496 		if (IS_ERR(ve[n])) {
3497 			err = PTR_ERR(ve[n]);
3498 			nctx = n;
3499 			goto out;
3500 		}
3501 
3502 		err = intel_context_pin(ve[n]);
3503 		if (err) {
3504 			intel_context_put(ve[n]);
3505 			nctx = n;
3506 			goto out;
3507 		}
3508 	}
3509 
3510 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3511 	if (err)
3512 		goto out;
3513 
3514 	for_each_prime_number_from(prime, 1, 8192) {
3515 		times[1] = ktime_get_raw();
3516 
3517 		if (flags & CHAIN) {
3518 			for (nc = 0; nc < nctx; nc++) {
3519 				for (n = 0; n < prime; n++) {
3520 					struct i915_request *rq;
3521 
3522 					rq = i915_request_create(ve[nc]);
3523 					if (IS_ERR(rq)) {
3524 						err = PTR_ERR(rq);
3525 						goto out;
3526 					}
3527 
3528 					if (request[nc])
3529 						i915_request_put(request[nc]);
3530 					request[nc] = i915_request_get(rq);
3531 					i915_request_add(rq);
3532 				}
3533 			}
3534 		} else {
3535 			for (n = 0; n < prime; n++) {
3536 				for (nc = 0; nc < nctx; nc++) {
3537 					struct i915_request *rq;
3538 
3539 					rq = i915_request_create(ve[nc]);
3540 					if (IS_ERR(rq)) {
3541 						err = PTR_ERR(rq);
3542 						goto out;
3543 					}
3544 
3545 					if (request[nc])
3546 						i915_request_put(request[nc]);
3547 					request[nc] = i915_request_get(rq);
3548 					i915_request_add(rq);
3549 				}
3550 			}
3551 		}
3552 
3553 		for (nc = 0; nc < nctx; nc++) {
3554 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3555 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3556 				       __func__, ve[0]->engine->name,
3557 				       request[nc]->fence.context,
3558 				       request[nc]->fence.seqno);
3559 
3560 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3561 					  __func__, ve[0]->engine->name,
3562 					  request[nc]->fence.context,
3563 					  request[nc]->fence.seqno);
3564 				GEM_TRACE_DUMP();
3565 				intel_gt_set_wedged(gt);
3566 				break;
3567 			}
3568 		}
3569 
3570 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3571 		if (prime == 1)
3572 			times[0] = times[1];
3573 
3574 		for (nc = 0; nc < nctx; nc++) {
3575 			i915_request_put(request[nc]);
3576 			request[nc] = NULL;
3577 		}
3578 
3579 		if (__igt_timeout(end_time, NULL))
3580 			break;
3581 	}
3582 
3583 	err = igt_live_test_end(&t);
3584 	if (err)
3585 		goto out;
3586 
3587 	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3588 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3589 		prime, div64_u64(ktime_to_ns(times[1]), prime));
3590 
3591 out:
3592 	if (igt_flush_test(gt->i915))
3593 		err = -EIO;
3594 
3595 	for (nc = 0; nc < nctx; nc++) {
3596 		i915_request_put(request[nc]);
3597 		intel_context_unpin(ve[nc]);
3598 		intel_context_put(ve[nc]);
3599 	}
3600 	return err;
3601 }
3602 
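/*
 * First check that each physical engine can be wrapped individually, then
 * exercise every class with two or more siblings using an increasing
 * number of contexts.
 */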
3603 static int live_virtual_engine(void *arg)
3604 {
3605 	struct intel_gt *gt = arg;
3606 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3607 	struct intel_engine_cs *engine;
3608 	enum intel_engine_id id;
3609 	unsigned int class, inst;
3610 	int err;
3611 
3612 	if (intel_uc_uses_guc_submission(&gt->uc))
3613 		return 0;
3614 
3615 	for_each_engine(engine, gt, id) {
3616 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3617 		if (err) {
3618 			pr_err("Failed to wrap engine %s: err=%d\n",
3619 			       engine->name, err);
3620 			return err;
3621 		}
3622 	}
3623 
3624 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3625 		int nsibling, n;
3626 
3627 		nsibling = 0;
3628 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3629 			if (!gt->engine_class[class][inst])
3630 				continue;
3631 
3632 			siblings[nsibling++] = gt->engine_class[class][inst];
3633 		}
3634 		if (nsibling < 2)
3635 			continue;
3636 
3637 		for (n = 1; n <= nsibling + 1; n++) {
3638 			err = nop_virtual_engine(gt, siblings, nsibling,
3639 						 n, 0);
3640 			if (err)
3641 				return err;
3642 		}
3643 
3644 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3645 		if (err)
3646 			return err;
3647 	}
3648 
3649 	return 0;
3650 }
3651 
3652 static int mask_virtual_engine(struct intel_gt *gt,
3653 			       struct intel_engine_cs **siblings,
3654 			       unsigned int nsibling)
3655 {
3656 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3657 	struct intel_context *ve;
3658 	struct igt_live_test t;
3659 	unsigned int n;
3660 	int err;
3661 
3662 	/*
3663 	 * Check that by setting the execution mask on a request, we can
3664 	 * restrict it to our desired engine within the virtual engine.
3665 	 */
3666 
3667 	ve = intel_execlists_create_virtual(siblings, nsibling);
3668 	if (IS_ERR(ve)) {
3669 		err = PTR_ERR(ve);
3670 		goto out_close;
3671 	}
3672 
3673 	err = intel_context_pin(ve);
3674 	if (err)
3675 		goto out_put;
3676 
3677 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3678 	if (err)
3679 		goto out_unpin;
3680 
3681 	for (n = 0; n < nsibling; n++) {
3682 		request[n] = i915_request_create(ve);
3683 		if (IS_ERR(request[n])) {
3684 			err = PTR_ERR(request[n]);
3685 			nsibling = n;
3686 			goto out;
3687 		}
3688 
3689 		/* Reverse order as it's more likely to be unnatural */
3690 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3691 
3692 		i915_request_get(request[n]);
3693 		i915_request_add(request[n]);
3694 	}
3695 
3696 	for (n = 0; n < nsibling; n++) {
3697 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3698 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3699 			       __func__, ve->engine->name,
3700 			       request[n]->fence.context,
3701 			       request[n]->fence.seqno);
3702 
3703 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3704 				  __func__, ve->engine->name,
3705 				  request[n]->fence.context,
3706 				  request[n]->fence.seqno);
3707 			GEM_TRACE_DUMP();
3708 			intel_gt_set_wedged(gt);
3709 			err = -EIO;
3710 			goto out;
3711 		}
3712 
3713 		if (request[n]->engine != siblings[nsibling - n - 1]) {
3714 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3715 			       request[n]->engine->name,
3716 			       siblings[nsibling - n - 1]->name);
3717 			err = -EINVAL;
3718 			goto out;
3719 		}
3720 	}
3721 
3722 	err = igt_live_test_end(&t);
3723 out:
3724 	if (igt_flush_test(gt->i915))
3725 		err = -EIO;
3726 
3727 	for (n = 0; n < nsibling; n++)
3728 		i915_request_put(request[n]);
3729 
3730 out_unpin:
3731 	intel_context_unpin(ve);
3732 out_put:
3733 	intel_context_put(ve);
3734 out_close:
3735 	return err;
3736 }
3737 
3738 static int live_virtual_mask(void *arg)
3739 {
3740 	struct intel_gt *gt = arg;
3741 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3742 	unsigned int class, inst;
3743 	int err;
3744 
3745 	if (intel_uc_uses_guc_submission(&gt->uc))
3746 		return 0;
3747 
3748 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3749 		unsigned int nsibling;
3750 
3751 		nsibling = 0;
3752 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3753 			if (!gt->engine_class[class][inst])
3754 				break;
3755 
3756 			siblings[nsibling++] = gt->engine_class[class][inst];
3757 		}
3758 		if (nsibling < 2)
3759 			continue;
3760 
3761 		err = mask_virtual_engine(gt, siblings, nsibling);
3762 		if (err)
3763 			return err;
3764 	}
3765 
3766 	return 0;
3767 }
3768 
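/*
 * Bounce a single virtual context across its siblings, with each request
 * reading back the GPR primed by the previous request (on a different
 * physical engine) and priming the next, to show that the context image
 * travels intact between the siblings.
 */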
3769 static int preserved_virtual_engine(struct intel_gt *gt,
3770 				    struct intel_engine_cs **siblings,
3771 				    unsigned int nsibling)
3772 {
3773 	struct i915_request *last = NULL;
3774 	struct intel_context *ve;
3775 	struct i915_vma *scratch;
3776 	struct igt_live_test t;
3777 	unsigned int n;
3778 	int err = 0;
3779 	u32 *cs;
3780 
3781 	scratch = create_scratch(siblings[0]->gt);
3782 	if (IS_ERR(scratch))
3783 		return PTR_ERR(scratch);
3784 
3785 	err = i915_vma_sync(scratch);
3786 	if (err)
3787 		goto out_scratch;
3788 
3789 	ve = intel_execlists_create_virtual(siblings, nsibling);
3790 	if (IS_ERR(ve)) {
3791 		err = PTR_ERR(ve);
3792 		goto out_scratch;
3793 	}
3794 
3795 	err = intel_context_pin(ve);
3796 	if (err)
3797 		goto out_put;
3798 
3799 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3800 	if (err)
3801 		goto out_unpin;
3802 
3803 	for (n = 0; n < NUM_GPR_DW; n++) {
3804 		struct intel_engine_cs *engine = siblings[n % nsibling];
3805 		struct i915_request *rq;
3806 
3807 		rq = i915_request_create(ve);
3808 		if (IS_ERR(rq)) {
3809 			err = PTR_ERR(rq);
3810 			goto out_end;
3811 		}
3812 
3813 		i915_request_put(last);
3814 		last = i915_request_get(rq);
3815 
3816 		cs = intel_ring_begin(rq, 8);
3817 		if (IS_ERR(cs)) {
3818 			i915_request_add(rq);
3819 			err = PTR_ERR(cs);
3820 			goto out_end;
3821 		}
3822 
3823 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3824 		*cs++ = CS_GPR(engine, n);
3825 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3826 		*cs++ = 0;
3827 
3828 		*cs++ = MI_LOAD_REGISTER_IMM(1);
3829 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3830 		*cs++ = n + 1;
3831 
3832 		*cs++ = MI_NOOP;
3833 		intel_ring_advance(rq, cs);
3834 
3835 		/* Restrict this request to run on a particular engine */
3836 		rq->execution_mask = engine->mask;
3837 		i915_request_add(rq);
3838 	}
3839 
3840 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
3841 		err = -ETIME;
3842 		goto out_end;
3843 	}
3844 
3845 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3846 	if (IS_ERR(cs)) {
3847 		err = PTR_ERR(cs);
3848 		goto out_end;
3849 	}
3850 
3851 	for (n = 0; n < NUM_GPR_DW; n++) {
3852 		if (cs[n] != n) {
3853 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
3854 			       cs[n], n);
3855 			err = -EINVAL;
3856 			break;
3857 		}
3858 	}
3859 
3860 	i915_gem_object_unpin_map(scratch->obj);
3861 
3862 out_end:
3863 	if (igt_live_test_end(&t))
3864 		err = -EIO;
3865 	i915_request_put(last);
3866 out_unpin:
3867 	intel_context_unpin(ve);
3868 out_put:
3869 	intel_context_put(ve);
3870 out_scratch:
3871 	i915_vma_unpin_and_release(&scratch, 0);
3872 	return err;
3873 }
3874 
3875 static int live_virtual_preserved(void *arg)
3876 {
3877 	struct intel_gt *gt = arg;
3878 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3879 	unsigned int class, inst;
3880 
3881 	/*
3882 	 * Check that the context image retains non-privileged (user) registers
3883 	 * from one engine to the next. For this we check that the CS_GPR
3884 	 * are preserved.
3885 	 */
3886 
3887 	if (intel_uc_uses_guc_submission(&gt->uc))
3888 		return 0;
3889 
3890 	/* As we use CS_GPR we cannot run before they existed on all engines. */
3891 	if (INTEL_GEN(gt->i915) < 9)
3892 		return 0;
3893 
3894 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3895 		int nsibling, err;
3896 
3897 		nsibling = 0;
3898 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3899 			if (!gt->engine_class[class][inst])
3900 				continue;
3901 
3902 			siblings[nsibling++] = gt->engine_class[class][inst];
3903 		}
3904 		if (nsibling < 2)
3905 			continue;
3906 
3907 		err = preserved_virtual_engine(gt, siblings, nsibling);
3908 		if (err)
3909 			return err;
3910 	}
3911 
3912 	return 0;
3913 }
3914 
3915 static int bond_virtual_engine(struct intel_gt *gt,
3916 			       unsigned int class,
3917 			       struct intel_engine_cs **siblings,
3918 			       unsigned int nsibling,
3919 			       unsigned int flags)
3920 #define BOND_SCHEDULE BIT(0)
3921 {
3922 	struct intel_engine_cs *master;
3923 	struct i915_request *rq[16];
3924 	enum intel_engine_id id;
3925 	struct igt_spinner spin;
3926 	unsigned long n;
3927 	int err;
3928 
3929 	/*
3930 	 * A set of bonded requests is intended to be run concurrently
3931 	 * across a number of engines. We use one request per-engine
3932 	 * and a magic fence to schedule each of the bonded requests
3933 	 * at the same time. A consequence of our current scheduler is that
3934 	 * we only move requests to the HW ready queue when the request
3935 	 * becomes ready, that is when all of its prerequisite fences have
3936 	 * been signaled. As one of those fences is the master submit fence,
3937 	 * there is a delay on all secondary fences as the HW may be
3938 	 * currently busy. Equally, as all the requests are independent,
3939 	 * they may have other fences that delay individual request
3940 	 * submission to HW. Ergo, we do not guarantee that all requests are
3941 	 * immediately submitted to HW at the same time, just that if the
3942 	 * rules are abided by, they are ready at the same time as the
3943 	 * first is submitted. Userspace can embed semaphores in its batch
3944 	 * to ensure parallel execution of its phases as it requires.
	 * Naturally, it has been suggested that the scheduler should instead
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
3948 	 *
3949 	 * With the submit-fence, we have identified three possible phases
3950 	 * of synchronisation depending on the master fence: queued (not
3951 	 * ready), executing, and signaled. The first two are quite simple
3952 	 * and checked below. However, the signaled master fence handling is
3953 	 * contentious. Currently we do not distinguish between a signaled
3954 	 * fence and an expired fence, as once signaled it does not convey
3955 	 * any information about the previous execution. It may even be freed
3956 	 * and hence checking later it may not exist at all. Ergo we currently
3957 	 * do not apply the bonding constraint for an already signaled fence,
3958 	 * as our expectation is that it should not constrain the secondaries
3959 	 * and is outside of the scope of the bonded request API (i.e. all
3960 	 * userspace requests are meant to be running in parallel). As
3961 	 * it imposes no constraint, and is effectively a no-op, we do not
3962 	 * check below as normal execution flows are checked extensively above.
3963 	 *
3964 	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
3966 	 */
3967 
3968 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3969 
3970 	if (igt_spinner_init(&spin, gt))
3971 		return -ENOMEM;
3972 
3973 	err = 0;
3974 	rq[0] = ERR_PTR(-ENOMEM);
3975 	for_each_engine(master, gt, id) {
3976 		struct i915_sw_fence fence = {};
3977 		struct intel_context *ce;
3978 
3979 		if (master->class == class)
3980 			continue;
3981 
3982 		ce = intel_context_create(master);
3983 		if (IS_ERR(ce)) {
3984 			err = PTR_ERR(ce);
3985 			goto out;
3986 		}
3987 
3988 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3989 
3990 		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
3991 		intel_context_put(ce);
3992 		if (IS_ERR(rq[0])) {
3993 			err = PTR_ERR(rq[0]);
3994 			goto out;
3995 		}
3996 		i915_request_get(rq[0]);
3997 
3998 		if (flags & BOND_SCHEDULE) {
3999 			onstack_fence_init(&fence);
4000 			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4001 							       &fence,
4002 							       GFP_KERNEL);
4003 		}
4004 
4005 		i915_request_add(rq[0]);
4006 		if (err < 0)
4007 			goto out;
4008 
4009 		if (!(flags & BOND_SCHEDULE) &&
4010 		    !igt_wait_for_spinner(&spin, rq[0])) {
4011 			err = -EIO;
4012 			goto out;
4013 		}
4014 
4015 		for (n = 0; n < nsibling; n++) {
4016 			struct intel_context *ve;
4017 
4018 			ve = intel_execlists_create_virtual(siblings, nsibling);
4019 			if (IS_ERR(ve)) {
4020 				err = PTR_ERR(ve);
4021 				onstack_fence_fini(&fence);
4022 				goto out;
4023 			}
4024 
4025 			err = intel_virtual_engine_attach_bond(ve->engine,
4026 							       master,
4027 							       siblings[n]);
4028 			if (err) {
4029 				intel_context_put(ve);
4030 				onstack_fence_fini(&fence);
4031 				goto out;
4032 			}
4033 
4034 			err = intel_context_pin(ve);
4035 			intel_context_put(ve);
4036 			if (err) {
4037 				onstack_fence_fini(&fence);
4038 				goto out;
4039 			}
4040 
4041 			rq[n + 1] = i915_request_create(ve);
4042 			intel_context_unpin(ve);
4043 			if (IS_ERR(rq[n + 1])) {
4044 				err = PTR_ERR(rq[n + 1]);
4045 				onstack_fence_fini(&fence);
4046 				goto out;
4047 			}
4048 			i915_request_get(rq[n + 1]);
4049 
4050 			err = i915_request_await_execution(rq[n + 1],
4051 							   &rq[0]->fence,
4052 							   ve->engine->bond_execute);
4053 			i915_request_add(rq[n + 1]);
4054 			if (err < 0) {
4055 				onstack_fence_fini(&fence);
4056 				goto out;
4057 			}
4058 		}
4059 		onstack_fence_fini(&fence);
4060 		intel_engine_flush_submission(master);
4061 		igt_spinner_end(&spin);
4062 
4063 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4064 			pr_err("Master request did not execute (on %s)!\n",
4065 			       rq[0]->engine->name);
4066 			err = -EIO;
4067 			goto out;
4068 		}
4069 
4070 		for (n = 0; n < nsibling; n++) {
4071 			if (i915_request_wait(rq[n + 1], 0,
4072 					      MAX_SCHEDULE_TIMEOUT) < 0) {
4073 				err = -EIO;
4074 				goto out;
4075 			}
4076 
4077 			if (rq[n + 1]->engine != siblings[n]) {
4078 				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4079 				       siblings[n]->name,
4080 				       rq[n + 1]->engine->name,
4081 				       rq[0]->engine->name);
4082 				err = -EINVAL;
4083 				goto out;
4084 			}
4085 		}
4086 
4087 		for (n = 0; !IS_ERR(rq[n]); n++)
4088 			i915_request_put(rq[n]);
4089 		rq[0] = ERR_PTR(-ENOMEM);
4090 	}
4091 
4092 out:
4093 	for (n = 0; !IS_ERR(rq[n]); n++)
4094 		i915_request_put(rq[n]);
4095 	if (igt_flush_test(gt->i915))
4096 		err = -EIO;
4097 
4098 	igt_spinner_fini(&spin);
4099 	return err;
4100 }
4101 
4102 static int live_virtual_bond(void *arg)
4103 {
4104 	static const struct phase {
4105 		const char *name;
4106 		unsigned int flags;
4107 	} phases[] = {
4108 		{ "", 0 },
4109 		{ "schedule", BOND_SCHEDULE },
4110 		{ },
4111 	};
4112 	struct intel_gt *gt = arg;
4113 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4114 	unsigned int class, inst;
4115 	int err;
4116 
4117 	if (intel_uc_uses_guc_submission(&gt->uc))
4118 		return 0;
4119 
4120 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4121 		const struct phase *p;
4122 		int nsibling;
4123 
4124 		nsibling = 0;
4125 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4126 			if (!gt->engine_class[class][inst])
4127 				break;
4128 
4129 			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
4130 			siblings[nsibling++] = gt->engine_class[class][inst];
4131 		}
4132 		if (nsibling < 2)
4133 			continue;
4134 
4135 		for (p = phases; p->name; p++) {
4136 			err = bond_virtual_engine(gt,
4137 						  class, siblings, nsibling,
4138 						  p->flags);
4139 			if (err) {
4140 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4141 				       __func__, p->name, class, nsibling, err);
4142 				return err;
4143 			}
4144 		}
4145 	}
4146 
4147 	return 0;
4148 }
4149 
4150 static int reset_virtual_engine(struct intel_gt *gt,
4151 				struct intel_engine_cs **siblings,
4152 				unsigned int nsibling)
4153 {
4154 	struct intel_engine_cs *engine;
4155 	struct intel_context *ve;
4156 	unsigned long *heartbeat;
4157 	struct igt_spinner spin;
4158 	struct i915_request *rq;
4159 	unsigned int n;
4160 	int err = 0;
4161 
4162 	/*
4163 	 * In order to support offline error capture for fast preempt reset,
4164 	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
4166 	 */
4167 
4168 	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
4169 	if (!heartbeat)
4170 		return -ENOMEM;
4171 
4172 	if (igt_spinner_init(&spin, gt)) {
4173 		err = -ENOMEM;
4174 		goto out_free;
4175 	}
4176 
4177 	ve = intel_execlists_create_virtual(siblings, nsibling);
4178 	if (IS_ERR(ve)) {
4179 		err = PTR_ERR(ve);
4180 		goto out_spin;
4181 	}
4182 
4183 	for (n = 0; n < nsibling; n++)
4184 		engine_heartbeat_disable(siblings[n], &heartbeat[n]);
4185 
4186 	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4187 	if (IS_ERR(rq)) {
4188 		err = PTR_ERR(rq);
4189 		goto out_heartbeat;
4190 	}
4191 	i915_request_add(rq);
4192 
4193 	if (!igt_wait_for_spinner(&spin, rq)) {
4194 		intel_gt_set_wedged(gt);
4195 		err = -ETIME;
4196 		goto out_heartbeat;
4197 	}
4198 
4199 	engine = rq->engine;
4200 	GEM_BUG_ON(engine == ve->engine);
4201 
4202 	/* Take ownership of the reset and tasklet */
4203 	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4204 			     &gt->reset.flags)) {
4205 		intel_gt_set_wedged(gt);
4206 		err = -EBUSY;
4207 		goto out_heartbeat;
4208 	}
4209 	tasklet_disable(&engine->execlists.tasklet);
4210 
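	/* Kick the submission tasklet by hand so the spinner is promoted into ELSP */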
4211 	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4212 	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4213 
4214 	/* Fake a preemption event; it fails, of course */
4215 	spin_lock_irq(&engine->active.lock);
4216 	__unwind_incomplete_requests(engine);
4217 	spin_unlock_irq(&engine->active.lock);
4218 	GEM_BUG_ON(rq->engine != ve->engine);
4219 
4220 	/* Reset the engine while keeping our active request on hold */
4221 	execlists_hold(engine, rq);
4222 	GEM_BUG_ON(!i915_request_on_hold(rq));
4223 
4224 	intel_engine_reset(engine, NULL);
4225 	GEM_BUG_ON(rq->fence.error != -EIO);
4226 
4227 	/* Release our grasp on the engine, letting CS flow again */
4228 	tasklet_enable(&engine->execlists.tasklet);
4229 	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4230 
4231 	/* Check that we do not resubmit the held request */
4232 	i915_request_get(rq);
4233 	if (!i915_request_wait(rq, 0, HZ / 5)) {
4234 		pr_err("%s: on-hold request completed!\n",
4235 		       engine->name);
4236 		intel_gt_set_wedged(gt);
4237 		err = -EIO;
4238 		goto out_rq;
4239 	}
4240 	GEM_BUG_ON(!i915_request_on_hold(rq));
4241 
4242 	/* But is resubmitted on release */
4243 	execlists_unhold(engine, rq);
4244 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4245 		pr_err("%s: held request did not complete!\n",
4246 		       engine->name);
4247 		intel_gt_set_wedged(gt);
4248 		err = -ETIME;
4249 	}
4250 
4251 out_rq:
4252 	i915_request_put(rq);
4253 out_heartbeat:
4254 	for (n = 0; n < nsibling; n++)
4255 		engine_heartbeat_enable(siblings[n], heartbeat[n]);
4256 
4257 	intel_context_put(ve);
4258 out_spin:
4259 	igt_spinner_fini(&spin);
4260 out_free:
4261 	kfree(heartbeat);
4262 	return err;
4263 }
4264 
4265 static int live_virtual_reset(void *arg)
4266 {
4267 	struct intel_gt *gt = arg;
4268 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4269 	unsigned int class, inst;
4270 
4271 	/*
4272 	 * Check that we handle a reset event within a virtual engine.
4273 	 * Only the physical engine is reset, but we have to check the flow
4274 	 * of the virtual requests around the reset, and make sure no request
4275 	 * is forgotten.
4276 	 */
4277 
4278 	if (intel_uc_uses_guc_submission(&gt->uc))
4279 		return 0;
4280 
4281 	if (!intel_has_reset_engine(gt))
4282 		return 0;
4283 
4284 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4285 		int nsibling, err;
4286 
4287 		nsibling = 0;
4288 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
4289 			if (!gt->engine_class[class][inst])
4290 				continue;
4291 
4292 			siblings[nsibling++] = gt->engine_class[class][inst];
4293 		}
4294 		if (nsibling < 2)
4295 			continue;
4296 
4297 		err = reset_virtual_engine(gt, siblings, nsibling);
4298 		if (err)
4299 			return err;
4300 	}
4301 
4302 	return 0;
4303 }
4304 
4305 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4306 {
4307 	static const struct i915_subtest tests[] = {
4308 		SUBTEST(live_sanitycheck),
4309 		SUBTEST(live_unlite_switch),
4310 		SUBTEST(live_unlite_preempt),
4311 		SUBTEST(live_pin_rewind),
4312 		SUBTEST(live_hold_reset),
4313 		SUBTEST(live_error_interrupt),
4314 		SUBTEST(live_timeslice_preempt),
4315 		SUBTEST(live_timeslice_rewind),
4316 		SUBTEST(live_timeslice_queue),
4317 		SUBTEST(live_busywait_preempt),
4318 		SUBTEST(live_preempt),
4319 		SUBTEST(live_late_preempt),
4320 		SUBTEST(live_nopreempt),
4321 		SUBTEST(live_preempt_cancel),
4322 		SUBTEST(live_suppress_self_preempt),
4323 		SUBTEST(live_suppress_wait_preempt),
4324 		SUBTEST(live_chain_preempt),
4325 		SUBTEST(live_preempt_gang),
4326 		SUBTEST(live_preempt_timeout),
4327 		SUBTEST(live_preempt_user),
4328 		SUBTEST(live_preempt_smoke),
4329 		SUBTEST(live_virtual_engine),
4330 		SUBTEST(live_virtual_mask),
4331 		SUBTEST(live_virtual_preserved),
4332 		SUBTEST(live_virtual_bond),
4333 		SUBTEST(live_virtual_reset),
4334 	};
4335 
4336 	if (!HAS_EXECLISTS(i915))
4337 		return 0;
4338 
4339 	if (intel_gt_is_wedged(&i915->gt))
4340 		return 0;
4341 
4342 	return intel_gt_live_subtests(tests, &i915->gt);
4343 }
4344 
4345 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4346 {
4347 	const u32 offset =
4348 		i915_ggtt_offset(ce->engine->status_page.vma) +
4349 		offset_in_page(slot);
4350 	struct i915_request *rq;
4351 	u32 *cs;
4352 
4353 	rq = intel_context_create_request(ce);
4354 	if (IS_ERR(rq))
4355 		return PTR_ERR(rq);
4356 
4357 	cs = intel_ring_begin(rq, 4);
4358 	if (IS_ERR(cs)) {
4359 		i915_request_add(rq);
4360 		return PTR_ERR(cs);
4361 	}
4362 
4363 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4364 	*cs++ = offset;
4365 	*cs++ = 0;
4366 	*cs++ = 1;
4367 
4368 	intel_ring_advance(rq, cs);
4369 
4370 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4371 	i915_request_add(rq);
4372 	return 0;
4373 }
4374 
4375 static int context_flush(struct intel_context *ce, long timeout)
4376 {
4377 	struct i915_request *rq;
4378 	struct dma_fence *fence;
4379 	int err = 0;
4380 
4381 	rq = intel_engine_create_kernel_request(ce->engine);
4382 	if (IS_ERR(rq))
4383 		return PTR_ERR(rq);
4384 
4385 	fence = i915_active_fence_get(&ce->timeline->last_request);
4386 	if (fence) {
4387 		i915_request_await_dma_fence(rq, fence);
4388 		dma_fence_put(fence);
4389 	}
4390 
4391 	rq = i915_request_get(rq);
4392 	i915_request_add(rq);
4393 	if (i915_request_wait(rq, 0, timeout) < 0)
4394 		err = -ETIME;
4395 	i915_request_put(rq);
4396 
4397 	rmb(); /* We know the request is written, make sure all state is too! */
4398 	return err;
4399 }
4400 
4401 static int live_lrc_layout(void *arg)
4402 {
4403 	struct intel_gt *gt = arg;
4404 	struct intel_engine_cs *engine;
4405 	enum intel_engine_id id;
4406 	u32 *lrc;
4407 	int err;
4408 
4409 	/*
4410 	 * Check that the register offsets we use to create the initial reg
4411 	 * state match the layout saved by the HW.
4412 	 */
4413 
4414 	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4415 	if (!lrc)
4416 		return -ENOMEM;
4417 
4418 	err = 0;
4419 	for_each_engine(engine, gt, id) {
4420 		u32 *hw;
4421 		int dw;
4422 
4423 		if (!engine->default_state)
4424 			continue;
4425 
4426 		hw = shmem_pin_map(engine->default_state);
4427 		if (IS_ERR(hw)) {
4428 			err = PTR_ERR(hw);
4429 			break;
4430 		}
4431 		hw += LRC_STATE_OFFSET / sizeof(*hw);
4432 
4433 		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4434 					 engine->kernel_context,
4435 					 engine,
4436 					 engine->kernel_context->ring,
4437 					 true);
4438 
4439 		dw = 0;
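		/*
		 * Walk the LRI headers and register offsets in the HW default
		 * image and check that they match the reg state we generate;
		 * the register values themselves are allowed to differ.
		 */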
4440 		do {
4441 			u32 lri = hw[dw];
4442 
4443 			if (lri == 0) {
4444 				dw++;
4445 				continue;
4446 			}
4447 
4448 			if (lrc[dw] == 0) {
4449 				pr_debug("%s: skipped instruction %x at dword %d\n",
4450 					 engine->name, lri, dw);
4451 				dw++;
4452 				continue;
4453 			}
4454 
4455 			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4456 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4457 				       engine->name, dw, lri);
4458 				err = -EINVAL;
4459 				break;
4460 			}
4461 
4462 			if (lrc[dw] != lri) {
4463 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4464 				       engine->name, dw, lri, lrc[dw]);
4465 				err = -EINVAL;
4466 				break;
4467 			}
4468 
4469 			lri &= 0x7f;
4470 			lri++;
4471 			dw++;
4472 
4473 			while (lri) {
4474 				if (hw[dw] != lrc[dw]) {
4475 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4476 					       engine->name, dw, hw[dw], lrc[dw]);
4477 					err = -EINVAL;
4478 					break;
4479 				}
4480 
4481 				/*
4482 				 * Skip over the actual register value as we
4483 				 * expect that to differ.
4484 				 */
4485 				dw += 2;
4486 				lri -= 2;
4487 			}
4488 		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4489 
4490 		if (err) {
4491 			pr_info("%s: HW register image:\n", engine->name);
4492 			igt_hexdump(hw, PAGE_SIZE);
4493 
4494 			pr_info("%s: SW register image:\n", engine->name);
4495 			igt_hexdump(lrc, PAGE_SIZE);
4496 		}
4497 
4498 		shmem_unpin_map(engine->default_state, hw);
4499 		if (err)
4500 			break;
4501 	}
4502 
4503 	kfree(lrc);
4504 	return err;
4505 }
4506 
4507 static int find_offset(const u32 *lri, u32 offset)
4508 {
4509 	int i;
4510 
4511 	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4512 		if (lri[i] == offset)
4513 			return i;
4514 
4515 	return -1;
4516 }
4517 
4518 static int live_lrc_fixed(void *arg)
4519 {
4520 	struct intel_gt *gt = arg;
4521 	struct intel_engine_cs *engine;
4522 	enum intel_engine_id id;
4523 	int err = 0;
4524 
4525 	/*
4526 	 * Check that the assumed register offsets match the actual locations
4527 	 * in the context image.
4528 	 */
4529 
4530 	for_each_engine(engine, gt, id) {
4531 		const struct {
4532 			u32 reg;
4533 			u32 offset;
4534 			const char *name;
4535 		} tbl[] = {
4536 			{
4537 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4538 				CTX_RING_START - 1,
4539 				"RING_START"
4540 			},
4541 			{
4542 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4543 				CTX_RING_CTL - 1,
4544 				"RING_CTL"
4545 			},
4546 			{
4547 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4548 				CTX_RING_HEAD - 1,
4549 				"RING_HEAD"
4550 			},
4551 			{
4552 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4553 				CTX_RING_TAIL - 1,
4554 				"RING_TAIL"
4555 			},
4556 			{
4557 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4558 				lrc_ring_mi_mode(engine),
4559 				"RING_MI_MODE"
4560 			},
4561 			{
4562 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4563 				CTX_BB_STATE - 1,
4564 				"BB_STATE"
4565 			},
4566 			{
4567 				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4568 				lrc_ring_wa_bb_per_ctx(engine),
4569 				"RING_BB_PER_CTX_PTR"
4570 			},
4571 			{
4572 				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4573 				lrc_ring_indirect_ptr(engine),
4574 				"RING_INDIRECT_CTX_PTR"
4575 			},
4576 			{
4577 				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4578 				lrc_ring_indirect_offset(engine),
4579 				"RING_INDIRECT_CTX_OFFSET"
4580 			},
4581 			{
4582 				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4583 				CTX_TIMESTAMP - 1,
4584 				"RING_CTX_TIMESTAMP"
4585 			},
4586 			{
4587 				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4588 				lrc_ring_gpr0(engine),
4589 				"RING_CS_GPR0"
4590 			},
4591 			{
4592 				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4593 				lrc_ring_cmd_buf_cctl(engine),
4594 				"RING_CMD_BUF_CCTL"
4595 			},
4596 			{ },
4597 		}, *t;
4598 		u32 *hw;
4599 
4600 		if (!engine->default_state)
4601 			continue;
4602 
4603 		hw = shmem_pin_map(engine->default_state);
4604 		if (IS_ERR(hw)) {
4605 			err = PTR_ERR(hw);
4606 			break;
4607 		}
4608 		hw += LRC_STATE_OFFSET / sizeof(*hw);
4609 
4610 		for (t = tbl; t->name; t++) {
4611 			int dw = find_offset(hw, t->reg);
4612 
4613 			if (dw != t->offset) {
4614 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4615 				       engine->name,
4616 				       t->name,
4617 				       t->reg,
4618 				       dw,
4619 				       t->offset);
4620 				err = -EINVAL;
4621 			}
4622 		}
4623 
4624 		shmem_unpin_map(engine->default_state, hw);
4625 	}
4626 
4627 	return err;
4628 }
4629 
4630 static int __live_lrc_state(struct intel_engine_cs *engine,
4631 			    struct i915_vma *scratch)
4632 {
4633 	struct intel_context *ce;
4634 	struct i915_request *rq;
4635 	enum {
4636 		RING_START_IDX = 0,
4637 		RING_TAIL_IDX,
4638 		MAX_IDX
4639 	};
4640 	u32 expected[MAX_IDX];
4641 	u32 *cs;
4642 	int err;
4643 	int n;
4644 
4645 	ce = intel_context_create(engine);
4646 	if (IS_ERR(ce))
4647 		return PTR_ERR(ce);
4648 
4649 	err = intel_context_pin(ce);
4650 	if (err)
4651 		goto err_put;
4652 
4653 	rq = i915_request_create(ce);
4654 	if (IS_ERR(rq)) {
4655 		err = PTR_ERR(rq);
4656 		goto err_unpin;
4657 	}
4658 
4659 	cs = intel_ring_begin(rq, 4 * MAX_IDX);
4660 	if (IS_ERR(cs)) {
4661 		err = PTR_ERR(cs);
4662 		i915_request_add(rq);
4663 		goto err_unpin;
4664 	}
4665 
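	/* Read RING_START and RING_TAIL back from the HW into the scratch buffer */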
4666 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4667 	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4668 	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4669 	*cs++ = 0;
4670 
4671 	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4672 
4673 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4674 	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4675 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4676 	*cs++ = 0;
4677 
4678 	i915_vma_lock(scratch);
4679 	err = i915_request_await_object(rq, scratch->obj, true);
4680 	if (!err)
4681 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4682 	i915_vma_unlock(scratch);
4683 
4684 	i915_request_get(rq);
4685 	i915_request_add(rq);
4686 	if (err)
4687 		goto err_rq;
4688 
4689 	intel_engine_flush_submission(engine);
4690 	expected[RING_TAIL_IDX] = ce->ring->tail;
4691 
4692 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4693 		err = -ETIME;
4694 		goto err_rq;
4695 	}
4696 
4697 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4698 	if (IS_ERR(cs)) {
4699 		err = PTR_ERR(cs);
4700 		goto err_rq;
4701 	}
4702 
4703 	for (n = 0; n < MAX_IDX; n++) {
4704 		if (cs[n] != expected[n]) {
4705 			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4706 			       engine->name, n, cs[n], expected[n]);
4707 			err = -EINVAL;
4708 			break;
4709 		}
4710 	}
4711 
4712 	i915_gem_object_unpin_map(scratch->obj);
4713 
4714 err_rq:
4715 	i915_request_put(rq);
4716 err_unpin:
4717 	intel_context_unpin(ce);
4718 err_put:
4719 	intel_context_put(ce);
4720 	return err;
4721 }
4722 
4723 static int live_lrc_state(void *arg)
4724 {
4725 	struct intel_gt *gt = arg;
4726 	struct intel_engine_cs *engine;
4727 	struct i915_vma *scratch;
4728 	enum intel_engine_id id;
4729 	int err = 0;
4730 
4731 	/*
4732 	 * Check that the live register state matches what we expect for this
4733 	 * intel_context.
4734 	 */
4735 
4736 	scratch = create_scratch(gt);
4737 	if (IS_ERR(scratch))
4738 		return PTR_ERR(scratch);
4739 
4740 	for_each_engine(engine, gt, id) {
4741 		err = __live_lrc_state(engine, scratch);
4742 		if (err)
4743 			break;
4744 	}
4745 
4746 	if (igt_flush_test(gt->i915))
4747 		err = -EIO;
4748 
4749 	i915_vma_unpin_and_release(&scratch, 0);
4750 	return err;
4751 }
4752 
4753 static int gpr_make_dirty(struct intel_context *ce)
4754 {
4755 	struct i915_request *rq;
4756 	u32 *cs;
4757 	int n;
4758 
4759 	rq = intel_context_create_request(ce);
4760 	if (IS_ERR(rq))
4761 		return PTR_ERR(rq);
4762 
4763 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4764 	if (IS_ERR(cs)) {
4765 		i915_request_add(rq);
4766 		return PTR_ERR(cs);
4767 	}
4768 
4769 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4770 	for (n = 0; n < NUM_GPR_DW; n++) {
4771 		*cs++ = CS_GPR(ce->engine, n);
4772 		*cs++ = STACK_MAGIC;
4773 	}
4774 	*cs++ = MI_NOOP;
4775 
4776 	intel_ring_advance(rq, cs);
4777 
4778 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4779 	i915_request_add(rq);
4780 
4781 	return 0;
4782 }
4783 
4784 static struct i915_request *
4785 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4786 {
4787 	const u32 offset =
4788 		i915_ggtt_offset(ce->engine->status_page.vma) +
4789 		offset_in_page(slot);
4790 	struct i915_request *rq;
4791 	u32 *cs;
4792 	int err;
4793 	int n;
4794 
4795 	rq = intel_context_create_request(ce);
4796 	if (IS_ERR(rq))
4797 		return rq;
4798 
4799 	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4800 	if (IS_ERR(cs)) {
4801 		i915_request_add(rq);
4802 		return ERR_CAST(cs);
4803 	}
4804 
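	/*
	 * Wait for the semaphore to be set, either directly by the CPU or by
	 * a preempting kernel-context request signalling it from the GPU.
	 */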
4805 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4806 	*cs++ = MI_NOOP;
4807 
4808 	*cs++ = MI_SEMAPHORE_WAIT |
4809 		MI_SEMAPHORE_GLOBAL_GTT |
4810 		MI_SEMAPHORE_POLL |
4811 		MI_SEMAPHORE_SAD_NEQ_SDD;
4812 	*cs++ = 0;
4813 	*cs++ = offset;
4814 	*cs++ = 0;
4815 
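	/* Then read every CS_GPR back into the scratch buffer for inspection */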
4816 	for (n = 0; n < NUM_GPR_DW; n++) {
4817 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4818 		*cs++ = CS_GPR(ce->engine, n);
4819 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4820 		*cs++ = 0;
4821 	}
4822 
4823 	i915_vma_lock(scratch);
4824 	err = i915_request_await_object(rq, scratch->obj, true);
4825 	if (!err)
4826 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4827 	i915_vma_unlock(scratch);
4828 
4829 	i915_request_get(rq);
4830 	i915_request_add(rq);
4831 	if (err) {
4832 		i915_request_put(rq);
4833 		rq = ERR_PTR(err);
4834 	}
4835 
4836 	return rq;
4837 }
4838 
4839 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4840 			  struct i915_vma *scratch,
4841 			  bool preempt)
4842 {
4843 	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4844 	struct intel_context *ce;
4845 	struct i915_request *rq;
4846 	u32 *cs;
4847 	int err;
4848 	int n;
4849 
4850 	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4851 		return 0; /* GPR only on rcs0 for gen8 */
4852 
4853 	err = gpr_make_dirty(engine->kernel_context);
4854 	if (err)
4855 		return err;
4856 
4857 	ce = intel_context_create(engine);
4858 	if (IS_ERR(ce))
4859 		return PTR_ERR(ce);
4860 
4861 	rq = __gpr_read(ce, scratch, slot);
4862 	if (IS_ERR(rq)) {
4863 		err = PTR_ERR(rq);
4864 		goto err_put;
4865 	}
4866 
4867 	err = wait_for_submit(engine, rq, HZ / 2);
4868 	if (err)
4869 		goto err_rq;
4870 
4871 	if (preempt) {
4872 		err = gpr_make_dirty(engine->kernel_context);
4873 		if (err)
4874 			goto err_rq;
4875 
4876 		err = emit_semaphore_signal(engine->kernel_context, slot);
4877 		if (err)
4878 			goto err_rq;
4879 	} else {
4880 		slot[0] = 1;
4881 		wmb();
4882 	}
4883 
4884 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4885 		err = -ETIME;
4886 		goto err_rq;
4887 	}
4888 
4889 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4890 	if (IS_ERR(cs)) {
4891 		err = PTR_ERR(cs);
4892 		goto err_rq;
4893 	}
4894 
4895 	for (n = 0; n < NUM_GPR_DW; n++) {
4896 		if (cs[n]) {
4897 			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4898 			       engine->name,
4899 			       n / 2, n & 1 ? "udw" : "ldw",
4900 			       cs[n]);
4901 			err = -EINVAL;
4902 			break;
4903 		}
4904 	}
4905 
4906 	i915_gem_object_unpin_map(scratch->obj);
4907 
4908 err_rq:
4909 	memset32(&slot[0], -1, 4);
4910 	wmb();
4911 	i915_request_put(rq);
4912 err_put:
4913 	intel_context_put(ce);
4914 	return err;
4915 }
4916 
4917 static int live_lrc_gpr(void *arg)
4918 {
4919 	struct intel_gt *gt = arg;
4920 	struct intel_engine_cs *engine;
4921 	struct i915_vma *scratch;
4922 	enum intel_engine_id id;
4923 	int err = 0;
4924 
4925 	/*
4926 	 * Check that GPR registers are cleared in new contexts as we need
4927 	 * to avoid leaking any information from previous contexts.
4928 	 */
4929 
4930 	scratch = create_scratch(gt);
4931 	if (IS_ERR(scratch))
4932 		return PTR_ERR(scratch);
4933 
4934 	for_each_engine(engine, gt, id) {
4935 		unsigned long heartbeat;
4936 
4937 		engine_heartbeat_disable(engine, &heartbeat);
4938 
4939 		err = __live_lrc_gpr(engine, scratch, false);
4940 		if (err)
4941 			goto err;
4942 
4943 		err = __live_lrc_gpr(engine, scratch, true);
4944 		if (err)
4945 			goto err;
4946 
4947 err:
4948 		engine_heartbeat_enable(engine, heartbeat);
4949 		if (igt_flush_test(gt->i915))
4950 			err = -EIO;
4951 		if (err)
4952 			break;
4953 	}
4954 
4955 	i915_vma_unpin_and_release(&scratch, 0);
4956 	return err;
4957 }
4958 
4959 static struct i915_request *
4960 create_timestamp(struct intel_context *ce, void *slot, int idx)
4961 {
4962 	const u32 offset =
4963 		i915_ggtt_offset(ce->engine->status_page.vma) +
4964 		offset_in_page(slot);
4965 	struct i915_request *rq;
4966 	u32 *cs;
4967 	int err;
4968 
4969 	rq = intel_context_create_request(ce);
4970 	if (IS_ERR(rq))
4971 		return rq;
4972 
4973 	cs = intel_ring_begin(rq, 10);
4974 	if (IS_ERR(cs)) {
4975 		err = PTR_ERR(cs);
4976 		goto err;
4977 	}
4978 
4979 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4980 	*cs++ = MI_NOOP;
4981 
4982 	*cs++ = MI_SEMAPHORE_WAIT |
4983 		MI_SEMAPHORE_GLOBAL_GTT |
4984 		MI_SEMAPHORE_POLL |
4985 		MI_SEMAPHORE_SAD_NEQ_SDD;
4986 	*cs++ = 0;
4987 	*cs++ = offset;
4988 	*cs++ = 0;
4989 
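	/* Sample the live CTX_TIMESTAMP register into slot[idx] of the status page */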
4990 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4991 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
4992 	*cs++ = offset + idx * sizeof(u32);
4993 	*cs++ = 0;
4994 
4995 	intel_ring_advance(rq, cs);
4996 
4997 	rq->sched.attr.priority = I915_PRIORITY_MASK;
4998 	err = 0;
4999 err:
5000 	i915_request_get(rq);
5001 	i915_request_add(rq);
5002 	if (err) {
5003 		i915_request_put(rq);
5004 		return ERR_PTR(err);
5005 	}
5006 
5007 	return rq;
5008 }
5009 
5010 struct lrc_timestamp {
5011 	struct intel_engine_cs *engine;
5012 	struct intel_context *ce[2];
5013 	u32 poison;
5014 };
5015 
5016 static bool timestamp_advanced(u32 start, u32 end)
5017 {
5018 	return (s32)(end - start) > 0;
5019 }
5020 
5021 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5022 {
5023 	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5024 	struct i915_request *rq;
5025 	u32 timestamp;
5026 	int err = 0;
5027 
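	/*
	 * Poison the saved timestamp in the context image; the request then
	 * samples the live CTX_TIMESTAMP into slot[1], so we can check that
	 * both the restore and the later save moved time forwards.
	 */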
5028 	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5029 	rq = create_timestamp(arg->ce[0], slot, 1);
5030 	if (IS_ERR(rq))
5031 		return PTR_ERR(rq);
5032 
5033 	err = wait_for_submit(rq->engine, rq, HZ / 2);
5034 	if (err)
5035 		goto err;
5036 
5037 	if (preempt) {
5038 		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5039 		err = emit_semaphore_signal(arg->ce[1], slot);
5040 		if (err)
5041 			goto err;
5042 	} else {
5043 		slot[0] = 1;
5044 		wmb();
5045 	}
5046 
5047 	/* And wait for the switch to the kernel context (to save ours to memory) */
5048 	err = context_flush(arg->ce[0], HZ / 2);
5049 	if (err)
5050 		goto err;
5051 
5052 	if (!timestamp_advanced(arg->poison, slot[1])) {
5053 		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5054 		       arg->engine->name, preempt ? "preempt" : "simple",
5055 		       arg->poison, slot[1]);
5056 		err = -EINVAL;
5057 	}
5058 
5059 	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5060 	if (!timestamp_advanced(slot[1], timestamp)) {
5061 		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5062 		       arg->engine->name, preempt ? "preempt" : "simple",
5063 		       slot[1], timestamp);
5064 		err = -EINVAL;
5065 	}
5066 
5067 err:
5068 	memset32(slot, -1, 4);
5069 	i915_request_put(rq);
5070 	return err;
5071 }
5072 
5073 static int live_lrc_timestamp(void *arg)
5074 {
5075 	struct lrc_timestamp data = {};
5076 	struct intel_gt *gt = arg;
5077 	enum intel_engine_id id;
5078 	const u32 poison[] = {
5079 		0,
5080 		S32_MAX,
5081 		(u32)S32_MAX + 1,
5082 		U32_MAX,
5083 	};
5084 
5085 	/*
5086 	 * We want to verify that the timestamp is saved and restored across
5087 	 * context switches and is monotonic.
5088 	 *
5089 	 * So we do this with a little bit of LRC poisoning to check various
5090 	 * boundary conditions, and see what happens if we preempt the context
5091 	 * with a second request (carrying more poison into the timestamp).
5092 	 */
5093 
5094 	for_each_engine(data.engine, gt, id) {
5095 		unsigned long heartbeat;
5096 		int i, err = 0;
5097 
5098 		engine_heartbeat_disable(data.engine, &heartbeat);
5099 
5100 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5101 			struct intel_context *tmp;
5102 
5103 			tmp = intel_context_create(data.engine);
5104 			if (IS_ERR(tmp)) {
5105 				err = PTR_ERR(tmp);
5106 				goto err;
5107 			}
5108 
5109 			err = intel_context_pin(tmp);
5110 			if (err) {
5111 				intel_context_put(tmp);
5112 				goto err;
5113 			}
5114 
5115 			data.ce[i] = tmp;
5116 		}
5117 
5118 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
5119 			data.poison = poison[i];
5120 
5121 			err = __lrc_timestamp(&data, false);
5122 			if (err)
5123 				break;
5124 
5125 			err = __lrc_timestamp(&data, true);
5126 			if (err)
5127 				break;
5128 		}
5129 
5130 err:
5131 		engine_heartbeat_enable(data.engine, heartbeat);
5132 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5133 			if (!data.ce[i])
5134 				break;
5135 
5136 			intel_context_unpin(data.ce[i]);
5137 			intel_context_put(data.ce[i]);
5138 		}
5139 
5140 		if (igt_flush_test(gt->i915))
5141 			err = -EIO;
5142 		if (err)
5143 			return err;
5144 	}
5145 
5146 	return 0;
5147 }
5148 
5149 static struct i915_vma *
5150 create_user_vma(struct i915_address_space *vm, unsigned long size)
5151 {
5152 	struct drm_i915_gem_object *obj;
5153 	struct i915_vma *vma;
5154 	int err;
5155 
5156 	obj = i915_gem_object_create_internal(vm->i915, size);
5157 	if (IS_ERR(obj))
5158 		return ERR_CAST(obj);
5159 
5160 	vma = i915_vma_instance(obj, vm, NULL);
5161 	if (IS_ERR(vma)) {
5162 		i915_gem_object_put(obj);
5163 		return vma;
5164 	}
5165 
5166 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
5167 	if (err) {
5168 		i915_gem_object_put(obj);
5169 		return ERR_PTR(err);
5170 	}
5171 
5172 	return vma;
5173 }
5174 
5175 static struct i915_vma *
5176 store_context(struct intel_context *ce, struct i915_vma *scratch)
5177 {
5178 	struct i915_vma *batch;
5179 	u32 dw, x, *cs, *hw;
5180 	u32 *defaults;
5181 
5182 	batch = create_user_vma(ce->vm, SZ_64K);
5183 	if (IS_ERR(batch))
5184 		return batch;
5185 
5186 	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5187 	if (IS_ERR(cs)) {
5188 		i915_vma_put(batch);
5189 		return ERR_CAST(cs);
5190 	}
5191 
5192 	defaults = shmem_pin_map(ce->engine->default_state);
5193 	if (!defaults) {
5194 		i915_gem_object_unpin_map(batch->obj);
5195 		i915_vma_put(batch);
5196 		return ERR_PTR(-ENOMEM);
5197 	}
5198 
5199 	x = 0;
5200 	dw = 0;
5201 	hw = defaults;
5202 	hw += LRC_STATE_OFFSET / sizeof(*hw);
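	/*
	 * Emit an SRM for every register named in the default image's LRI
	 * list, packing the readback values into the scratch buffer.
	 */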
5203 	do {
5204 		u32 len = hw[dw] & 0x7f;
5205 
5206 		if (hw[dw] == 0) {
5207 			dw++;
5208 			continue;
5209 		}
5210 
5211 		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5212 			dw += len + 2;
5213 			continue;
5214 		}
5215 
5216 		dw++;
5217 		len = (len + 1) / 2;
5218 		while (len--) {
5219 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
5220 			*cs++ = hw[dw];
5221 			*cs++ = lower_32_bits(scratch->node.start + x);
5222 			*cs++ = upper_32_bits(scratch->node.start + x);
5223 
5224 			dw += 2;
5225 			x += 4;
5226 		}
5227 	} while (dw < PAGE_SIZE / sizeof(u32) &&
5228 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5229 
5230 	*cs++ = MI_BATCH_BUFFER_END;
5231 
5232 	shmem_unpin_map(ce->engine->default_state, defaults);
5233 
5234 	i915_gem_object_flush_map(batch->obj);
5235 	i915_gem_object_unpin_map(batch->obj);
5236 
5237 	return batch;
5238 }
5239 
5240 static int move_to_active(struct i915_request *rq,
5241 			  struct i915_vma *vma,
5242 			  unsigned int flags)
5243 {
5244 	int err;
5245 
5246 	i915_vma_lock(vma);
5247 	err = i915_request_await_object(rq, vma->obj, flags);
5248 	if (!err)
5249 		err = i915_vma_move_to_active(vma, rq, flags);
5250 	i915_vma_unlock(vma);
5251 
5252 	return err;
5253 }
5254 
5255 static struct i915_request *
5256 record_registers(struct intel_context *ce,
5257 		 struct i915_vma *before,
5258 		 struct i915_vma *after,
5259 		 u32 *sema)
5260 {
5261 	struct i915_vma *b_before, *b_after;
5262 	struct i915_request *rq;
5263 	u32 *cs;
5264 	int err;
5265 
5266 	b_before = store_context(ce, before);
5267 	if (IS_ERR(b_before))
5268 		return ERR_CAST(b_before);
5269 
5270 	b_after = store_context(ce, after);
5271 	if (IS_ERR(b_after)) {
5272 		rq = ERR_CAST(b_after);
5273 		goto err_before;
5274 	}
5275 
5276 	rq = intel_context_create_request(ce);
5277 	if (IS_ERR(rq))
5278 		goto err_after;
5279 
5280 	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5281 	if (err)
5282 		goto err_rq;
5283 
5284 	err = move_to_active(rq, b_before, 0);
5285 	if (err)
5286 		goto err_rq;
5287 
5288 	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5289 	if (err)
5290 		goto err_rq;
5291 
5292 	err = move_to_active(rq, b_after, 0);
5293 	if (err)
5294 		goto err_rq;
5295 
5296 	cs = intel_ring_begin(rq, 14);
5297 	if (IS_ERR(cs)) {
5298 		err = PTR_ERR(cs);
5299 		goto err_rq;
5300 	}
5301 
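	/*
	 * Capture the registers into @before, stall on the semaphore until it
	 * is signalled (by the poisoner or the CPU), then capture them again
	 * into @after.
	 */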
5302 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5303 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5304 	*cs++ = lower_32_bits(b_before->node.start);
5305 	*cs++ = upper_32_bits(b_before->node.start);
5306 
5307 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5308 	*cs++ = MI_SEMAPHORE_WAIT |
5309 		MI_SEMAPHORE_GLOBAL_GTT |
5310 		MI_SEMAPHORE_POLL |
5311 		MI_SEMAPHORE_SAD_NEQ_SDD;
5312 	*cs++ = 0;
5313 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5314 		offset_in_page(sema);
5315 	*cs++ = 0;
5316 	*cs++ = MI_NOOP;
5317 
5318 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5319 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5320 	*cs++ = lower_32_bits(b_after->node.start);
5321 	*cs++ = upper_32_bits(b_after->node.start);
5322 
5323 	intel_ring_advance(rq, cs);
5324 
5325 	WRITE_ONCE(*sema, 0);
5326 	i915_request_get(rq);
5327 	i915_request_add(rq);
5328 err_after:
5329 	i915_vma_put(b_after);
5330 err_before:
5331 	i915_vma_put(b_before);
5332 	return rq;
5333 
5334 err_rq:
5335 	i915_request_add(rq);
5336 	rq = ERR_PTR(err);
5337 	goto err_after;
5338 }
5339 
5340 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5341 {
5342 	struct i915_vma *batch;
5343 	u32 dw, *cs, *hw;
5344 	u32 *defaults;
5345 
5346 	batch = create_user_vma(ce->vm, SZ_64K);
5347 	if (IS_ERR(batch))
5348 		return batch;
5349 
5350 	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5351 	if (IS_ERR(cs)) {
5352 		i915_vma_put(batch);
5353 		return ERR_CAST(cs);
5354 	}
5355 
5356 	defaults = shmem_pin_map(ce->engine->default_state);
5357 	if (!defaults) {
5358 		i915_gem_object_unpin_map(batch->obj);
5359 		i915_vma_put(batch);
5360 		return ERR_PTR(-ENOMEM);
5361 	}
5362 
5363 	dw = 0;
5364 	hw = defaults;
5365 	hw += LRC_STATE_OFFSET / sizeof(*hw);
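	/*
	 * Replay the LRI list from the default image, but substitute the
	 * poison value for every register payload.
	 */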
5366 	do {
5367 		u32 len = hw[dw] & 0x7f;
5368 
5369 		if (hw[dw] == 0) {
5370 			dw++;
5371 			continue;
5372 		}
5373 
5374 		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5375 			dw += len + 2;
5376 			continue;
5377 		}
5378 
5379 		dw++;
5380 		len = (len + 1) / 2;
5381 		*cs++ = MI_LOAD_REGISTER_IMM(len);
5382 		while (len--) {
5383 			*cs++ = hw[dw];
5384 			*cs++ = poison;
5385 			dw += 2;
5386 		}
5387 	} while (dw < PAGE_SIZE / sizeof(u32) &&
5388 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5389 
5390 	*cs++ = MI_BATCH_BUFFER_END;
5391 
5392 	shmem_unpin_map(ce->engine->default_state, defaults);
5393 
5394 	i915_gem_object_flush_map(batch->obj);
5395 	i915_gem_object_unpin_map(batch->obj);
5396 
5397 	return batch;
5398 }
5399 
5400 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5401 {
5402 	struct i915_request *rq;
5403 	struct i915_vma *batch;
5404 	u32 *cs;
5405 	int err;
5406 
5407 	batch = load_context(ce, poison);
5408 	if (IS_ERR(batch))
5409 		return PTR_ERR(batch);
5410 
5411 	rq = intel_context_create_request(ce);
5412 	if (IS_ERR(rq)) {
5413 		err = PTR_ERR(rq);
5414 		goto err_batch;
5415 	}
5416 
5417 	err = move_to_active(rq, batch, 0);
5418 	if (err)
5419 		goto err_rq;
5420 
5421 	cs = intel_ring_begin(rq, 8);
5422 	if (IS_ERR(cs)) {
5423 		err = PTR_ERR(cs);
5424 		goto err_rq;
5425 	}
5426 
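	/*
	 * Run the poisoning batch, then signal the semaphore to release the
	 * recording context so it can capture the (hopefully untouched) state.
	 */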
5427 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5428 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5429 	*cs++ = lower_32_bits(batch->node.start);
5430 	*cs++ = upper_32_bits(batch->node.start);
5431 
5432 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5433 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5434 		offset_in_page(sema);
5435 	*cs++ = 0;
5436 	*cs++ = 1;
5437 
5438 	intel_ring_advance(rq, cs);
5439 
5440 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5441 err_rq:
5442 	i915_request_add(rq);
5443 err_batch:
5444 	i915_vma_put(batch);
5445 	return err;
5446 }
5447 
5448 static bool is_moving(u32 a, u32 b)
5449 {
5450 	return a != b;
5451 }
5452 
5453 static int compare_isolation(struct intel_engine_cs *engine,
5454 			     struct i915_vma *ref[2],
5455 			     struct i915_vma *result[2],
5456 			     struct intel_context *ce,
5457 			     u32 poison)
5458 {
5459 	u32 x, dw, *hw, *lrc;
5460 	u32 *A[2], *B[2];
5461 	u32 *defaults;
5462 	int err = 0;
5463 
5464 	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5465 	if (IS_ERR(A[0]))
5466 		return PTR_ERR(A[0]);
5467 
5468 	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5469 	if (IS_ERR(A[1])) {
5470 		err = PTR_ERR(A[1]);
5471 		goto err_A0;
5472 	}
5473 
5474 	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5475 	if (IS_ERR(B[0])) {
5476 		err = PTR_ERR(B[0]);
5477 		goto err_A1;
5478 	}
5479 
5480 	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5481 	if (IS_ERR(B[1])) {
5482 		err = PTR_ERR(B[1]);
5483 		goto err_B0;
5484 	}
5485 
5486 	lrc = i915_gem_object_pin_map(ce->state->obj,
5487 				      i915_coherent_map_type(engine->i915));
5488 	if (IS_ERR(lrc)) {
5489 		err = PTR_ERR(lrc);
5490 		goto err_B1;
5491 	}
5492 	lrc += LRC_STATE_OFFSET / sizeof(*hw);
5493 
5494 	defaults = shmem_pin_map(ce->engine->default_state);
5495 	if (!defaults) {
5496 		err = -ENOMEM;
5497 		goto err_lrc;
5498 	}
5499 
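	/*
	 * Compare the reference captures (A) against the result captures (B);
	 * registers that change on their own between the A pair are ignored,
	 * anything else that differs indicates leakage from the poisoned
	 * context.
	 */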
5500 	x = 0;
5501 	dw = 0;
5502 	hw = defaults;
5503 	hw += LRC_STATE_OFFSET / sizeof(*hw);
5504 	do {
5505 		u32 len = hw[dw] & 0x7f;
5506 
5507 		if (hw[dw] == 0) {
5508 			dw++;
5509 			continue;
5510 		}
5511 
5512 		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5513 			dw += len + 2;
5514 			continue;
5515 		}
5516 
5517 		dw++;
5518 		len = (len + 1) / 2;
5519 		while (len--) {
5520 			if (!is_moving(A[0][x], A[1][x]) &&
5521 			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5522 				switch (hw[dw] & 4095) {
5523 				case 0x30: /* RING_HEAD */
5524 				case 0x34: /* RING_TAIL */
5525 					break;
5526 
5527 				default:
5528 					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5529 					       engine->name, dw,
5530 					       hw[dw], hw[dw + 1],
5531 					       A[0][x], B[0][x], B[1][x],
5532 					       poison, lrc[dw + 1]);
5533 					err = -EINVAL;
5534 				}
5535 			}
5536 			dw += 2;
5537 			x++;
5538 		}
5539 	} while (dw < PAGE_SIZE / sizeof(u32) &&
5540 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5541 
5542 	shmem_unpin_map(ce->engine->default_state, defaults);
5543 err_lrc:
5544 	i915_gem_object_unpin_map(ce->state->obj);
5545 err_B1:
5546 	i915_gem_object_unpin_map(result[1]->obj);
5547 err_B0:
5548 	i915_gem_object_unpin_map(result[0]->obj);
5549 err_A1:
5550 	i915_gem_object_unpin_map(ref[1]->obj);
5551 err_A0:
5552 	i915_gem_object_unpin_map(ref[0]->obj);
5553 	return err;
5554 }
5555 
5556 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5557 {
5558 	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5559 	struct i915_vma *ref[2], *result[2];
5560 	struct intel_context *A, *B;
5561 	struct i915_request *rq;
5562 	int err;
5563 
5564 	A = intel_context_create(engine);
5565 	if (IS_ERR(A))
5566 		return PTR_ERR(A);
5567 
5568 	B = intel_context_create(engine);
5569 	if (IS_ERR(B)) {
5570 		err = PTR_ERR(B);
5571 		goto err_A;
5572 	}
5573 
5574 	ref[0] = create_user_vma(A->vm, SZ_64K);
5575 	if (IS_ERR(ref[0])) {
5576 		err = PTR_ERR(ref[0]);
5577 		goto err_B;
5578 	}
5579 
5580 	ref[1] = create_user_vma(A->vm, SZ_64K);
5581 	if (IS_ERR(ref[1])) {
5582 		err = PTR_ERR(ref[1]);
5583 		goto err_ref0;
5584 	}
5585 
5586 	rq = record_registers(A, ref[0], ref[1], sema);
5587 	if (IS_ERR(rq)) {
5588 		err = PTR_ERR(rq);
5589 		goto err_ref1;
5590 	}
5591 
5592 	WRITE_ONCE(*sema, 1);
5593 	wmb();
5594 
5595 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5596 		i915_request_put(rq);
5597 		err = -ETIME;
5598 		goto err_ref1;
5599 	}
5600 	i915_request_put(rq);
5601 
5602 	result[0] = create_user_vma(A->vm, SZ_64K);
5603 	if (IS_ERR(result[0])) {
5604 		err = PTR_ERR(result[0]);
5605 		goto err_ref1;
5606 	}
5607 
5608 	result[1] = create_user_vma(A->vm, SZ_64K);
5609 	if (IS_ERR(result[1])) {
5610 		err = PTR_ERR(result[1]);
5611 		goto err_result0;
5612 	}
5613 
5614 	rq = record_registers(A, result[0], result[1], sema);
5615 	if (IS_ERR(rq)) {
5616 		err = PTR_ERR(rq);
5617 		goto err_result1;
5618 	}
5619 
5620 	err = poison_registers(B, poison, sema);
5621 	if (err) {
5622 		WRITE_ONCE(*sema, -1);
5623 		i915_request_put(rq);
5624 		goto err_result1;
5625 	}
5626 
5627 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5628 		i915_request_put(rq);
5629 		err = -ETIME;
5630 		goto err_result1;
5631 	}
5632 	i915_request_put(rq);
5633 
5634 	err = compare_isolation(engine, ref, result, A, poison);
5635 
5636 err_result1:
5637 	i915_vma_put(result[1]);
5638 err_result0:
5639 	i915_vma_put(result[0]);
5640 err_ref1:
5641 	i915_vma_put(ref[1]);
5642 err_ref0:
5643 	i915_vma_put(ref[0]);
5644 err_B:
5645 	intel_context_put(B);
5646 err_A:
5647 	intel_context_put(A);
5648 	return err;
5649 }
5650 
5651 static bool skip_isolation(const struct intel_engine_cs *engine)
5652 {
5653 	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
5654 		return true;
5655 
5656 	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
5657 		return true;
5658 
5659 	return false;
5660 }
5661 
5662 static int live_lrc_isolation(void *arg)
5663 {
5664 	struct intel_gt *gt = arg;
5665 	struct intel_engine_cs *engine;
5666 	enum intel_engine_id id;
5667 	const u32 poison[] = {
5668 		STACK_MAGIC,
5669 		0x3a3a3a3a,
5670 		0x5c5c5c5c,
5671 		0xffffffff,
5672 		0xffff0000,
5673 	};
5674 	int err = 0;
5675 
5676 	/*
5677 	 * Our goal is to try to verify that per-context state cannot be
5678 	 * tampered with by another non-privileged client.
5679 	 *
5680 	 * We take the list of context registers from the LRI in the default
5681 	 * context image and attempt to modify that list from a remote context.
5682 	 */
5683 
5684 	for_each_engine(engine, gt, id) {
5685 		int i;
5686 
5687 		/* Just don't even ask */
5688 		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
5689 		    skip_isolation(engine))
5690 			continue;
5691 
5692 		intel_engine_pm_get(engine);
5693 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
5694 			int result;
5695 
5696 			result = __lrc_isolation(engine, poison[i]);
5697 			if (result && !err)
5698 				err = result;
5699 
5700 			result = __lrc_isolation(engine, ~poison[i]);
5701 			if (result && !err)
5702 				err = result;
5703 		}
5704 		intel_engine_pm_put(engine);
5705 		if (igt_flush_test(gt->i915)) {
5706 			err = -EIO;
5707 			break;
5708 		}
5709 	}
5710 
5711 	return err;
5712 }
5713 
5714 static int indirect_ctx_submit_req(struct intel_context *ce)
5715 {
5716 	struct i915_request *rq;
5717 	int err = 0;
5718 
5719 	rq = intel_context_create_request(ce);
5720 	if (IS_ERR(rq))
5721 		return PTR_ERR(rq);
5722 
5723 	i915_request_get(rq);
5724 	i915_request_add(rq);
5725 
5726 	if (i915_request_wait(rq, 0, HZ / 5) < 0)
5727 		err = -ETIME;
5728 
5729 	i915_request_put(rq);
5730 
5731 	return err;
5732 }
5733 
5734 #define CTX_BB_CANARY_OFFSET (3 * 1024)
5735 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
5736 
5737 static u32 *
5738 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
5739 {
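	/* SRM the engine's RING_START into the canary slot of the context wa_bb page */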
5740 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
5741 		MI_SRM_LRM_GLOBAL_GTT |
5742 		MI_LRI_LRM_CS_MMIO;
5743 	*cs++ = i915_mmio_reg_offset(RING_START(0));
5744 	*cs++ = i915_ggtt_offset(ce->state) +
5745 		context_wa_bb_offset(ce) +
5746 		CTX_BB_CANARY_OFFSET;
5747 	*cs++ = 0;
5748 
5749 	return cs;
5750 }
5751 
5752 static void
5753 indirect_ctx_bb_setup(struct intel_context *ce)
5754 {
5755 	u32 *cs = context_indirect_bb(ce);
5756 
5757 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
5758 
5759 	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
5760 }
5761 
5762 static bool check_ring_start(struct intel_context *ce)
5763 {
5764 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
5765 		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
5766 
5767 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
5768 		return true;
5769 
5770 	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
5771 	       ctx_bb[CTX_BB_CANARY_INDEX],
5772 	       ce->lrc_reg_state[CTX_RING_START]);
5773 
5774 	return false;
5775 }
5776 
5777 static int indirect_ctx_bb_check(struct intel_context *ce)
5778 {
5779 	int err;
5780 
5781 	err = indirect_ctx_submit_req(ce);
5782 	if (err)
5783 		return err;
5784 
5785 	if (!check_ring_start(ce))
5786 		return -EINVAL;
5787 
5788 	return 0;
5789 }
5790 
5791 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
5792 {
5793 	struct intel_context *a, *b;
5794 	int err;
5795 
5796 	a = intel_context_create(engine);
5797 	if (IS_ERR(a))
5798 		return PTR_ERR(a);
5799 	err = intel_context_pin(a);
5800 	if (err)
5801 		goto put_a;
5802 
5803 	b = intel_context_create(engine);
5804 	if (IS_ERR(b)) {
5805 		err = PTR_ERR(b);
5806 		goto unpin_a;
5807 	}
5808 	err = intel_context_pin(b);
5809 	if (err)
5810 		goto put_b;
5811 
5812 	/* We use the already-reserved extra page in the context state */
5813 	if (!a->wa_bb_page) {
5814 		GEM_BUG_ON(b->wa_bb_page);
5815 		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
5816 		goto unpin_b;
5817 	}
5818 
5819 	/*
5820 	 * In order to test that our per-context bb is truly per context,
5821 	 * and executes at the intended spot in the context restore process,
5822 	 * make the batch store the ring start value to memory.
5823 	 * As ring start is restored prior to starting the indirect ctx bb,
5824 	 * and as it will be different for each context, it fits this purpose.
5825 	 */
5826 	indirect_ctx_bb_setup(a);
5827 	indirect_ctx_bb_setup(b);
5828 
5829 	err = indirect_ctx_bb_check(a);
5830 	if (err)
5831 		goto unpin_b;
5832 
5833 	err = indirect_ctx_bb_check(b);
5834 
5835 unpin_b:
5836 	intel_context_unpin(b);
5837 put_b:
5838 	intel_context_put(b);
5839 unpin_a:
5840 	intel_context_unpin(a);
5841 put_a:
5842 	intel_context_put(a);
5843 
5844 	return err;
5845 }
5846 
5847 static int live_lrc_indirect_ctx_bb(void *arg)
5848 {
5849 	struct intel_gt *gt = arg;
5850 	struct intel_engine_cs *engine;
5851 	enum intel_engine_id id;
5852 	int err = 0;
5853 
5854 	for_each_engine(engine, gt, id) {
5855 		intel_engine_pm_get(engine);
5856 		err = __live_lrc_indirect_ctx_bb(engine);
5857 		intel_engine_pm_put(engine);
5858 
5859 		if (igt_flush_test(gt->i915))
5860 			err = -EIO;
5861 
5862 		if (err)
5863 			break;
5864 	}
5865 
5866 	return err;
5867 }
5868 
5869 static void garbage_reset(struct intel_engine_cs *engine,
5870 			  struct i915_request *rq)
5871 {
5872 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
5873 	unsigned long *lock = &engine->gt->reset.flags;
5874 
5875 	if (test_and_set_bit(bit, lock))
5876 		return;
5877 
5878 	tasklet_disable(&engine->execlists.tasklet);
5879 
5880 	if (!rq->fence.error)
5881 		intel_engine_reset(engine, NULL);
5882 
5883 	tasklet_enable(&engine->execlists.tasklet);
5884 	clear_and_wake_up_bit(bit, lock);
5885 }
5886 
5887 static struct i915_request *garbage(struct intel_context *ce,
5888 				    struct rnd_state *prng)
5889 {
5890 	struct i915_request *rq;
5891 	int err;
5892 
5893 	err = intel_context_pin(ce);
5894 	if (err)
5895 		return ERR_PTR(err);
5896 
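	/* Scribble random garbage over the register state in the context image */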
5897 	prandom_bytes_state(prng,
5898 			    ce->lrc_reg_state,
5899 			    ce->engine->context_size -
5900 			    LRC_STATE_OFFSET);
5901 
5902 	rq = intel_context_create_request(ce);
5903 	if (IS_ERR(rq)) {
5904 		err = PTR_ERR(rq);
5905 		goto err_unpin;
5906 	}
5907 
5908 	i915_request_get(rq);
5909 	i915_request_add(rq);
5910 	return rq;
5911 
5912 err_unpin:
5913 	intel_context_unpin(ce);
5914 	return ERR_PTR(err);
5915 }
5916 
5917 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
5918 {
5919 	struct intel_context *ce;
5920 	struct i915_request *hang;
5921 	int err = 0;
5922 
5923 	ce = intel_context_create(engine);
5924 	if (IS_ERR(ce))
5925 		return PTR_ERR(ce);
5926 
5927 	hang = garbage(ce, prng);
5928 	if (IS_ERR(hang)) {
5929 		err = PTR_ERR(hang);
5930 		goto err_ce;
5931 	}
5932 
5933 	if (wait_for_submit(engine, hang, HZ / 2)) {
5934 		i915_request_put(hang);
5935 		err = -ETIME;
5936 		goto err_ce;
5937 	}
5938 
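	/* Ban the context so the reset cancels it rather than replaying the garbage */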
5939 	intel_context_set_banned(ce);
5940 	garbage_reset(engine, hang);
5941 
5942 	intel_engine_flush_submission(engine);
5943 	if (!hang->fence.error) {
5944 		i915_request_put(hang);
5945 		pr_err("%s: corrupted context was not reset\n",
5946 		       engine->name);
5947 		err = -EINVAL;
5948 		goto err_ce;
5949 	}
5950 
5951 	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
5952 		pr_err("%s: corrupted context did not recover\n",
5953 		       engine->name);
5954 		i915_request_put(hang);
5955 		err = -EIO;
5956 		goto err_ce;
5957 	}
5958 	i915_request_put(hang);
5959 
5960 err_ce:
5961 	intel_context_put(ce);
5962 	return err;
5963 }
5964 
5965 static int live_lrc_garbage(void *arg)
5966 {
5967 	struct intel_gt *gt = arg;
5968 	struct intel_engine_cs *engine;
5969 	enum intel_engine_id id;
5970 
5971 	/*
5972 	 * Verify that we can recover if one context state is completely
5973 	 * corrupted.
5974 	 */
5975 
5976 	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
5977 		return 0;
5978 
5979 	for_each_engine(engine, gt, id) {
5980 		I915_RND_STATE(prng);
5981 		int err = 0, i;
5982 
5983 		if (!intel_has_reset_engine(engine->gt))
5984 			continue;
5985 
5986 		intel_engine_pm_get(engine);
5987 		for (i = 0; i < 3; i++) {
5988 			err = __lrc_garbage(engine, &prng);
5989 			if (err)
5990 				break;
5991 		}
5992 		intel_engine_pm_put(engine);
5993 
5994 		if (igt_flush_test(gt->i915))
5995 			err = -EIO;
5996 		if (err)
5997 			return err;
5998 	}
5999 
6000 	return 0;
6001 }
6002 
6003 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6004 {
6005 	struct intel_context *ce;
6006 	struct i915_request *rq;
6007 	IGT_TIMEOUT(end_time);
6008 	int err;
6009 
6010 	ce = intel_context_create(engine);
6011 	if (IS_ERR(ce))
6012 		return PTR_ERR(ce);
6013 
6014 	ce->runtime.num_underflow = 0;
6015 	ce->runtime.max_underflow = 0;
6016 
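	/*
	 * Flood the context with requests until the timeout expires, keeping
	 * a reference only to the last request of each batch of 1024.
	 */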
6017 	do {
6018 		unsigned int loop = 1024;
6019 
6020 		while (loop) {
6021 			rq = intel_context_create_request(ce);
6022 			if (IS_ERR(rq)) {
6023 				err = PTR_ERR(rq);
6024 				goto err_rq;
6025 			}
6026 
6027 			if (--loop == 0)
6028 				i915_request_get(rq);
6029 
6030 			i915_request_add(rq);
6031 		}
6032 
6033 		if (__igt_timeout(end_time, NULL))
6034 			break;
6035 
6036 		i915_request_put(rq);
6037 	} while (1);
6038 
6039 	err = i915_request_wait(rq, 0, HZ / 5);
6040 	if (err < 0) {
6041 		pr_err("%s: request not completed!\n", engine->name);
6042 		goto err_wait;
6043 	}
6044 
6045 	igt_flush_test(engine->i915);
6046 
6047 	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6048 		engine->name,
6049 		intel_context_get_total_runtime_ns(ce),
6050 		intel_context_get_avg_runtime_ns(ce));
6051 
6052 	err = 0;
6053 	if (ce->runtime.num_underflow) {
6054 		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6055 		       engine->name,
6056 		       ce->runtime.num_underflow,
6057 		       ce->runtime.max_underflow);
6058 		GEM_TRACE_DUMP();
6059 		err = -EOVERFLOW;
6060 	}
6061 
6062 err_wait:
6063 	i915_request_put(rq);
6064 err_rq:
6065 	intel_context_put(ce);
6066 	return err;
6067 }
6068 
6069 static int live_pphwsp_runtime(void *arg)
6070 {
6071 	struct intel_gt *gt = arg;
6072 	struct intel_engine_cs *engine;
6073 	enum intel_engine_id id;
6074 	int err = 0;
6075 
6076 	/*
6077 	 * Check that the cumulative context runtime, as stored in the
6078 	 * pphwsp[16], is monotonic.
6079 	 */
6080 
6081 	for_each_engine(engine, gt, id) {
6082 		err = __live_pphwsp_runtime(engine);
6083 		if (err)
6084 			break;
6085 	}
6086 
6087 	if (igt_flush_test(gt->i915))
6088 		err = -EIO;
6089 
6090 	return err;
6091 }
6092 
6093 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6094 {
6095 	static const struct i915_subtest tests[] = {
6096 		SUBTEST(live_lrc_layout),
6097 		SUBTEST(live_lrc_fixed),
6098 		SUBTEST(live_lrc_state),
6099 		SUBTEST(live_lrc_gpr),
6100 		SUBTEST(live_lrc_isolation),
6101 		SUBTEST(live_lrc_timestamp),
6102 		SUBTEST(live_lrc_garbage),
6103 		SUBTEST(live_pphwsp_runtime),
6104 		SUBTEST(live_lrc_indirect_ctx_bb),
6105 	};
6106 
6107 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6108 		return 0;
6109 
6110 	return intel_gt_live_subtests(tests, &i915->gt);
6111 }
6112