1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
25 
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
27 {
28 	struct drm_i915_gem_object *obj;
29 	struct i915_vma *vma;
30 	int err;
31 
32 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
33 	if (IS_ERR(obj))
34 		return ERR_CAST(obj);
35 
36 	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
37 
38 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
39 	if (IS_ERR(vma)) {
40 		i915_gem_object_put(obj);
41 		return vma;
42 	}
43 
44 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
45 	if (err) {
46 		i915_gem_object_put(obj);
47 		return ERR_PTR(err);
48 	}
49 
50 	return vma;
51 }
52 
/*
 * Stop the engine heartbeat for the duration of a test so that no
 * background kernel requests are injected behind our back. The old
 * interval is stashed in *saved for engine_heartbeat_enable() to
 * restore. Note the ordering: we take a pm wakeref first so the engine
 * stays awake while the heartbeat is parked.
 */
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
				     unsigned long *saved)
{
	*saved = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}
62 
/*
 * Undo engine_heartbeat_disable(): release the pm wakeref taken there
 * and restore the saved heartbeat interval. Must be called with the
 * value previously written by engine_heartbeat_disable().
 */
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
				    unsigned long saved)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;
}
70 
/*
 * Smoke test: on every engine, submit a single spinning request on a
 * fresh context, confirm it starts executing on the GPU, then end the
 * spin and flush. Failure to start wedges the GT and fails the test.
 */
static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/* The spinner requires logical ring contexts (execlists). */
	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			/* Spinner never executed: declare the GPU hung. */
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		/* Let the spinner complete, then drain all requests. */
		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
125 
126 static int live_unlite_restore(struct intel_gt *gt, int prio)
127 {
128 	struct intel_engine_cs *engine;
129 	enum intel_engine_id id;
130 	struct igt_spinner spin;
131 	int err = -ENOMEM;
132 
133 	/*
134 	 * Check that we can correctly context switch between 2 instances
135 	 * on the same engine from the same parent context.
136 	 */
137 
138 	if (igt_spinner_init(&spin, gt))
139 		return err;
140 
141 	err = 0;
142 	for_each_engine(engine, gt, id) {
143 		struct intel_context *ce[2] = {};
144 		struct i915_request *rq[2];
145 		struct igt_live_test t;
146 		unsigned long saved;
147 		int n;
148 
149 		if (prio && !intel_engine_has_preemption(engine))
150 			continue;
151 
152 		if (!intel_engine_can_store_dword(engine))
153 			continue;
154 
155 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
156 			err = -EIO;
157 			break;
158 		}
159 		engine_heartbeat_disable(engine, &saved);
160 
161 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
162 			struct intel_context *tmp;
163 
164 			tmp = intel_context_create(engine);
165 			if (IS_ERR(tmp)) {
166 				err = PTR_ERR(tmp);
167 				goto err_ce;
168 			}
169 
170 			err = intel_context_pin(tmp);
171 			if (err) {
172 				intel_context_put(tmp);
173 				goto err_ce;
174 			}
175 
176 			/*
177 			 * Setup the pair of contexts such that if we
178 			 * lite-restore using the RING_TAIL from ce[1] it
179 			 * will execute garbage from ce[0]->ring.
180 			 */
181 			memset(tmp->ring->vaddr,
182 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
183 			       tmp->ring->vma->size);
184 
185 			ce[n] = tmp;
186 		}
187 		GEM_BUG_ON(!ce[1]->ring->size);
188 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
189 		__execlists_update_reg_state(ce[1], engine);
190 
191 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
192 		if (IS_ERR(rq[0])) {
193 			err = PTR_ERR(rq[0]);
194 			goto err_ce;
195 		}
196 
197 		i915_request_get(rq[0]);
198 		i915_request_add(rq[0]);
199 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
200 
201 		if (!igt_wait_for_spinner(&spin, rq[0])) {
202 			i915_request_put(rq[0]);
203 			goto err_ce;
204 		}
205 
206 		rq[1] = i915_request_create(ce[1]);
207 		if (IS_ERR(rq[1])) {
208 			err = PTR_ERR(rq[1]);
209 			i915_request_put(rq[0]);
210 			goto err_ce;
211 		}
212 
213 		if (!prio) {
214 			/*
215 			 * Ensure we do the switch to ce[1] on completion.
216 			 *
217 			 * rq[0] is already submitted, so this should reduce
218 			 * to a no-op (a wait on a request on the same engine
219 			 * uses the submit fence, not the completion fence),
220 			 * but it will install a dependency on rq[1] for rq[0]
221 			 * that will prevent the pair being reordered by
222 			 * timeslicing.
223 			 */
224 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
225 		}
226 
227 		i915_request_get(rq[1]);
228 		i915_request_add(rq[1]);
229 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
230 		i915_request_put(rq[0]);
231 
232 		if (prio) {
233 			struct i915_sched_attr attr = {
234 				.priority = prio,
235 			};
236 
237 			/* Alternatively preempt the spinner with ce[1] */
238 			engine->schedule(rq[1], &attr);
239 		}
240 
241 		/* And switch back to ce[0] for good measure */
242 		rq[0] = i915_request_create(ce[0]);
243 		if (IS_ERR(rq[0])) {
244 			err = PTR_ERR(rq[0]);
245 			i915_request_put(rq[1]);
246 			goto err_ce;
247 		}
248 
249 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
250 		i915_request_get(rq[0]);
251 		i915_request_add(rq[0]);
252 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
253 		i915_request_put(rq[1]);
254 		i915_request_put(rq[0]);
255 
256 err_ce:
257 		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
258 		igt_spinner_end(&spin);
259 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
260 			if (IS_ERR_OR_NULL(ce[n]))
261 				break;
262 
263 			intel_context_unpin(ce[n]);
264 			intel_context_put(ce[n]);
265 		}
266 
267 		engine_heartbeat_enable(engine, saved);
268 		if (igt_live_test_end(&t))
269 			err = -EIO;
270 		if (err)
271 			break;
272 	}
273 
274 	igt_spinner_fini(&spin);
275 	return err;
276 }
277 
/* Lite-restore with equal priorities: ordering comes from submit fences. */
static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}
282 
/* Lite-restore driven by preemption: boost the second context to max. */
static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}
287 
/*
 * Exercise execlists_hold()/execlists_unhold() around a per-engine
 * reset: a spinning request is taken off the engine and held, the
 * engine is reset (marking the request with fence.error == -EIO), and
 * only after the explicit unhold is the request allowed to complete.
 */
static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendents are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long heartbeat;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		engine_heartbeat_disable(engine, &heartbeat);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		/* Claim the per-engine reset bit, as intel_engine_reset()
		 * expects; if it is already held, someone else is resetting.
		 */
		if (test_and_set_bit(I915_RESET_ENGINE + id,
				     &gt->reset.flags)) {
			intel_gt_set_wedged(gt);
			err = -EBUSY;
			goto out;
		}
		tasklet_disable(&engine->execlists.tasklet);

		/* Run the submission tasklet by hand while it is disabled */
		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		intel_engine_reset(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		tasklet_enable(&engine->execlists.tasklet);
		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
				      &gt->reset.flags);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		engine_heartbeat_enable(engine, heartbeat);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
388 
/*
 * Emit one link of a semaphore chain into @rq: poll semaphore slot @idx
 * (the dword at vma + 4*idx) until it becomes non-zero, then release
 * the previous slot by writing 1 to vma + 4*(idx - 1). Slot 0 has no
 * predecessor, so the store is replaced by NOOP padding to keep the
 * emission at a constant 10 dwords. Arbitration is enabled only around
 * the wait so the request is preemptible while polling.
 */
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Busy-wait until vma[idx] != 0 (SAD_NEQ_SDD, poll mode) */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
	*cs++ = 0;

	if (idx > 0) {
		/* Signal the previous link in the chain: vma[idx-1] = 1 */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
		*cs++ = 0;
		*cs++ = 1;
	} else {
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);
	return 0;
}
425 
426 static struct i915_request *
427 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
428 {
429 	struct intel_context *ce;
430 	struct i915_request *rq;
431 	int err;
432 
433 	ce = intel_context_create(engine);
434 	if (IS_ERR(ce))
435 		return ERR_CAST(ce);
436 
437 	rq = intel_context_create_request(ce);
438 	if (IS_ERR(rq))
439 		goto out_ce;
440 
441 	err = 0;
442 	if (rq->engine->emit_init_breadcrumb)
443 		err = rq->engine->emit_init_breadcrumb(rq);
444 	if (err == 0)
445 		err = emit_semaphore_chain(rq, vma, idx);
446 	if (err == 0)
447 		i915_request_get(rq);
448 	i915_request_add(rq);
449 	if (err)
450 		rq = ERR_PTR(err);
451 
452 out_ce:
453 	intel_context_put(ce);
454 	return rq;
455 }
456 
/*
 * Kick off the semaphore chain: submit a kernel request on @engine that
 * writes 1 into slot @idx - 1 (releasing the final waiter), then boost
 * it to @prio so it can jump the queue of semaphore waiters. The
 * schedule() call is wrapped in local_bh_disable/enable so that the
 * submission tasklet runs as soon as bottom halves are re-enabled.
 */
static int
release_queue(struct intel_engine_cs *engine,
	      struct i915_vma *vma,
	      int idx, int prio)
{
	struct i915_sched_attr attr = {
		.priority = prio,
	};
	struct i915_request *rq;
	u32 *cs;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	/* vma[idx - 1] = 1: wake the last link of the chain */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	i915_request_get(rq);
	i915_request_add(rq);

	local_bh_disable();
	engine->schedule(rq, &attr);
	local_bh_enable(); /* kick tasklet */

	i915_request_put(rq);

	return 0;
}
496 
/*
 * Build a chain of semaphore waiters across every engine (@count per
 * engine, plus the head on @outer at slot 0) and then release the tail
 * at maximum priority. Completion of the whole chain requires the
 * scheduler to timeslice between the waiters, since each one busy-waits
 * until its successor signals it. The wait timeout scales with the
 * chain length; failure to complete wedges the GT.
 */
static int
slice_semaphore_queue(struct intel_engine_cs *outer,
		      struct i915_vma *vma,
		      int count)
{
	struct intel_engine_cs *engine;
	struct i915_request *head;
	enum intel_engine_id id;
	int err, i, n = 0;

	head = semaphore_queue(outer, vma, n++);
	if (IS_ERR(head))
		return PTR_ERR(head);

	for_each_engine(engine, outer->gt, id) {
		for (i = 0; i < count; i++) {
			struct i915_request *rq;

			rq = semaphore_queue(engine, vma, n++);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			i915_request_put(rq);
		}
	}

	err = release_queue(outer, vma, n, INT_MAX);
	if (err)
		goto out;

	if (i915_request_wait(head, 0,
			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
		       count, n);
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(outer->gt);
		err = -EIO;
	}

out:
	i915_request_put(head);
	return err;
}
542 
/*
 * Verify that timeslicing lets a chain of mutually-dependent semaphore
 * waiters make forward progress: for a growing number of links (prime
 * counts 1..16), build the chain on each preemption-capable engine and
 * check it completes (see slice_semaphore_queue()).
 */
static int live_timeslice_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;
	int count;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	for_each_prime_number_from(count, 1, 16) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, gt, id) {
			unsigned long saved;

			if (!intel_engine_has_preemption(engine))
				continue;

			/* Reset all semaphore slots to "not signalled" */
			memset(vaddr, 0, PAGE_SIZE);

			engine_heartbeat_disable(engine, &saved);
			err = slice_semaphore_queue(engine, vma, count);
			engine_heartbeat_enable(engine, saved);
			if (err)
				goto err_pin;

			if (igt_flush_test(gt->i915)) {
				err = -EIO;
				goto err_pin;
			}
		}
	}

	/* Success falls through the same unwind as the error paths. */
err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}
616 
/*
 * Submit an empty kernel request on @engine and return it with an extra
 * reference held for the caller, or an ERR_PTR on failure.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (!IS_ERR(rq)) {
		i915_request_get(rq);
		i915_request_add(rq);
	}

	return rq;
}
630 
631 static int wait_for_submit(struct intel_engine_cs *engine,
632 			   struct i915_request *rq,
633 			   unsigned long timeout)
634 {
635 	timeout += jiffies;
636 	do {
637 		cond_resched();
638 		intel_engine_flush_submission(engine);
639 		if (i915_request_is_active(rq))
640 			return 0;
641 	} while (time_before(jiffies, timeout));
642 
643 	return -ETIME;
644 }
645 
/*
 * Upper bound (in jiffies) within which a timeslice switch must have
 * happened: two timeslice intervals plus one jiffy of slack.
 * NOTE(review): timeslice() is defined elsewhere in this file — presumed
 * to return the engine's timeslice duration in ms; confirm at its
 * definition.
 */
static long timeslice_threshold(const struct intel_engine_cs *engine)
{
	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
}
650 
/*
 * Fill ELSP[0] with a semaphore waiter and ELSP[1] with a nop, then
 * queue an equal-priority release behind them and check that the
 * timeslice timer is armed and that the waiter is eventually ejected in
 * favour of the queued request.
 */
static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = {
			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
		};
		struct i915_request *rq, *nop;
		unsigned long saved;

		if (!intel_engine_has_preemption(engine))
			continue;

		engine_heartbeat_disable(engine, &saved);
		/* Clear all semaphore slots before each engine's run */
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		intel_engine_flush_submission(engine);
		/* The queued request should have armed the timeslice timer */
		if (!READ_ONCE(engine->execlists.timer.expires) &&
		    !i915_request_completed(rq)) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
				      engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);
			GEM_TRACE_DUMP();

			/* Unblock the semaphore so cleanup can proceed */
			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EINVAL;
		}

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		engine_heartbeat_enable(engine, saved);
		if (err)
			break;
	}

	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}
784 
/*
 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
 * preempt the busywaits used to synchronise between rings: a low
 * priority request spins on an in-ring MI_SEMAPHORE_WAIT and a high
 * priority request must be able to preempt it to write the value the
 * semaphore is waiting for.
 */
static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	int err = -ENOMEM;
	u32 *map;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	/* CPU view of the semaphore page, so we can observe/poke values */
	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * is should be preemptible) and the high priority requests
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		/* *map = 1: marks that the request has started executing */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		/* Busy-wait until *map == 0 again (SAD_EQ_SDD, poll mode) */
		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		/* Wait (up to 10ms) for the CPU to see *map become 1 */
		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		/* *map = 0: satisfies the low priority semaphore wait */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		i915_request_add(hi);

		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}
967 
/*
 * Create a spinner request on the context @ctx uses for @engine
 * (looked up via engine->legacy_idx). @arb selects the arbitration
 * command emitted inside the spin batch (e.g. MI_ARB_CHECK or
 * MI_NOOP). Returns the request or an ERR_PTR; the intermediate
 * context reference is dropped either way.
 */
static struct i915_request *
spinner_create_request(struct igt_spinner *spin,
		       struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       u32 arb)
{
	struct intel_context *ce;
	struct i915_request *rq;

	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = igt_spinner_create_request(spin, ce, arb);
	intel_context_put(ce);
	return rq;
}
985 
/*
 * Basic preemption test: start a spinner from a minimum-priority
 * context, then submit a spinner from a maximum-priority context and
 * check the latter starts executing (i.e. preempts the former) on each
 * preemption-capable engine.
 */
static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	/* Warn (but continue) if the capability flag disagrees with HW */
	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
		pr_err("Logical preemption supported, but not exposed\n");

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		/* The hi spinner can only start if it preempted the lo one */
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;
}
1084 
/*
 * Late preemption: submit the second spinner at default (lower)
 * priority and confirm it does NOT overtake the first; then boost its
 * priority via engine->schedule() and confirm the reschedule now
 * preempts the first spinner.
 */
static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = I915_USER_PRIORITY(1);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		/* MI_NOOP spinner: must not run while spin_lo occupies HW */
		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		/* Now boost the second request and expect preemption */
		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
		engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;

err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_ctx_lo;
}
1190 
/* A context/spinner pair representing one client in preemption tests. */
struct preempt_client {
	struct igt_spinner spin;
	struct i915_gem_context *ctx;
};
1195 
1196 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1197 {
1198 	c->ctx = kernel_context(gt->i915);
1199 	if (!c->ctx)
1200 		return -ENOMEM;
1201 
1202 	if (igt_spinner_init(&c->spin, gt))
1203 		goto err_ctx;
1204 
1205 	return 0;
1206 
1207 err_ctx:
1208 	kernel_context_close(c->ctx);
1209 	return -ENOMEM;
1210 }
1211 
/* Tear down a preempt_client: spinner first, then its context. */
static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}
1217 
static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and not want to be interrupted.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;
	/* Give B maximum priority: it would normally preempt A instantly. */
	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;

		if (!intel_engine_has_preemption(engine))
			continue;

		/* Reset the suppressed-preemption counter checked below. */
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			goto err_client_b;
		}

		/* Low priority client, but unpreemptable! */
		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			goto err_wedged;
		}

		rq_b = spinner_create_request(&b.spin,
					      b.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_b)) {
			err = PTR_ERR(rq_b);
			goto err_client_b;
		}

		i915_request_add(rq_b);

		/* B is much more important than A! (But A is unpreemptable.) */
		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));

		/* Wait long enough for preemption and timeslicing */
		if (igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client started too early!\n");
			goto err_wedged;
		}

		/* Release A; B should now be scheduled in. */
		igt_spinner_end(&a.spin);

		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client failed to start\n");
			goto err_wedged;
		}

		igt_spinner_end(&b.spin);

		/* The NOPREEMPT flag must have suppressed any preempt cycle. */
		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption recorded x%d; should have been suppressed!\n",
			       engine->execlists.preempt_hang.count);
			err = -EINVAL;
			goto err_wedged;
		}

		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	/* Stop both spinners and declare the GPU lost before unwinding. */
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
1318 
/*
 * Shared state for the __cancel_*() subtests: the engine currently under
 * test plus two preempt clients (a, b) used to build the request chains.
 */
struct live_preempt_cancel {
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
};
1323 
/*
 * Ban the context of the request currently executing in ELSP[0] and check
 * that a heartbeat pulse cancels it, marking the request with -EIO.
 */
static int __cancel_active0(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP0 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_ARB_CHECK);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Start from a clean slate in case a previous subtest banned it. */
	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	/* Ban the spinner's context, then kick the engine to evict it. */
	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -EIO;
		goto out;
	}

	if (rq->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
1372 
/*
 * As __cancel_active0, but cancel the request in ELSP[1]: rq[0] spins
 * without arbitration (MI_NOOP, cannot be preempted mid-batch) while the
 * banned rq[1] queues behind it. Expect rq[0] to complete normally and
 * rq[1] to be cancelled with -EIO.
 */
static int __cancel_active1(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[2] = {};
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP1 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_NOOP); /* no preemption */
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = spinner_create_request(&arg->b.spin,
				       arg->b.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	/* Order rq[1] strictly after rq[0] so it sits in the second slot. */
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	intel_context_set_banned(rq[1]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	/* Let rq[0] finish; only the banned rq[1] should be cancelled. */
	igt_spinner_end(&arg->a.spin);
	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
		err = -EIO;
		goto out;
	}

	if (rq[0]->fence.error != 0) {
		pr_err("Normal inflight0 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != -EIO) {
		pr_err("Cancelled inflight1 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	/* rq[1] may still be NULL on early error; the put handles that. */
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
1444 
/*
 * Fill both ELSP ports and queue a third request, then ban the queued
 * request's context. rq[0] and rq[2] share a.ctx, so the ban is expected
 * to cancel both (-EIO) while rq[1] (b.ctx) completes normally.
 */
static int __cancel_queued(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[3] = {};
	struct igt_live_test t;
	int err;

	/* Full ELSP and one in the wings */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	/* Chain each request behind the previous one. */
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	/* NOTE: deliberately a.ctx again — same context as the spinner rq[0]. */
	rq[2] = spinner_create_request(&arg->b.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[2])) {
		err = PTR_ERR(rq[2]);
		goto out;
	}

	i915_request_get(rq[2]);
	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
	i915_request_add(rq[2]);
	if (err)
		goto out;

	intel_context_set_banned(rq[2]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
		err = -EIO;
		goto out;
	}

	if (rq[0]->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != 0) {
		pr_err("Normal inflight1 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[2]->fence.error != -EIO) {
		pr_err("Cancelled queued request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	/* Puts tolerate the NULL entries left by early-exit paths. */
	i915_request_put(rq[2]);
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
1534 
/*
 * Cancel a non-preemptible spinner (MI_NOOP: no arbitration point) in
 * ELSP[0]. Since it cannot be preempted, the pulse must escalate to a
 * forced reset, which depends on the preempt-timeout being configured.
 */
static int __cancel_hostile(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	int err;

	/* Preempt cancel non-preemptible spinner in ELSP0 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine); /* force reset */
	if (err)
		goto out;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -EIO;
		goto out;
	}

	if (rq->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq);
	/* Flush after the reset to ensure the engine is idle again. */
	if (igt_flush_test(arg->engine->i915))
		err = -EIO;
	return err;
}
1581 
/*
 * Driver for the __cancel_*() subtests: runs the four cancellation
 * scenarios on every engine that supports preemption.
 */
static int live_preempt_cancel(void *arg)
{
	struct intel_gt *gt = arg;
	struct live_preempt_cancel data;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * To cancel an inflight context, we need to first remove it from the
	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &data.a))
		return -ENOMEM;
	if (preempt_client_init(gt, &data.b))
		goto err_client_a;

	for_each_engine(data.engine, gt, id) {
		if (!intel_engine_has_preemption(data.engine))
			continue;

		err = __cancel_active0(&data);
		if (err)
			goto err_wedged;

		err = __cancel_active1(&data);
		if (err)
			goto err_wedged;

		err = __cancel_queued(&data);
		if (err)
			goto err_wedged;

		err = __cancel_hostile(&data);
		if (err)
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&data.b);
err_client_a:
	preempt_client_fini(&data.a);
	return err;

err_wedged:
	/* Dump state and wedge: a failed cancellation leaves the GPU hung. */
	GEM_TRACE_DUMP();
	igt_spinner_end(&data.b.spin);
	igt_spinner_end(&data.a.spin);
	intel_gt_set_wedged(gt);
	goto err_client_b;
}
1637 
static int live_suppress_self_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
	};
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (USES_GUC_SUBMISSION(gt->i915))
		return 0; /* presume black blox */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		/* Hold a pm wakeref for the duration of this engine's loop. */
		intel_engine_pm_get(engine);
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			intel_engine_pm_put(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			intel_engine_pm_put(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				intel_engine_pm_put(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			GEM_BUG_ON(i915_request_completed(rq_a));
			/* Bump rq_a to max prio just as it is about to retire. */
			engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				intel_engine_pm_put(engine);
				goto err_wedged;
			}

			/* Alternate roles each pass so b becomes the spinner. */
			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
			intel_engine_pm_put(engine);
			err = -EINVAL;
			goto err_client_b;
		}

		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
1756 
/* No-op notify callback for the dummy request's submit fence. */
static int __i915_sw_fence_call
dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}
1762 
1763 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1764 {
1765 	struct i915_request *rq;
1766 
1767 	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1768 	if (!rq)
1769 		return NULL;
1770 
1771 	rq->engine = engine;
1772 
1773 	spin_lock_init(&rq->lock);
1774 	INIT_LIST_HEAD(&rq->fence.cb_list);
1775 	rq->fence.lock = &rq->lock;
1776 	rq->fence.ops = &i915_fence_ops;
1777 
1778 	i915_sched_node_init(&rq->sched);
1779 
1780 	/* mark this request as permanently incomplete */
1781 	rq->fence.seqno = 1;
1782 	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
1783 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
1784 	GEM_BUG_ON(i915_request_completed(rq));
1785 
1786 	i915_sw_fence_init(&rq->submit, dummy_notify);
1787 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1788 
1789 	spin_lock_init(&rq->lock);
1790 	rq->fence.lock = &rq->lock;
1791 	INIT_LIST_HEAD(&rq->fence.cb_list);
1792 
1793 	return rq;
1794 }
1795 
/*
 * Complete, signal and release a request created by dummy_request().
 */
static void dummy_request_free(struct i915_request *dummy)
{
	/* We have to fake the CS interrupt to kick the next request */
	i915_sw_fence_commit(&dummy->submit);

	i915_request_mark_complete(dummy);
	dma_fence_signal(&dummy->fence);

	i915_sched_node_fini(&dummy->sched);
	i915_sw_fence_fini(&dummy->submit);

	/* Drops the final fence reference, freeing the kzalloc'ed request. */
	dma_fence_free(&dummy->fence);
}
1809 
static int live_suppress_wait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct preempt_client client[4];
	struct i915_request *rq[ARRAY_SIZE(client)] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;
	int i;

	/*
	 * Waiters are given a little priority nudge, but not enough
	 * to actually cause any preemption. Double check that we do
	 * not needlessly generate preempt-to-idle cycles.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
		return -ENOMEM;
	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
		goto err_client_0;
	if (preempt_client_init(gt, &client[2])) /* head of queue */
		goto err_client_1;
	if (preempt_client_init(gt, &client[3])) /* bystander */
		goto err_client_2;

	for_each_engine(engine, gt, id) {
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!engine->emit_init_breadcrumb)
			continue;

		/* Repeat with the waiter at each position in the queue. */
		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
			struct i915_request *dummy;

			engine->execlists.preempt_hang.count = 0;

			dummy = dummy_request(engine);
			if (!dummy)
				goto err_client_3;

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct i915_request *this;

				this = spinner_create_request(&client[i].spin,
							      client[i].ctx, engine,
							      MI_NOOP);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto err_wedged;
				}

				/* Disable NEWCLIENT promotion */
				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
							&dummy->fence);

				rq[i] = i915_request_get(this);
				i915_request_add(this);
			}

			/* Signal the dummy so all four requests may run. */
			dummy_request_free(dummy);

			GEM_BUG_ON(i915_request_completed(rq[0]));
			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
				pr_err("%s: First client failed to start\n",
				       engine->name);
				goto err_wedged;
			}
			GEM_BUG_ON(!i915_request_started(rq[0]));

			/* The waiter's priority bump must not cause completion. */
			if (i915_request_wait(rq[depth],
					      I915_WAIT_PRIORITY,
					      1) != -ETIME) {
				pr_err("%s: Waiter depth:%d completed!\n",
				       engine->name, depth);
				goto err_wedged;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				igt_spinner_end(&client[i].spin);
				i915_request_put(rq[i]);
				rq[i] = NULL;
			}

			if (igt_flush_test(gt->i915))
				goto err_wedged;

			if (engine->execlists.preempt_hang.count) {
				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
				       engine->name,
				       engine->execlists.preempt_hang.count,
				       depth);
				err = -EINVAL;
				goto err_client_3;
			}
		}
	}

	err = 0;
err_client_3:
	preempt_client_fini(&client[3]);
err_client_2:
	preempt_client_fini(&client[2]);
err_client_1:
	preempt_client_fini(&client[1]);
err_client_0:
	preempt_client_fini(&client[0]);
	return err;

err_wedged:
	/* rq[] entries not yet assigned are NULL; the put tolerates that. */
	for (i = 0; i < ARRAY_SIZE(client); i++) {
		igt_spinner_end(&client[i].spin);
		i915_request_put(rq[i]);
	}
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_3;
}
1933 
static int live_chain_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client hi, lo;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Build a chain AB...BA between two contexts (A, B) and request
	 * preemption of the last request. It should then complete before
	 * the previously submitted spinner in B.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &hi))
		return -ENOMEM;

	if (preempt_client_init(gt, &lo))
		goto err_client_hi;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = {
			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
		};
		struct igt_live_test t;
		struct i915_request *rq;
		int ring_size, count, i;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&lo.spin,
					    lo.ctx, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq))
			goto err_wedged;

		i915_request_get(rq);
		i915_request_add(rq);

		/* Estimate how many requests fit in the ring from one rq. */
		ring_size = rq->wa_tail - rq->head;
		if (ring_size < 0)
			ring_size += rq->ring->size;
		ring_size = rq->ring->size / ring_size;
		pr_debug("%s(%s): Using maximum of %d requests\n",
			 __func__, engine->name, ring_size);

		igt_spinner_end(&lo.spin);
		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
			pr_err("Timed out waiting to flush %s\n", engine->name);
			i915_request_put(rq);
			goto err_wedged;
		}
		i915_request_put(rq);

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_wedged;
		}

		for_each_prime_number_from(count, 1, ring_size) {
			rq = spinner_create_request(&hi.spin,
						    hi.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);
			if (!igt_wait_for_spinner(&hi.spin, rq))
				goto err_wedged;

			rq = spinner_create_request(&lo.spin,
						    lo.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);

			/* Pad the chain with `count` low priority requests. */
			for (i = 0; i < count; i++) {
				rq = igt_request_alloc(lo.ctx, engine);
				if (IS_ERR(rq))
					goto err_wedged;
				i915_request_add(rq);
			}

			rq = igt_request_alloc(hi.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);
			/* Promote the final request to trigger the preemption. */
			engine->schedule(rq, &attr);

			igt_spinner_end(&hi.spin);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to preempt over chain of %d\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);
				i915_request_put(rq);
				goto err_wedged;
			}
			igt_spinner_end(&lo.spin);
			i915_request_put(rq);

			/* Drain the low priority chain before the next pass. */
			rq = igt_request_alloc(lo.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to flush low priority chain of %d requests\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				goto err_wedged;
			}
			i915_request_put(rq);
		}

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_wedged;
		}
	}

	err = 0;
err_client_lo:
	preempt_client_fini(&lo);
err_client_hi:
	preempt_client_fini(&hi);
	return err;

err_wedged:
	igt_spinner_end(&hi.spin);
	igt_spinner_end(&lo.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_lo;
}
2086 
2087 static int create_gang(struct intel_engine_cs *engine,
2088 		       struct i915_request **prev)
2089 {
2090 	struct drm_i915_gem_object *obj;
2091 	struct intel_context *ce;
2092 	struct i915_request *rq;
2093 	struct i915_vma *vma;
2094 	u32 *cs;
2095 	int err;
2096 
2097 	ce = intel_context_create(engine);
2098 	if (IS_ERR(ce))
2099 		return PTR_ERR(ce);
2100 
2101 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2102 	if (IS_ERR(obj)) {
2103 		err = PTR_ERR(obj);
2104 		goto err_ce;
2105 	}
2106 
2107 	vma = i915_vma_instance(obj, ce->vm, NULL);
2108 	if (IS_ERR(vma)) {
2109 		err = PTR_ERR(vma);
2110 		goto err_obj;
2111 	}
2112 
2113 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2114 	if (err)
2115 		goto err_obj;
2116 
2117 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2118 	if (IS_ERR(cs))
2119 		goto err_obj;
2120 
2121 	/* Semaphore target: spin until zero */
2122 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2123 
2124 	*cs++ = MI_SEMAPHORE_WAIT |
2125 		MI_SEMAPHORE_POLL |
2126 		MI_SEMAPHORE_SAD_EQ_SDD;
2127 	*cs++ = 0;
2128 	*cs++ = lower_32_bits(vma->node.start);
2129 	*cs++ = upper_32_bits(vma->node.start);
2130 
2131 	if (*prev) {
2132 		u64 offset = (*prev)->batch->node.start;
2133 
2134 		/* Terminate the spinner in the next lower priority batch. */
2135 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2136 		*cs++ = lower_32_bits(offset);
2137 		*cs++ = upper_32_bits(offset);
2138 		*cs++ = 0;
2139 	}
2140 
2141 	*cs++ = MI_BATCH_BUFFER_END;
2142 	i915_gem_object_flush_map(obj);
2143 	i915_gem_object_unpin_map(obj);
2144 
2145 	rq = intel_context_create_request(ce);
2146 	if (IS_ERR(rq))
2147 		goto err_obj;
2148 
2149 	rq->batch = vma;
2150 	i915_request_get(rq);
2151 
2152 	i915_vma_lock(vma);
2153 	err = i915_request_await_object(rq, vma->obj, false);
2154 	if (!err)
2155 		err = i915_vma_move_to_active(vma, rq, 0);
2156 	if (!err)
2157 		err = rq->engine->emit_bb_start(rq,
2158 						vma->node.start,
2159 						PAGE_SIZE, 0);
2160 	i915_vma_unlock(vma);
2161 	i915_request_add(rq);
2162 	if (err)
2163 		goto err_rq;
2164 
2165 	i915_gem_object_put(obj);
2166 	intel_context_put(ce);
2167 
2168 	rq->client_link.next = &(*prev)->client_link;
2169 	*prev = rq;
2170 	return 0;
2171 
2172 err_rq:
2173 	i915_request_put(rq);
2174 err_obj:
2175 	i915_gem_object_put(obj);
2176 err_ce:
2177 	intel_context_put(ce);
2178 	return err;
2179 }
2180 
2181 static int live_preempt_gang(void *arg)
2182 {
2183 	struct intel_gt *gt = arg;
2184 	struct intel_engine_cs *engine;
2185 	enum intel_engine_id id;
2186 
2187 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2188 		return 0;
2189 
2190 	/*
2191 	 * Build as long a chain of preempters as we can, with each
2192 	 * request higher priority than the last. Once we are ready, we release
2193 	 * the last batch which then precolates down the chain, each releasing
2194 	 * the next oldest in turn. The intent is to simply push as hard as we
2195 	 * can with the number of preemptions, trying to exceed narrow HW
2196 	 * limits. At a minimum, we insist that we can sort all the user
2197 	 * high priority levels into execution order.
2198 	 */
2199 
2200 	for_each_engine(engine, gt, id) {
2201 		struct i915_request *rq = NULL;
2202 		struct igt_live_test t;
2203 		IGT_TIMEOUT(end_time);
2204 		int prio = 0;
2205 		int err = 0;
2206 		u32 *cs;
2207 
2208 		if (!intel_engine_has_preemption(engine))
2209 			continue;
2210 
2211 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2212 			return -EIO;
2213 
2214 		do {
2215 			struct i915_sched_attr attr = {
2216 				.priority = I915_USER_PRIORITY(prio++),
2217 			};
2218 
2219 			err = create_gang(engine, &rq);
2220 			if (err)
2221 				break;
2222 
2223 			/* Submit each spinner at increasing priority */
2224 			engine->schedule(rq, &attr);
2225 
2226 			if (prio <= I915_PRIORITY_MAX)
2227 				continue;
2228 
2229 			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2230 				break;
2231 
2232 			if (__igt_timeout(end_time, NULL))
2233 				break;
2234 		} while (1);
2235 		pr_debug("%s: Preempt chain of %d requests\n",
2236 			 engine->name, prio);
2237 
2238 		/*
2239 		 * Such that the last spinner is the highest priority and
2240 		 * should execute first. When that spinner completes,
2241 		 * it will terminate the next lowest spinner until there
2242 		 * are no more spinners and the gang is complete.
2243 		 */
2244 		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2245 		if (!IS_ERR(cs)) {
2246 			*cs = 0;
2247 			i915_gem_object_unpin_map(rq->batch->obj);
2248 		} else {
2249 			err = PTR_ERR(cs);
2250 			intel_gt_set_wedged(gt);
2251 		}
2252 
2253 		while (rq) { /* wait for each rq from highest to lowest prio */
2254 			struct i915_request *n =
2255 				list_next_entry(rq, client_link);
2256 
2257 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2258 				struct drm_printer p =
2259 					drm_info_printer(engine->i915->drm.dev);
2260 
2261 				pr_err("Failed to flush chain of %d requests, at %d\n",
2262 				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2263 				intel_engine_dump(engine, &p,
2264 						  "%s\n", engine->name);
2265 
2266 				err = -ETIME;
2267 			}
2268 
2269 			i915_request_put(rq);
2270 			rq = n;
2271 		}
2272 
2273 		if (igt_live_test_end(&t))
2274 			err = -EIO;
2275 		if (err)
2276 			return err;
2277 	}
2278 
2279 	return 0;
2280 }
2281 
/*
 * Inject a hang during the preempt-to-idle transition and verify that an
 * engine reset recovers: the high priority spinner must still be run
 * after the reset completes.
 */
static int live_preempt_hang(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		/* Arm the hang injection before submitting the preempter. */
		init_completion(&engine->execlists.preempt_hang.completion);
		engine->execlists.preempt_hang.inject_hang = true;

		i915_request_add(rq);

		if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
						 HZ / 10)) {
			pr_err("Preemption did not occur within timeout!");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		/* Perform a manual per-engine reset to recover from the hang. */
		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		intel_engine_reset(engine, NULL);
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);

		engine->execlists.preempt_hang.inject_hang = false;

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;
}
2392 
static int live_preempt_timeout(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Check that we force preemption to occur by cancelling the previous
	 * context if it refuses to yield the GPU.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	/* Forced preemption cancels the hog via an engine reset. */
	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin_lo, gt))
		return -ENOMEM;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		unsigned long saved_timeout;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		/* Start a low priority spinner that will not yield. */
		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_NOOP); /* preemption disabled */
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		/* Queue a high priority request behind the hog. */
		rq = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		/* Flush the previous CS ack before changing timeouts */
		while (READ_ONCE(engine->execlists.pending[0]))
			cpu_relax();

		saved_timeout = engine->props.preempt_timeout_ms;
		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */

		i915_request_get(rq);
		i915_request_add(rq);

		/* Push the preemption to HW before restoring the timeout. */
		intel_engine_flush_submission(engine);
		engine->props.preempt_timeout_ms = saved_timeout;

		/* The hi request can only complete if the hog was cancelled. */
		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_lo);
		i915_request_put(rq);
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
	return err;
}
2491 
/* Return a pseudo-random integer uniformly drawn from [min, max). */
static int random_range(struct rnd_state *rnd, int min, int max)
{
	const int span = max - min;

	return min + i915_prandom_u32_max_state(span, rnd);
}
2496 
2497 static int random_priority(struct rnd_state *rnd)
2498 {
2499 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2500 }
2501 
/* State for the preemption smoketests; copied per engine/thread. */
struct preempt_smoke {
	struct intel_gt *gt;
	/* Pool of ncontext kernel contexts, picked from at random */
	struct i915_gem_context **contexts;
	/* Engine this instance submits to (set per thread/iteration) */
	struct intel_engine_cs *engine;
	/* Optional batch object to execute; NULL -> empty requests */
	struct drm_i915_gem_object *batch;
	unsigned int ncontext;
	struct rnd_state prng;
	/* Number of requests submitted by this instance */
	unsigned long count;
};
2511 
2512 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2513 {
2514 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2515 							  &smoke->prng)];
2516 }
2517 
/*
 * Submit one request for @ctx at priority @prio on smoke->engine,
 * optionally executing @batch. Returns 0 on success, negative errno
 * otherwise.
 */
static int smoke_submit(struct preempt_smoke *smoke,
			struct i915_gem_context *ctx, int prio,
			struct drm_i915_gem_object *batch)
{
	struct i915_request *rq;
	struct i915_vma *vma = NULL;
	int err = 0;

	if (batch) {
		struct i915_address_space *vm;

		/* Bind the batch into this context's address space. */
		vm = i915_gem_context_get_vm_rcu(ctx);
		vma = i915_vma_instance(batch, vm, NULL);
		i915_vm_put(vm);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		err = i915_vma_pin(vma, 0, 0, PIN_USER);
		if (err)
			return err;
	}

	/* Apply the priority for this submission before queuing. */
	ctx->sched.priority = prio;

	rq = igt_request_alloc(ctx, smoke->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	if (vma) {
		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
	}

	/* Always add, even after an error, so the request is retired. */
	i915_request_add(rq);

unpin:
	if (vma)
		i915_vma_unpin(vma);

	return err;
}
2568 
/*
 * kthread body: flood smoke->engine with requests from random contexts,
 * cycling the priority (count % I915_PRIORITY_MAX) so it steadily
 * climbs, until the selftest timeout expires. The number of requests
 * submitted is reported back via smoke->count.
 */
static int smoke_crescendo_thread(void *arg)
{
	struct preempt_smoke *smoke = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_gem_context *ctx = smoke_context(smoke);
		int err;

		err = smoke_submit(smoke,
				   ctx, count % I915_PRIORITY_MAX,
				   smoke->batch);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	smoke->count = count;
	return 0;
}
2592 
2593 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2594 #define BATCH BIT(0)
2595 {
2596 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
2597 	struct preempt_smoke arg[I915_NUM_ENGINES];
2598 	struct intel_engine_cs *engine;
2599 	enum intel_engine_id id;
2600 	unsigned long count;
2601 	int err = 0;
2602 
2603 	for_each_engine(engine, smoke->gt, id) {
2604 		arg[id] = *smoke;
2605 		arg[id].engine = engine;
2606 		if (!(flags & BATCH))
2607 			arg[id].batch = NULL;
2608 		arg[id].count = 0;
2609 
2610 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
2611 				      "igt/smoke:%d", id);
2612 		if (IS_ERR(tsk[id])) {
2613 			err = PTR_ERR(tsk[id]);
2614 			break;
2615 		}
2616 		get_task_struct(tsk[id]);
2617 	}
2618 
2619 	yield(); /* start all threads before we kthread_stop() */
2620 
2621 	count = 0;
2622 	for_each_engine(engine, smoke->gt, id) {
2623 		int status;
2624 
2625 		if (IS_ERR_OR_NULL(tsk[id]))
2626 			continue;
2627 
2628 		status = kthread_stop(tsk[id]);
2629 		if (status && !err)
2630 			err = status;
2631 
2632 		count += arg[id].count;
2633 
2634 		put_task_struct(tsk[id]);
2635 	}
2636 
2637 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2638 		count, flags,
2639 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2640 	return 0;
2641 }
2642 
/*
 * Single-threaded smoke: walk every engine in turn, submitting one
 * request from a random context at a random priority per engine, and
 * repeat until the selftest timeout expires.
 */
static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
{
	enum intel_engine_id id;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		for_each_engine(smoke->engine, smoke->gt, id) {
			struct i915_gem_context *ctx = smoke_context(smoke);
			int err;

			err = smoke_submit(smoke,
					   ctx, random_priority(&smoke->prng),
					   flags & BATCH ? smoke->batch : NULL);
			if (err)
				return err;

			count++;
		}
	} while (!__igt_timeout(end_time, NULL));

	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
		count, flags,
		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
	return 0;
}
2670 
/*
 * Bombard the engines with requests from many contexts at varying
 * priorities, both with and without a batch (the BATCH phase), and
 * check that nothing falls over (via igt_live_test/igt_flush_test).
 */
static int live_preempt_smoke(void *arg)
{
	struct preempt_smoke smoke = {
		.gt = arg,
		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
		.ncontext = 1024,
	};
	const unsigned int phase[] = { 0, BATCH };
	struct igt_live_test t;
	int err = -ENOMEM;
	u32 *cs;
	int n;

	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
		return 0;

	smoke.contexts = kmalloc_array(smoke.ncontext,
				       sizeof(*smoke.contexts),
				       GFP_KERNEL);
	if (!smoke.contexts)
		return -ENOMEM;

	smoke.batch =
		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
	if (IS_ERR(smoke.batch)) {
		err = PTR_ERR(smoke.batch);
		goto err_free;
	}

	/* Fill the batch with a page of MI_ARB_CHECK arbitration points. */
	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_batch;
	}
	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
		cs[n] = MI_ARB_CHECK;
	cs[n] = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(smoke.batch);
	i915_gem_object_unpin_map(smoke.batch);

	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
		err = -EIO;
		goto err_batch;
	}

	for (n = 0; n < smoke.ncontext; n++) {
		smoke.contexts[n] = kernel_context(smoke.gt->i915);
		if (!smoke.contexts[n])
			goto err_ctx; /* err still holds -ENOMEM here */
	}

	for (n = 0; n < ARRAY_SIZE(phase); n++) {
		err = smoke_crescendo(&smoke, phase[n]);
		if (err)
			goto err_ctx;

		err = smoke_random(&smoke, phase[n]);
		if (err)
			goto err_ctx;
	}

err_ctx:
	if (igt_live_test_end(&t))
		err = -EIO;

	/* Contexts are filled in order; a NULL slot marks where we stopped. */
	for (n = 0; n < smoke.ncontext; n++) {
		if (!smoke.contexts[n])
			break;
		kernel_context_close(smoke.contexts[n]);
	}

err_batch:
	i915_gem_object_put(smoke.batch);
err_free:
	kfree(smoke.contexts);

	return err;
}
2749 
/*
 * Measure request throughput/latency through @nctx virtual engines built
 * over @siblings: submit batches of empty requests in prime-numbered
 * steps and time their completion. With CHAIN, all requests for one
 * context are queued before moving to the next context; otherwise
 * submission is interleaved across contexts.
 */
static int nop_virtual_engine(struct intel_gt *gt,
			      struct intel_engine_cs **siblings,
			      unsigned int nsibling,
			      unsigned int nctx,
			      unsigned int flags)
#define CHAIN BIT(0)
{
	IGT_TIMEOUT(end_time);
	struct i915_request *request[16] = {};
	struct intel_context *ve[16];
	unsigned long n, prime, nc;
	struct igt_live_test t;
	ktime_t times[2] = {};
	int err;

	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			nctx = n; /* only unwind the ve[] actually created */
			goto out;
		}

		err = intel_context_pin(ve[n]);
		if (err) {
			intel_context_put(ve[n]);
			nctx = n;
			goto out;
		}
	}

	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
	if (err)
		goto out;

	for_each_prime_number_from(prime, 1, 8192) {
		times[1] = ktime_get_raw();

		if (flags & CHAIN) {
			for (nc = 0; nc < nctx; nc++) {
				for (n = 0; n < prime; n++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					/* Only keep a ref on the last request per context. */
					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		} else {
			for (n = 0; n < prime; n++) {
				for (nc = 0; nc < nctx; nc++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		}

		/* Wait for the final request of each context to complete. */
		for (nc = 0; nc < nctx; nc++) {
			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
				pr_err("%s(%s): wait for %llx:%lld timed out\n",
				       __func__, ve[0]->engine->name,
				       request[nc]->fence.context,
				       request[nc]->fence.seqno);

				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
					  __func__, ve[0]->engine->name,
					  request[nc]->fence.context,
					  request[nc]->fence.seqno);
				GEM_TRACE_DUMP();
				intel_gt_set_wedged(gt);
				break;
			}
		}

		times[1] = ktime_sub(ktime_get_raw(), times[1]);
		if (prime == 1)
			times[0] = times[1]; /* baseline: a single request */

		for (nc = 0; nc < nctx; nc++) {
			i915_request_put(request[nc]);
			request[nc] = NULL;
		}

		if (__igt_timeout(end_time, NULL))
			break;
	}

	err = igt_live_test_end(&t);
	if (err)
		goto out;

	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
		prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (nc = 0; nc < nctx; nc++) {
		i915_request_put(request[nc]);
		intel_context_unpin(ve[nc]);
		intel_context_put(ve[nc]);
	}
	return err;
}
2875 
/*
 * Sanity check virtual engine submission: first wrap each physical
 * engine in a single-sibling virtual engine, then for every class with
 * at least two instances run the nop throughput test with 1..nsibling+1
 * contexts, plus one chained pass.
 */
static int live_virtual_engine(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int class, inst;
	int err;

	if (USES_GUC_SUBMISSION(gt->i915))
		return 0;

	for_each_engine(engine, gt, id) {
		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
		if (err) {
			pr_err("Failed to wrap engine %s: err=%d\n",
			       engine->name, err);
			return err;
		}
	}

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, n;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			/* Skip holes in the instance map, keep scanning. */
			if (!gt->engine_class[class][inst])
				continue;

			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		for (n = 1; n <= nsibling + 1; n++) {
			err = nop_virtual_engine(gt, siblings, nsibling,
						 n, 0);
			if (err)
				return err;
		}

		/* n == nsibling + 2 after the loop above */
		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
		if (err)
			return err;
	}

	return 0;
}
2924 
static int mask_virtual_engine(struct intel_gt *gt,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling)
{
	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
	struct intel_context *ve;
	struct igt_live_test t;
	unsigned int n;
	int err;

	/*
	 * Check that by setting the execution mask on a request, we can
	 * restrict it to our desired engine within the virtual engine.
	 */

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_close;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	/* One request per sibling, each pinned to a different engine. */
	for (n = 0; n < nsibling; n++) {
		request[n] = i915_request_create(ve);
		if (IS_ERR(request[n])) {
			err = PTR_ERR(request[n]);
			nsibling = n; /* only clean up what was created */
			goto out;
		}

		/* Reverse order as it's more likely to be unnatural */
		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;

		i915_request_get(request[n]);
		i915_request_add(request[n]);
	}

	for (n = 0; n < nsibling; n++) {
		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
			pr_err("%s(%s): wait for %llx:%lld timed out\n",
			       __func__, ve->engine->name,
			       request[n]->fence.context,
			       request[n]->fence.seqno);

			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
				  __func__, ve->engine->name,
				  request[n]->fence.context,
				  request[n]->fence.seqno);
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out;
		}

		/* Verify the mask steered the request to the right sibling. */
		if (request[n]->engine != siblings[nsibling - n - 1]) {
			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
			       request[n]->engine->name,
			       siblings[nsibling - n - 1]->name);
			err = -EINVAL;
			goto out;
		}
	}

	err = igt_live_test_end(&t);
out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < nsibling; n++)
		i915_request_put(request[n]);

out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_close:
	return err;
}
3010 
3011 static int live_virtual_mask(void *arg)
3012 {
3013 	struct intel_gt *gt = arg;
3014 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3015 	unsigned int class, inst;
3016 	int err;
3017 
3018 	if (USES_GUC_SUBMISSION(gt->i915))
3019 		return 0;
3020 
3021 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3022 		unsigned int nsibling;
3023 
3024 		nsibling = 0;
3025 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3026 			if (!gt->engine_class[class][inst])
3027 				break;
3028 
3029 			siblings[nsibling++] = gt->engine_class[class][inst];
3030 		}
3031 		if (nsibling < 2)
3032 			continue;
3033 
3034 		err = mask_virtual_engine(gt, siblings, nsibling);
3035 		if (err)
3036 			return err;
3037 	}
3038 
3039 	return 0;
3040 }
3041 
/*
 * Check that the CS_GPR register state is carried from one sibling to
 * the next across a virtual engine: request n stores GPR[n] (written as
 * n by request n-1) to the scratch page and then seeds GPR[n+1] for its
 * successor, with each request forced onto a different sibling.
 */
static int preserved_virtual_engine(struct intel_gt *gt,
				    struct intel_engine_cs **siblings,
				    unsigned int nsibling)
{
	struct i915_request *last = NULL;
	struct intel_context *ve;
	struct i915_vma *scratch;
	struct igt_live_test t;
	unsigned int n;
	int err = 0;
	u32 *cs;

	scratch = create_scratch(siblings[0]->gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < NUM_GPR_DW; n++) {
		/* Rotate through the siblings on each pass */
		struct intel_engine_cs *engine = siblings[n % nsibling];
		struct i915_request *rq;

		rq = i915_request_create(ve);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_end;
		}

		/* Keep a reference only to the most recent request */
		i915_request_put(last);
		last = i915_request_get(rq);

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto out_end;
		}

		/* Record GPR[n], expected to hold n from the previous rq */
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;

		/* Seed the next GPR for the following request */
		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
		*cs++ = n + 1;

		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* Restrict this request to run on a particular engine */
		rq->execution_mask = engine->mask;
		i915_request_add(rq);
	}

	if (i915_request_wait(last, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_end;
	}

	/* Read back the scratch page and check each recorded GPR value. */
	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_end;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n] != n) {
			pr_err("Incorrect value[%d] found for GPR[%d]\n",
			       cs[n], n);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

out_end:
	if (igt_live_test_end(&t))
		err = -EIO;
	i915_request_put(last);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_scratch:
	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}
3143 
3144 static int live_virtual_preserved(void *arg)
3145 {
3146 	struct intel_gt *gt = arg;
3147 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3148 	unsigned int class, inst;
3149 
3150 	/*
3151 	 * Check that the context image retains non-privileged (user) registers
3152 	 * from one engine to the next. For this we check that the CS_GPR
3153 	 * are preserved.
3154 	 */
3155 
3156 	if (USES_GUC_SUBMISSION(gt->i915))
3157 		return 0;
3158 
3159 	/* As we use CS_GPR we cannot run before they existed on all engines. */
3160 	if (INTEL_GEN(gt->i915) < 9)
3161 		return 0;
3162 
3163 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3164 		int nsibling, err;
3165 
3166 		nsibling = 0;
3167 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3168 			if (!gt->engine_class[class][inst])
3169 				continue;
3170 
3171 			siblings[nsibling++] = gt->engine_class[class][inst];
3172 		}
3173 		if (nsibling < 2)
3174 			continue;
3175 
3176 		err = preserved_virtual_engine(gt, siblings, nsibling);
3177 		if (err)
3178 			return err;
3179 	}
3180 
3181 	return 0;
3182 }
3183 
static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per-engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it gets requested that perhaps the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userpace?
	 */

	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	err = 0;
	rq[0] = ERR_PTR(-ENOMEM);
	/* Use every engine outside @class as the master in turn. */
	for_each_engine(master, gt, id) {
		struct i915_sw_fence fence = {};

		if (master->class == class)
			continue;

		/* Mark all slots invalid so cleanup knows where to stop. */
		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));

		/* rq[0] is the master; a spinner so it stays "executing". */
		rq[0] = igt_spinner_create_request(&spin,
						   master->kernel_context,
						   MI_NOOP);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto out;
		}
		i915_request_get(rq[0]);

		/*
		 * BOND_SCHEDULE: hold back the master's submission with an
		 * onstack fence so the bonds are set up while it is still
		 * only queued (not executing).
		 */
		if (flags & BOND_SCHEDULE) {
			onstack_fence_init(&fence);
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
		}

		i915_request_add(rq[0]);
		if (err < 0)
			goto out;

		if (!(flags & BOND_SCHEDULE) &&
		    !igt_wait_for_spinner(&spin, rq[0])) {
			err = -EIO;
			goto out;
		}

		/* One bonded secondary per sibling, tied to the master. */
		for (n = 0; n < nsibling; n++) {
			struct intel_context *ve;

			ve = intel_execlists_create_virtual(siblings, nsibling);
			if (IS_ERR(ve)) {
				err = PTR_ERR(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_virtual_engine_attach_bond(ve->engine,
							       master,
							       siblings[n]);
			if (err) {
				intel_context_put(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_context_pin(ve);
			intel_context_put(ve);
			if (err) {
				onstack_fence_fini(&fence);
				goto out;
			}

			rq[n + 1] = i915_request_create(ve);
			intel_context_unpin(ve);
			if (IS_ERR(rq[n + 1])) {
				err = PTR_ERR(rq[n + 1]);
				onstack_fence_fini(&fence);
				goto out;
			}
			i915_request_get(rq[n + 1]);

			err = i915_request_await_execution(rq[n + 1],
							   &rq[0]->fence,
							   ve->engine->bond_execute);
			i915_request_add(rq[n + 1]);
			if (err < 0) {
				onstack_fence_fini(&fence);
				goto out;
			}
		}
		/* Release the master (if held back) and let it complete. */
		onstack_fence_fini(&fence);
		intel_engine_flush_submission(master);
		igt_spinner_end(&spin);

		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
			pr_err("Master request did not execute (on %s)!\n",
			       rq[0]->engine->name);
			err = -EIO;
			goto out;
		}

		/* Each bond must have steered its request to its sibling. */
		for (n = 0; n < nsibling; n++) {
			if (i915_request_wait(rq[n + 1], 0,
					      MAX_SCHEDULE_TIMEOUT) < 0) {
				err = -EIO;
				goto out;
			}

			if (rq[n + 1]->engine != siblings[n]) {
				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
				       siblings[n]->name,
				       rq[n + 1]->engine->name,
				       rq[0]->engine->name);
				err = -EINVAL;
				goto out;
			}
		}

		/* Drop references up to the first unused (ERR_PTR) slot. */
		for (n = 0; !IS_ERR(rq[n]); n++)
			i915_request_put(rq[n]);
		rq[0] = ERR_PTR(-ENOMEM);
	}

out:
	for (n = 0; !IS_ERR(rq[n]); n++)
		i915_request_put(rq[n]);
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);
	return err;
}
3364 
3365 static int live_virtual_bond(void *arg)
3366 {
3367 	static const struct phase {
3368 		const char *name;
3369 		unsigned int flags;
3370 	} phases[] = {
3371 		{ "", 0 },
3372 		{ "schedule", BOND_SCHEDULE },
3373 		{ },
3374 	};
3375 	struct intel_gt *gt = arg;
3376 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3377 	unsigned int class, inst;
3378 	int err;
3379 
3380 	if (USES_GUC_SUBMISSION(gt->i915))
3381 		return 0;
3382 
3383 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3384 		const struct phase *p;
3385 		int nsibling;
3386 
3387 		nsibling = 0;
3388 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3389 			if (!gt->engine_class[class][inst])
3390 				break;
3391 
3392 			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3393 			siblings[nsibling++] = gt->engine_class[class][inst];
3394 		}
3395 		if (nsibling < 2)
3396 			continue;
3397 
3398 		for (p = phases; p->name; p++) {
3399 			err = bond_virtual_engine(gt,
3400 						  class, siblings, nsibling,
3401 						  p->flags);
3402 			if (err) {
3403 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3404 				       __func__, p->name, class, nsibling, err);
3405 				return err;
3406 			}
3407 		}
3408 	}
3409 
3410 	return 0;
3411 }
3412 
static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	unsigned long *heartbeat;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendents are not executed while the capture is in progress.
	 */

	/* One saved heartbeat interval per sibling we park. */
	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
	if (!heartbeat)
		return -ENOMEM;

	if (igt_spinner_init(&spin, gt)) {
		err = -ENOMEM;
		goto out_free;
	}

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	/* Park heartbeats so no background requests disturb the test. */
	for (n = 0; n < nsibling; n++)
		engine_heartbeat_disable(siblings[n], &heartbeat[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

	/* The physical engine the virtual request actually landed on. */
	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &gt->reset.flags)) {
		intel_gt_set_wedged(gt);
		err = -EBUSY;
		goto out_heartbeat;
	}
	tasklet_disable(&engine->execlists.tasklet);

	/* Run the tasklet by hand so the spinner is the active context. */
	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->active.lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->active.lock);
	GEM_BUG_ON(rq->engine != ve->engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	intel_engine_reset(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		engine_heartbeat_enable(siblings[n], heartbeat[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
out_free:
	kfree(heartbeat);
	return err;
}
3527 
3528 static int live_virtual_reset(void *arg)
3529 {
3530 	struct intel_gt *gt = arg;
3531 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3532 	unsigned int class, inst;
3533 
3534 	/*
3535 	 * Check that we handle a reset event within a virtual engine.
3536 	 * Only the physical engine is reset, but we have to check the flow
3537 	 * of the virtual requests around the reset, and make sure it is not
3538 	 * forgotten.
3539 	 */
3540 
3541 	if (USES_GUC_SUBMISSION(gt->i915))
3542 		return 0;
3543 
3544 	if (!intel_has_reset_engine(gt))
3545 		return 0;
3546 
3547 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3548 		int nsibling, err;
3549 
3550 		nsibling = 0;
3551 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3552 			if (!gt->engine_class[class][inst])
3553 				continue;
3554 
3555 			siblings[nsibling++] = gt->engine_class[class][inst];
3556 		}
3557 		if (nsibling < 2)
3558 			continue;
3559 
3560 		err = reset_virtual_engine(gt, siblings, nsibling);
3561 		if (err)
3562 			return err;
3563 	}
3564 
3565 	return 0;
3566 }
3567 
3568 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3569 {
3570 	static const struct i915_subtest tests[] = {
3571 		SUBTEST(live_sanitycheck),
3572 		SUBTEST(live_unlite_switch),
3573 		SUBTEST(live_unlite_preempt),
3574 		SUBTEST(live_hold_reset),
3575 		SUBTEST(live_timeslice_preempt),
3576 		SUBTEST(live_timeslice_queue),
3577 		SUBTEST(live_busywait_preempt),
3578 		SUBTEST(live_preempt),
3579 		SUBTEST(live_late_preempt),
3580 		SUBTEST(live_nopreempt),
3581 		SUBTEST(live_preempt_cancel),
3582 		SUBTEST(live_suppress_self_preempt),
3583 		SUBTEST(live_suppress_wait_preempt),
3584 		SUBTEST(live_chain_preempt),
3585 		SUBTEST(live_preempt_gang),
3586 		SUBTEST(live_preempt_hang),
3587 		SUBTEST(live_preempt_timeout),
3588 		SUBTEST(live_preempt_smoke),
3589 		SUBTEST(live_virtual_engine),
3590 		SUBTEST(live_virtual_mask),
3591 		SUBTEST(live_virtual_preserved),
3592 		SUBTEST(live_virtual_bond),
3593 		SUBTEST(live_virtual_reset),
3594 	};
3595 
3596 	if (!HAS_EXECLISTS(i915))
3597 		return 0;
3598 
3599 	if (intel_gt_is_wedged(&i915->gt))
3600 		return 0;
3601 
3602 	return intel_gt_live_subtests(tests, &i915->gt);
3603 }
3604 
3605 static void hexdump(const void *buf, size_t len)
3606 {
3607 	const size_t rowsize = 8 * sizeof(u32);
3608 	const void *prev = NULL;
3609 	bool skip = false;
3610 	size_t pos;
3611 
3612 	for (pos = 0; pos < len; pos += rowsize) {
3613 		char line[128];
3614 
3615 		if (prev && !memcmp(prev, buf + pos, rowsize)) {
3616 			if (!skip) {
3617 				pr_info("*\n");
3618 				skip = true;
3619 			}
3620 			continue;
3621 		}
3622 
3623 		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3624 						rowsize, sizeof(u32),
3625 						line, sizeof(line),
3626 						false) >= sizeof(line));
3627 		pr_info("[%04zx] %s\n", pos, line);
3628 
3629 		prev = buf + pos;
3630 		skip = false;
3631 	}
3632 }
3633 
static int live_lrc_layout(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 *lrc;
	int err;

	/*
	 * Check the registers offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */

	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!lrc)
		return -ENOMEM;

	err = 0;
	for_each_engine(engine, gt, id) {
		u32 *hw;
		int dw;

		/* Need the HW-saved default context image to compare against */
		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		/* Advance to the register-state page of the context image */
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		/*
		 * Build our SW view of the register state into lrc; the
		 * POISON_INUSE fill makes dwords we never wrote stand out.
		 */
		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
					 engine->kernel_context,
					 engine,
					 engine->kernel_context->ring,
					 true);

		/* Walk both images in lockstep, one LRI packet at a time */
		dw = 0;
		do {
			u32 lri = hw[dw];

			if (lri == 0) {
				/* Padding/noop in the HW image; skip it */
				dw++;
				continue;
			}

			if (lrc[dw] == 0) {
				/* HW instruction we chose not to emit in SW */
				pr_debug("%s: skipped instruction %x at dword %d\n",
					 engine->name, lri, dw);
				dw++;
				continue;
			}

			/* Anything else present must be an LRI (opcode 0x22) */
			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
				       engine->name, dw, lri);
				err = -EINVAL;
				break;
			}

			/* SW header must match the HW header exactly */
			if (lrc[dw] != lri) {
				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
				       engine->name, dw, lri, lrc[dw]);
				err = -EINVAL;
				break;
			}

			/* Payload dword count is encoded in the low header bits */
			lri &= 0x7f;
			lri++;
			dw++;

			/* Payload is a sequence of (register offset, value) pairs */
			while (lri) {
				/* Register offsets must agree between HW and SW */
				if (hw[dw] != lrc[dw]) {
					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
					       engine->name, dw, hw[dw], lrc[dw]);
					err = -EINVAL;
					break;
				}

				/*
				 * Skip over the actual register value as we
				 * expect that to differ.
				 */
				dw += 2;
				lri -= 2;
			}
		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

		/* Dump both images to aid debugging any mismatch */
		if (err) {
			pr_info("%s: HW register image:\n", engine->name);
			hexdump(hw, PAGE_SIZE);

			pr_info("%s: SW register image:\n", engine->name);
			hexdump(lrc, PAGE_SIZE);
		}

		i915_gem_object_unpin_map(engine->default_state);
		if (err)
			break;
	}

	kfree(lrc);
	return err;
}
3740 
3741 static int find_offset(const u32 *lri, u32 offset)
3742 {
3743 	int i;
3744 
3745 	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3746 		if (lri[i] == offset)
3747 			return i;
3748 
3749 	return -1;
3750 }
3751 
static int live_lrc_fixed(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the assumed register offsets match the actual locations in
	 * the context image.
	 */

	for_each_engine(engine, gt, id) {
		/*
		 * Table of (mmio register, expected context-image dword)
		 * pairs to verify. The CTX_* values index the register
		 * *value* slot, hence the -1 to land on the offset dword.
		 */
		const struct {
			u32 reg;	/* mmio offset to look for */
			u32 offset;	/* expected dword index in the image */
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{ },	/* sentinel: name == NULL terminates the walk */
		}, *t;
		u32 *hw;

		/* Need the HW-saved default context image to inspect */
		if (!engine->default_state)
			continue;

		hw = i915_gem_object_pin_map(engine->default_state,
					     I915_MAP_WB);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		/* Advance to the register-state page of the context image */
		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

		for (t = tbl; t->name; t++) {
			int dw = find_offset(hw, t->reg);

			/* Report all mismatches; keep scanning the table */
			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	return err;
}
3834 
/*
 * Submit a request on a fresh context that stores RING_START and RING_TAIL
 * from the live HW registers into scratch, then compare those values
 * against what we programmed into the intel_context.
 */
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	enum {
		RING_START_IDX = 0,	/* scratch slot for RING_START */
		RING_TAIL_IDX,		/* scratch slot for RING_TAIL */
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	/* 4 dwords per SRM, one SRM per register we sample */
	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	/* SRM the live RING_START into its scratch slot */
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	/* RING_START should point at this context's ring buffer */
	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	/* SRM the live RING_TAIL into its scratch slot */
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	i915_request_get(rq);
	i915_request_add(rq);

	/*
	 * Sample ce->ring->tail only after flushing submission, so it
	 * reflects the tail as set for this request's execution.
	 */
	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	/* Read back what the GPU stored and compare against expectations */
	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	intel_context_put(ce);
	return err;
}
3919 
3920 static int live_lrc_state(void *arg)
3921 {
3922 	struct intel_gt *gt = arg;
3923 	struct intel_engine_cs *engine;
3924 	struct i915_vma *scratch;
3925 	enum intel_engine_id id;
3926 	int err = 0;
3927 
3928 	/*
3929 	 * Check the live register state matches what we expect for this
3930 	 * intel_context.
3931 	 */
3932 
3933 	scratch = create_scratch(gt);
3934 	if (IS_ERR(scratch))
3935 		return PTR_ERR(scratch);
3936 
3937 	for_each_engine(engine, gt, id) {
3938 		err = __live_lrc_state(engine, scratch);
3939 		if (err)
3940 			break;
3941 	}
3942 
3943 	if (igt_flush_test(gt->i915))
3944 		err = -EIO;
3945 
3946 	i915_vma_unpin_and_release(&scratch, 0);
3947 	return err;
3948 }
3949 
3950 static int gpr_make_dirty(struct intel_engine_cs *engine)
3951 {
3952 	struct i915_request *rq;
3953 	u32 *cs;
3954 	int n;
3955 
3956 	rq = intel_engine_create_kernel_request(engine);
3957 	if (IS_ERR(rq))
3958 		return PTR_ERR(rq);
3959 
3960 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3961 	if (IS_ERR(cs)) {
3962 		i915_request_add(rq);
3963 		return PTR_ERR(cs);
3964 	}
3965 
3966 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3967 	for (n = 0; n < NUM_GPR_DW; n++) {
3968 		*cs++ = CS_GPR(engine, n);
3969 		*cs++ = STACK_MAGIC;
3970 	}
3971 	*cs++ = MI_NOOP;
3972 
3973 	intel_ring_advance(rq, cs);
3974 	i915_request_add(rq);
3975 
3976 	return 0;
3977 }
3978 
/*
 * After dirtying the engine's GPRs from the kernel context, create a new
 * context and store its view of every GPR dword into scratch; each must
 * read back as zero or we have leaked state between contexts.
 */
static int __live_gpr_clear(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	/* Poison the GPRs from another context first */
	err = gpr_make_dirty(engine);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	/* 4 dwords per SRM, one SRM per GPR dword */
	cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_put;
	}

	/* Store each GPR dword as seen by the new context into scratch */
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	/* A fresh context must observe all GPRs as zero */
	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}
4052 
4053 static int live_gpr_clear(void *arg)
4054 {
4055 	struct intel_gt *gt = arg;
4056 	struct intel_engine_cs *engine;
4057 	struct i915_vma *scratch;
4058 	enum intel_engine_id id;
4059 	int err = 0;
4060 
4061 	/*
4062 	 * Check that GPR registers are cleared in new contexts as we need
4063 	 * to avoid leaking any information from previous contexts.
4064 	 */
4065 
4066 	scratch = create_scratch(gt);
4067 	if (IS_ERR(scratch))
4068 		return PTR_ERR(scratch);
4069 
4070 	for_each_engine(engine, gt, id) {
4071 		err = __live_gpr_clear(engine, scratch);
4072 		if (err)
4073 			break;
4074 	}
4075 
4076 	if (igt_flush_test(gt->i915))
4077 		err = -EIO;
4078 
4079 	i915_vma_unpin_and_release(&scratch, 0);
4080 	return err;
4081 }
4082 
4083 int intel_lrc_live_selftests(struct drm_i915_private *i915)
4084 {
4085 	static const struct i915_subtest tests[] = {
4086 		SUBTEST(live_lrc_layout),
4087 		SUBTEST(live_lrc_fixed),
4088 		SUBTEST(live_lrc_state),
4089 		SUBTEST(live_gpr_clear),
4090 	};
4091 
4092 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
4093 		return 0;
4094 
4095 	return intel_gt_live_subtests(tests, &i915->gt);
4096 }
4097