1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19 
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22 
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
25 
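/*
 * A single CPU-cached page pinned into the GGTT, used by the selftests as a
 * scratch buffer (e.g. as a target for MI_STORE_* results read back later).
 */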
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
27 {
28 	struct drm_i915_gem_object *obj;
29 	struct i915_vma *vma;
30 	int err;
31 
32 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
33 	if (IS_ERR(obj))
34 		return ERR_CAST(obj);
35 
36 	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
37 
38 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
39 	if (IS_ERR(vma)) {
40 		i915_gem_object_put(obj);
41 		return vma;
42 	}
43 
44 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
45 	if (err) {
46 		i915_gem_object_put(obj);
47 		return ERR_PTR(err);
48 	}
49 
50 	return vma;
51 }
52 
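/*
 * The heartbeat emits background kernel pulses that would disturb the
 * carefully staged submissions below, so park it (while holding the engine
 * awake) for the duration of a test and restore it afterwards.
 */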
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
54 				     unsigned long *saved)
55 {
56 	*saved = engine->props.heartbeat_interval_ms;
57 	engine->props.heartbeat_interval_ms = 0;
58 
59 	intel_engine_pm_get(engine);
60 	intel_engine_park_heartbeat(engine);
61 }
62 
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
64 				    unsigned long saved)
65 {
66 	intel_engine_pm_put(engine);
67 
68 	engine->props.heartbeat_interval_ms = saved;
69 }
70 
71 static int live_sanitycheck(void *arg)
72 {
73 	struct intel_gt *gt = arg;
74 	struct intel_engine_cs *engine;
75 	enum intel_engine_id id;
76 	struct igt_spinner spin;
77 	int err = 0;
78 
79 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
80 		return 0;
81 
82 	if (igt_spinner_init(&spin, gt))
83 		return -ENOMEM;
84 
85 	for_each_engine(engine, gt, id) {
86 		struct intel_context *ce;
87 		struct i915_request *rq;
88 
89 		ce = intel_context_create(engine);
90 		if (IS_ERR(ce)) {
91 			err = PTR_ERR(ce);
92 			break;
93 		}
94 
95 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
96 		if (IS_ERR(rq)) {
97 			err = PTR_ERR(rq);
98 			goto out_ctx;
99 		}
100 
101 		i915_request_add(rq);
102 		if (!igt_wait_for_spinner(&spin, rq)) {
103 			GEM_TRACE("spinner failed to start\n");
104 			GEM_TRACE_DUMP();
105 			intel_gt_set_wedged(gt);
106 			err = -EIO;
107 			goto out_ctx;
108 		}
109 
110 		igt_spinner_end(&spin);
111 		if (igt_flush_test(gt->i915)) {
112 			err = -EIO;
113 			goto out_ctx;
114 		}
115 
116 out_ctx:
117 		intel_context_put(ce);
118 		if (err)
119 			break;
120 	}
121 
122 	igt_spinner_fini(&spin);
123 	return err;
124 }
125 
126 static int live_unlite_restore(struct intel_gt *gt, int prio)
127 {
128 	struct intel_engine_cs *engine;
129 	enum intel_engine_id id;
130 	struct igt_spinner spin;
131 	int err = -ENOMEM;
132 
133 	/*
134 	 * Check that we can correctly context switch between 2 instances
135 	 * on the same engine from the same parent context.
136 	 */
137 
138 	if (igt_spinner_init(&spin, gt))
139 		return err;
140 
141 	err = 0;
142 	for_each_engine(engine, gt, id) {
143 		struct intel_context *ce[2] = {};
144 		struct i915_request *rq[2];
145 		struct igt_live_test t;
146 		unsigned long saved;
147 		int n;
148 
149 		if (prio && !intel_engine_has_preemption(engine))
150 			continue;
151 
152 		if (!intel_engine_can_store_dword(engine))
153 			continue;
154 
155 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
156 			err = -EIO;
157 			break;
158 		}
159 		engine_heartbeat_disable(engine, &saved);
160 
161 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
162 			struct intel_context *tmp;
163 
164 			tmp = intel_context_create(engine);
165 			if (IS_ERR(tmp)) {
166 				err = PTR_ERR(tmp);
167 				goto err_ce;
168 			}
169 
170 			err = intel_context_pin(tmp);
171 			if (err) {
172 				intel_context_put(tmp);
173 				goto err_ce;
174 			}
175 
176 			/*
177 			 * Setup the pair of contexts such that if we
178 			 * lite-restore using the RING_TAIL from ce[1] it
179 			 * will execute garbage from ce[0]->ring.
180 			 */
181 			memset(tmp->ring->vaddr,
182 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
183 			       tmp->ring->vma->size);
184 
185 			ce[n] = tmp;
186 		}
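		/*
		 * Move ce[1]'s RING_TAIL to the middle of its poisoned ring,
		 * so that any confusion between the two contexts' ring tails
		 * (e.g. on lite-restore) executes the POISON_INUSE filler
		 * and hangs, rather than silently passing.
		 */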
187 		GEM_BUG_ON(!ce[1]->ring->size);
188 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
189 		__execlists_update_reg_state(ce[1], engine);
190 
191 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
192 		if (IS_ERR(rq[0])) {
193 			err = PTR_ERR(rq[0]);
194 			goto err_ce;
195 		}
196 
197 		i915_request_get(rq[0]);
198 		i915_request_add(rq[0]);
199 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
200 
		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			err = -EIO;
			goto err_ce;
		}
205 
206 		rq[1] = i915_request_create(ce[1]);
207 		if (IS_ERR(rq[1])) {
208 			err = PTR_ERR(rq[1]);
209 			i915_request_put(rq[0]);
210 			goto err_ce;
211 		}
212 
213 		if (!prio) {
214 			/*
215 			 * Ensure we do the switch to ce[1] on completion.
216 			 *
217 			 * rq[0] is already submitted, so this should reduce
218 			 * to a no-op (a wait on a request on the same engine
219 			 * uses the submit fence, not the completion fence),
220 			 * but it will install a dependency on rq[1] for rq[0]
221 			 * that will prevent the pair being reordered by
222 			 * timeslicing.
223 			 */
224 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
225 		}
226 
227 		i915_request_get(rq[1]);
228 		i915_request_add(rq[1]);
229 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
230 		i915_request_put(rq[0]);
231 
232 		if (prio) {
233 			struct i915_sched_attr attr = {
234 				.priority = prio,
235 			};
236 
237 			/* Alternatively preempt the spinner with ce[1] */
238 			engine->schedule(rq[1], &attr);
239 		}
240 
241 		/* And switch back to ce[0] for good measure */
242 		rq[0] = i915_request_create(ce[0]);
243 		if (IS_ERR(rq[0])) {
244 			err = PTR_ERR(rq[0]);
245 			i915_request_put(rq[1]);
246 			goto err_ce;
247 		}
248 
249 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
250 		i915_request_get(rq[0]);
251 		i915_request_add(rq[0]);
252 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
253 		i915_request_put(rq[1]);
254 		i915_request_put(rq[0]);
255 
256 err_ce:
257 		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
258 		igt_spinner_end(&spin);
259 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
260 			if (IS_ERR_OR_NULL(ce[n]))
261 				break;
262 
263 			intel_context_unpin(ce[n]);
264 			intel_context_put(ce[n]);
265 		}
266 
267 		engine_heartbeat_enable(engine, saved);
268 		if (igt_live_test_end(&t))
269 			err = -EIO;
270 		if (err)
271 			break;
272 	}
273 
274 	igt_spinner_fini(&spin);
275 	return err;
276 }
277 
278 static int live_unlite_switch(void *arg)
279 {
280 	return live_unlite_restore(arg, 0);
281 }
282 
283 static int live_unlite_preempt(void *arg)
284 {
285 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
286 }
287 
288 static int
289 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
290 {
291 	u32 *cs;
292 
293 	cs = intel_ring_begin(rq, 10);
294 	if (IS_ERR(cs))
295 		return PTR_ERR(cs);
296 
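	/*
	 * Each link spins on its own slot (idx) of the semaphore page and,
	 * once woken, releases the previous slot (idx - 1), so signalling
	 * the final slot unwinds the chain back down to the oldest waiter.
	 */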
297 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
298 
299 	*cs++ = MI_SEMAPHORE_WAIT |
300 		MI_SEMAPHORE_GLOBAL_GTT |
301 		MI_SEMAPHORE_POLL |
302 		MI_SEMAPHORE_SAD_NEQ_SDD;
303 	*cs++ = 0;
304 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
305 	*cs++ = 0;
306 
307 	if (idx > 0) {
308 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
309 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
310 		*cs++ = 0;
311 		*cs++ = 1;
312 	} else {
313 		*cs++ = MI_NOOP;
314 		*cs++ = MI_NOOP;
315 		*cs++ = MI_NOOP;
316 		*cs++ = MI_NOOP;
317 	}
318 
319 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
320 
321 	intel_ring_advance(rq, cs);
322 	return 0;
323 }
324 
325 static struct i915_request *
326 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
327 {
328 	struct intel_context *ce;
329 	struct i915_request *rq;
330 	int err;
331 
332 	ce = intel_context_create(engine);
333 	if (IS_ERR(ce))
334 		return ERR_CAST(ce);
335 
336 	rq = intel_context_create_request(ce);
337 	if (IS_ERR(rq))
338 		goto out_ce;
339 
340 	err = 0;
341 	if (rq->engine->emit_init_breadcrumb)
342 		err = rq->engine->emit_init_breadcrumb(rq);
343 	if (err == 0)
344 		err = emit_semaphore_chain(rq, vma, idx);
345 	if (err == 0)
346 		i915_request_get(rq);
347 	i915_request_add(rq);
348 	if (err)
349 		rq = ERR_PTR(err);
350 
351 out_ce:
352 	intel_context_put(ce);
353 	return rq;
354 }
355 
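/*
 * Emit a kernel request that writes semaphore slot (idx - 1), releasing the
 * tail of the chain, and bump it to the given priority so the scheduler must
 * decide whether to timeslice it in ahead of the waiters already submitted.
 */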
356 static int
357 release_queue(struct intel_engine_cs *engine,
358 	      struct i915_vma *vma,
359 	      int idx, int prio)
360 {
361 	struct i915_sched_attr attr = {
362 		.priority = prio,
363 	};
364 	struct i915_request *rq;
365 	u32 *cs;
366 
367 	rq = intel_engine_create_kernel_request(engine);
368 	if (IS_ERR(rq))
369 		return PTR_ERR(rq);
370 
371 	cs = intel_ring_begin(rq, 4);
372 	if (IS_ERR(cs)) {
373 		i915_request_add(rq);
374 		return PTR_ERR(cs);
375 	}
376 
377 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
378 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
379 	*cs++ = 0;
380 	*cs++ = 1;
381 
382 	intel_ring_advance(rq, cs);
383 
384 	i915_request_get(rq);
385 	i915_request_add(rq);
386 
387 	local_bh_disable();
388 	engine->schedule(rq, &attr);
389 	local_bh_enable(); /* kick tasklet */
390 
391 	i915_request_put(rq);
392 
393 	return 0;
394 }
395 
396 static int
397 slice_semaphore_queue(struct intel_engine_cs *outer,
398 		      struct i915_vma *vma,
399 		      int count)
400 {
401 	struct intel_engine_cs *engine;
402 	struct i915_request *head;
403 	enum intel_engine_id id;
404 	int err, i, n = 0;
405 
406 	head = semaphore_queue(outer, vma, n++);
407 	if (IS_ERR(head))
408 		return PTR_ERR(head);
409 
410 	for_each_engine(engine, outer->gt, id) {
411 		for (i = 0; i < count; i++) {
412 			struct i915_request *rq;
413 
414 			rq = semaphore_queue(engine, vma, n++);
415 			if (IS_ERR(rq)) {
416 				err = PTR_ERR(rq);
417 				goto out;
418 			}
419 
420 			i915_request_put(rq);
421 		}
422 	}
423 
424 	err = release_queue(outer, vma, n, INT_MAX);
425 	if (err)
426 		goto out;
427 
428 	if (i915_request_wait(head, 0,
429 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
430 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
431 		       count, n);
432 		GEM_TRACE_DUMP();
433 		intel_gt_set_wedged(outer->gt);
434 		err = -EIO;
435 	}
436 
437 out:
438 	i915_request_put(head);
439 	return err;
440 }
441 
442 static int live_timeslice_preempt(void *arg)
443 {
444 	struct intel_gt *gt = arg;
445 	struct drm_i915_gem_object *obj;
446 	struct i915_vma *vma;
447 	void *vaddr;
448 	int err = 0;
449 	int count;
450 
451 	/*
452 	 * If a request takes too long, we would like to give other users
453 	 * a fair go on the GPU. In particular, users may create batches
454 	 * that wait upon external input, where that input may even be
455 	 * supplied by another GPU job. To avoid blocking forever, we
456 	 * need to preempt the current task and replace it with another
457 	 * ready task.
458 	 */
459 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
460 		return 0;
461 
462 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
463 	if (IS_ERR(obj))
464 		return PTR_ERR(obj);
465 
466 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
467 	if (IS_ERR(vma)) {
468 		err = PTR_ERR(vma);
469 		goto err_obj;
470 	}
471 
472 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
473 	if (IS_ERR(vaddr)) {
474 		err = PTR_ERR(vaddr);
475 		goto err_obj;
476 	}
477 
478 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
479 	if (err)
480 		goto err_map;
481 
482 	for_each_prime_number_from(count, 1, 16) {
483 		struct intel_engine_cs *engine;
484 		enum intel_engine_id id;
485 
486 		for_each_engine(engine, gt, id) {
487 			unsigned long saved;
488 
489 			if (!intel_engine_has_preemption(engine))
490 				continue;
491 
492 			memset(vaddr, 0, PAGE_SIZE);
493 
494 			engine_heartbeat_disable(engine, &saved);
495 			err = slice_semaphore_queue(engine, vma, count);
496 			engine_heartbeat_enable(engine, saved);
497 			if (err)
498 				goto err_pin;
499 
500 			if (igt_flush_test(gt->i915)) {
501 				err = -EIO;
502 				goto err_pin;
503 			}
504 		}
505 	}
506 
507 err_pin:
508 	i915_vma_unpin(vma);
509 err_map:
510 	i915_gem_object_unpin_map(obj);
511 err_obj:
512 	i915_gem_object_put(obj);
513 	return err;
514 }
515 
516 static struct i915_request *nop_request(struct intel_engine_cs *engine)
517 {
518 	struct i915_request *rq;
519 
520 	rq = intel_engine_create_kernel_request(engine);
521 	if (IS_ERR(rq))
522 		return rq;
523 
524 	i915_request_get(rq);
525 	i915_request_add(rq);
526 
527 	return rq;
528 }
529 
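/* Flush the tasklet and poll until the request is active on HW, or timeout. */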
530 static int wait_for_submit(struct intel_engine_cs *engine,
531 			   struct i915_request *rq,
532 			   unsigned long timeout)
533 {
534 	timeout += jiffies;
535 	do {
536 		cond_resched();
537 		intel_engine_flush_submission(engine);
538 		if (i915_request_is_active(rq))
539 			return 0;
540 	} while (time_before(jiffies, timeout));
541 
542 	return -ETIME;
543 }
544 
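/*
 * Allow up to two timeslice intervals (plus a jiffy of slack) for the
 * expected timeslice expiry to kick in.
 */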
545 static long timeslice_threshold(const struct intel_engine_cs *engine)
546 {
547 	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
548 }
549 
550 static int live_timeslice_queue(void *arg)
551 {
552 	struct intel_gt *gt = arg;
553 	struct drm_i915_gem_object *obj;
554 	struct intel_engine_cs *engine;
555 	enum intel_engine_id id;
556 	struct i915_vma *vma;
557 	void *vaddr;
558 	int err = 0;
559 
560 	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
	 * timeslicing between them disabled, we *do* enable timeslicing
563 	 * if the queue demands it. (Normally, we do not submit if
564 	 * ELSP[1] is already occupied, so must rely on timeslicing to
565 	 * eject ELSP[0] in favour of the queue.)
566 	 */
567 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
568 		return 0;
569 
570 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
571 	if (IS_ERR(obj))
572 		return PTR_ERR(obj);
573 
574 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
575 	if (IS_ERR(vma)) {
576 		err = PTR_ERR(vma);
577 		goto err_obj;
578 	}
579 
580 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
581 	if (IS_ERR(vaddr)) {
582 		err = PTR_ERR(vaddr);
583 		goto err_obj;
584 	}
585 
586 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
587 	if (err)
588 		goto err_map;
589 
590 	for_each_engine(engine, gt, id) {
591 		struct i915_sched_attr attr = {
592 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
593 		};
594 		struct i915_request *rq, *nop;
595 		unsigned long saved;
596 
597 		if (!intel_engine_has_preemption(engine))
598 			continue;
599 
600 		engine_heartbeat_disable(engine, &saved);
601 		memset(vaddr, 0, PAGE_SIZE);
602 
603 		/* ELSP[0]: semaphore wait */
604 		rq = semaphore_queue(engine, vma, 0);
605 		if (IS_ERR(rq)) {
606 			err = PTR_ERR(rq);
607 			goto err_heartbeat;
608 		}
609 		engine->schedule(rq, &attr);
610 		err = wait_for_submit(engine, rq, HZ / 2);
611 		if (err) {
612 			pr_err("%s: Timed out trying to submit semaphores\n",
613 			       engine->name);
614 			goto err_rq;
615 		}
616 
617 		/* ELSP[1]: nop request */
618 		nop = nop_request(engine);
619 		if (IS_ERR(nop)) {
620 			err = PTR_ERR(nop);
621 			goto err_rq;
622 		}
623 		err = wait_for_submit(engine, nop, HZ / 2);
624 		i915_request_put(nop);
625 		if (err) {
626 			pr_err("%s: Timed out trying to submit nop\n",
627 			       engine->name);
628 			goto err_rq;
629 		}
630 
631 		GEM_BUG_ON(i915_request_completed(rq));
632 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
633 
		/* Queue: semaphore signal, same priority as the semaphore */
635 		err = release_queue(engine, vma, 1, effective_prio(rq));
636 		if (err)
637 			goto err_rq;
638 
639 		intel_engine_flush_submission(engine);
640 		if (!READ_ONCE(engine->execlists.timer.expires) &&
641 		    !i915_request_completed(rq)) {
642 			struct drm_printer p =
643 				drm_info_printer(gt->i915->drm.dev);
644 
645 			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
646 				      engine->name);
647 			intel_engine_dump(engine, &p,
648 					  "%s\n", engine->name);
649 			GEM_TRACE_DUMP();
650 
651 			memset(vaddr, 0xff, PAGE_SIZE);
652 			err = -EINVAL;
653 		}
654 
655 		/* Timeslice every jiffy, so within 2 we should signal */
656 		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
657 			struct drm_printer p =
658 				drm_info_printer(gt->i915->drm.dev);
659 
660 			pr_err("%s: Failed to timeslice into queue\n",
661 			       engine->name);
662 			intel_engine_dump(engine, &p,
663 					  "%s\n", engine->name);
664 
665 			memset(vaddr, 0xff, PAGE_SIZE);
666 			err = -EIO;
667 		}
668 err_rq:
669 		i915_request_put(rq);
670 err_heartbeat:
671 		engine_heartbeat_enable(engine, saved);
672 		if (err)
673 			break;
674 	}
675 
676 	i915_vma_unpin(vma);
677 err_map:
678 	i915_gem_object_unpin_map(obj);
679 err_obj:
680 	i915_gem_object_put(obj);
681 	return err;
682 }
683 
684 static int live_busywait_preempt(void *arg)
685 {
686 	struct intel_gt *gt = arg;
687 	struct i915_gem_context *ctx_hi, *ctx_lo;
688 	struct intel_engine_cs *engine;
689 	struct drm_i915_gem_object *obj;
690 	struct i915_vma *vma;
691 	enum intel_engine_id id;
692 	int err = -ENOMEM;
693 	u32 *map;
694 
695 	/*
696 	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
697 	 * preempt the busywaits used to synchronise between rings.
698 	 */
699 
700 	ctx_hi = kernel_context(gt->i915);
701 	if (!ctx_hi)
702 		return -ENOMEM;
703 	ctx_hi->sched.priority =
704 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
705 
706 	ctx_lo = kernel_context(gt->i915);
707 	if (!ctx_lo)
708 		goto err_ctx_hi;
709 	ctx_lo->sched.priority =
710 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
711 
712 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
713 	if (IS_ERR(obj)) {
714 		err = PTR_ERR(obj);
715 		goto err_ctx_lo;
716 	}
717 
718 	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
719 	if (IS_ERR(map)) {
720 		err = PTR_ERR(map);
721 		goto err_obj;
722 	}
723 
724 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
725 	if (IS_ERR(vma)) {
726 		err = PTR_ERR(vma);
727 		goto err_map;
728 	}
729 
730 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
731 	if (err)
732 		goto err_map;
733 
734 	for_each_engine(engine, gt, id) {
735 		struct i915_request *lo, *hi;
736 		struct igt_live_test t;
737 		u32 *cs;
738 
739 		if (!intel_engine_has_preemption(engine))
740 			continue;
741 
742 		if (!intel_engine_can_store_dword(engine))
743 			continue;
744 
745 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
746 			err = -EIO;
747 			goto err_vma;
748 		}
749 
750 		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
757 		 */
758 
759 		lo = igt_request_alloc(ctx_lo, engine);
760 		if (IS_ERR(lo)) {
761 			err = PTR_ERR(lo);
762 			goto err_vma;
763 		}
764 
765 		cs = intel_ring_begin(lo, 8);
766 		if (IS_ERR(cs)) {
767 			err = PTR_ERR(cs);
768 			i915_request_add(lo);
769 			goto err_vma;
770 		}
771 
772 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
773 		*cs++ = i915_ggtt_offset(vma);
774 		*cs++ = 0;
775 		*cs++ = 1;
776 
777 		/* XXX Do we need a flush + invalidate here? */
778 
779 		*cs++ = MI_SEMAPHORE_WAIT |
780 			MI_SEMAPHORE_GLOBAL_GTT |
781 			MI_SEMAPHORE_POLL |
782 			MI_SEMAPHORE_SAD_EQ_SDD;
783 		*cs++ = 0;
784 		*cs++ = i915_ggtt_offset(vma);
785 		*cs++ = 0;
786 
787 		intel_ring_advance(lo, cs);
788 
789 		i915_request_get(lo);
790 		i915_request_add(lo);
791 
792 		if (wait_for(READ_ONCE(*map), 10)) {
793 			i915_request_put(lo);
794 			err = -ETIMEDOUT;
795 			goto err_vma;
796 		}
797 
798 		/* Low priority request should be busywaiting now */
799 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
800 			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not busywait!\n",
802 			       engine->name);
803 			err = -EIO;
804 			goto err_vma;
805 		}
806 
807 		hi = igt_request_alloc(ctx_hi, engine);
808 		if (IS_ERR(hi)) {
809 			err = PTR_ERR(hi);
810 			i915_request_put(lo);
811 			goto err_vma;
812 		}
813 
814 		cs = intel_ring_begin(hi, 4);
815 		if (IS_ERR(cs)) {
816 			err = PTR_ERR(cs);
817 			i915_request_add(hi);
818 			i915_request_put(lo);
819 			goto err_vma;
820 		}
821 
822 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
823 		*cs++ = i915_ggtt_offset(vma);
824 		*cs++ = 0;
825 		*cs++ = 0;
826 
827 		intel_ring_advance(hi, cs);
828 		i915_request_add(hi);
829 
830 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
831 			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
832 
833 			pr_err("%s: Failed to preempt semaphore busywait!\n",
834 			       engine->name);
835 
836 			intel_engine_dump(engine, &p, "%s\n", engine->name);
837 			GEM_TRACE_DUMP();
838 
839 			i915_request_put(lo);
840 			intel_gt_set_wedged(gt);
841 			err = -EIO;
842 			goto err_vma;
843 		}
844 		GEM_BUG_ON(READ_ONCE(*map));
845 		i915_request_put(lo);
846 
847 		if (igt_live_test_end(&t)) {
848 			err = -EIO;
849 			goto err_vma;
850 		}
851 	}
852 
853 	err = 0;
854 err_vma:
855 	i915_vma_unpin(vma);
856 err_map:
857 	i915_gem_object_unpin_map(obj);
858 err_obj:
859 	i915_gem_object_put(obj);
860 err_ctx_lo:
861 	kernel_context_close(ctx_lo);
862 err_ctx_hi:
863 	kernel_context_close(ctx_hi);
864 	return err;
865 }
866 
867 static struct i915_request *
868 spinner_create_request(struct igt_spinner *spin,
869 		       struct i915_gem_context *ctx,
870 		       struct intel_engine_cs *engine,
871 		       u32 arb)
872 {
873 	struct intel_context *ce;
874 	struct i915_request *rq;
875 
876 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
877 	if (IS_ERR(ce))
878 		return ERR_CAST(ce);
879 
880 	rq = igt_spinner_create_request(spin, ce, arb);
881 	intel_context_put(ce);
882 	return rq;
883 }
884 
885 static int live_preempt(void *arg)
886 {
887 	struct intel_gt *gt = arg;
888 	struct i915_gem_context *ctx_hi, *ctx_lo;
889 	struct igt_spinner spin_hi, spin_lo;
890 	struct intel_engine_cs *engine;
891 	enum intel_engine_id id;
892 	int err = -ENOMEM;
893 
894 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
895 		return 0;
896 
897 	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
898 		pr_err("Logical preemption supported, but not exposed\n");
899 
900 	if (igt_spinner_init(&spin_hi, gt))
901 		return -ENOMEM;
902 
903 	if (igt_spinner_init(&spin_lo, gt))
904 		goto err_spin_hi;
905 
906 	ctx_hi = kernel_context(gt->i915);
907 	if (!ctx_hi)
908 		goto err_spin_lo;
909 	ctx_hi->sched.priority =
910 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
911 
912 	ctx_lo = kernel_context(gt->i915);
913 	if (!ctx_lo)
914 		goto err_ctx_hi;
915 	ctx_lo->sched.priority =
916 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
917 
918 	for_each_engine(engine, gt, id) {
919 		struct igt_live_test t;
920 		struct i915_request *rq;
921 
922 		if (!intel_engine_has_preemption(engine))
923 			continue;
924 
925 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
926 			err = -EIO;
927 			goto err_ctx_lo;
928 		}
929 
930 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
931 					    MI_ARB_CHECK);
932 		if (IS_ERR(rq)) {
933 			err = PTR_ERR(rq);
934 			goto err_ctx_lo;
935 		}
936 
937 		i915_request_add(rq);
938 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
939 			GEM_TRACE("lo spinner failed to start\n");
940 			GEM_TRACE_DUMP();
941 			intel_gt_set_wedged(gt);
942 			err = -EIO;
943 			goto err_ctx_lo;
944 		}
945 
946 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
947 					    MI_ARB_CHECK);
948 		if (IS_ERR(rq)) {
949 			igt_spinner_end(&spin_lo);
950 			err = PTR_ERR(rq);
951 			goto err_ctx_lo;
952 		}
953 
954 		i915_request_add(rq);
955 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
956 			GEM_TRACE("hi spinner failed to start\n");
957 			GEM_TRACE_DUMP();
958 			intel_gt_set_wedged(gt);
959 			err = -EIO;
960 			goto err_ctx_lo;
961 		}
962 
963 		igt_spinner_end(&spin_hi);
964 		igt_spinner_end(&spin_lo);
965 
966 		if (igt_live_test_end(&t)) {
967 			err = -EIO;
968 			goto err_ctx_lo;
969 		}
970 	}
971 
972 	err = 0;
973 err_ctx_lo:
974 	kernel_context_close(ctx_lo);
975 err_ctx_hi:
976 	kernel_context_close(ctx_hi);
977 err_spin_lo:
978 	igt_spinner_fini(&spin_lo);
979 err_spin_hi:
980 	igt_spinner_fini(&spin_hi);
981 	return err;
982 }
983 
984 static int live_late_preempt(void *arg)
985 {
986 	struct intel_gt *gt = arg;
987 	struct i915_gem_context *ctx_hi, *ctx_lo;
988 	struct igt_spinner spin_hi, spin_lo;
989 	struct intel_engine_cs *engine;
990 	struct i915_sched_attr attr = {};
991 	enum intel_engine_id id;
992 	int err = -ENOMEM;
993 
994 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
995 		return 0;
996 
997 	if (igt_spinner_init(&spin_hi, gt))
998 		return -ENOMEM;
999 
1000 	if (igt_spinner_init(&spin_lo, gt))
1001 		goto err_spin_hi;
1002 
1003 	ctx_hi = kernel_context(gt->i915);
1004 	if (!ctx_hi)
1005 		goto err_spin_lo;
1006 
1007 	ctx_lo = kernel_context(gt->i915);
1008 	if (!ctx_lo)
1009 		goto err_ctx_hi;
1010 
1011 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1012 	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1013 
1014 	for_each_engine(engine, gt, id) {
1015 		struct igt_live_test t;
1016 		struct i915_request *rq;
1017 
1018 		if (!intel_engine_has_preemption(engine))
1019 			continue;
1020 
1021 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1022 			err = -EIO;
1023 			goto err_ctx_lo;
1024 		}
1025 
1026 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1027 					    MI_ARB_CHECK);
1028 		if (IS_ERR(rq)) {
1029 			err = PTR_ERR(rq);
1030 			goto err_ctx_lo;
1031 		}
1032 
1033 		i915_request_add(rq);
1034 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1035 			pr_err("First context failed to start\n");
1036 			goto err_wedged;
1037 		}
1038 
1039 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1040 					    MI_NOOP);
1041 		if (IS_ERR(rq)) {
1042 			igt_spinner_end(&spin_lo);
1043 			err = PTR_ERR(rq);
1044 			goto err_ctx_lo;
1045 		}
1046 
1047 		i915_request_add(rq);
1048 		if (igt_wait_for_spinner(&spin_hi, rq)) {
1049 			pr_err("Second context overtook first?\n");
1050 			goto err_wedged;
1051 		}
1052 
1053 		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1054 		engine->schedule(rq, &attr);
1055 
1056 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1057 			pr_err("High priority context failed to preempt the low priority context\n");
1058 			GEM_TRACE_DUMP();
1059 			goto err_wedged;
1060 		}
1061 
1062 		igt_spinner_end(&spin_hi);
1063 		igt_spinner_end(&spin_lo);
1064 
1065 		if (igt_live_test_end(&t)) {
1066 			err = -EIO;
1067 			goto err_ctx_lo;
1068 		}
1069 	}
1070 
1071 	err = 0;
1072 err_ctx_lo:
1073 	kernel_context_close(ctx_lo);
1074 err_ctx_hi:
1075 	kernel_context_close(ctx_hi);
1076 err_spin_lo:
1077 	igt_spinner_fini(&spin_lo);
1078 err_spin_hi:
1079 	igt_spinner_fini(&spin_hi);
1080 	return err;
1081 
1082 err_wedged:
1083 	igt_spinner_end(&spin_hi);
1084 	igt_spinner_end(&spin_lo);
1085 	intel_gt_set_wedged(gt);
1086 	err = -EIO;
1087 	goto err_ctx_lo;
1088 }
1089 
1090 struct preempt_client {
1091 	struct igt_spinner spin;
1092 	struct i915_gem_context *ctx;
1093 };
1094 
1095 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1096 {
1097 	c->ctx = kernel_context(gt->i915);
1098 	if (!c->ctx)
1099 		return -ENOMEM;
1100 
1101 	if (igt_spinner_init(&c->spin, gt))
1102 		goto err_ctx;
1103 
1104 	return 0;
1105 
1106 err_ctx:
1107 	kernel_context_close(c->ctx);
1108 	return -ENOMEM;
1109 }
1110 
1111 static void preempt_client_fini(struct preempt_client *c)
1112 {
1113 	igt_spinner_fini(&c->spin);
1114 	kernel_context_close(c->ctx);
1115 }
1116 
1117 static int live_nopreempt(void *arg)
1118 {
1119 	struct intel_gt *gt = arg;
1120 	struct intel_engine_cs *engine;
1121 	struct preempt_client a, b;
1122 	enum intel_engine_id id;
1123 	int err = -ENOMEM;
1124 
1125 	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be under observation and so must not be interrupted.
1128 	 */
1129 
1130 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1131 		return 0;
1132 
1133 	if (preempt_client_init(gt, &a))
1134 		return -ENOMEM;
1135 	if (preempt_client_init(gt, &b))
1136 		goto err_client_a;
1137 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1138 
1139 	for_each_engine(engine, gt, id) {
1140 		struct i915_request *rq_a, *rq_b;
1141 
1142 		if (!intel_engine_has_preemption(engine))
1143 			continue;
1144 
1145 		engine->execlists.preempt_hang.count = 0;
1146 
1147 		rq_a = spinner_create_request(&a.spin,
1148 					      a.ctx, engine,
1149 					      MI_ARB_CHECK);
1150 		if (IS_ERR(rq_a)) {
1151 			err = PTR_ERR(rq_a);
1152 			goto err_client_b;
1153 		}
1154 
1155 		/* Low priority client, but unpreemptable! */
1156 		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1157 
1158 		i915_request_add(rq_a);
1159 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1160 			pr_err("First client failed to start\n");
1161 			goto err_wedged;
1162 		}
1163 
1164 		rq_b = spinner_create_request(&b.spin,
1165 					      b.ctx, engine,
1166 					      MI_ARB_CHECK);
1167 		if (IS_ERR(rq_b)) {
1168 			err = PTR_ERR(rq_b);
1169 			goto err_client_b;
1170 		}
1171 
1172 		i915_request_add(rq_b);
1173 
1174 		/* B is much more important than A! (But A is unpreemptable.) */
1175 		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1176 
1177 		/* Wait long enough for preemption and timeslicing */
1178 		if (igt_wait_for_spinner(&b.spin, rq_b)) {
1179 			pr_err("Second client started too early!\n");
1180 			goto err_wedged;
1181 		}
1182 
1183 		igt_spinner_end(&a.spin);
1184 
1185 		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1186 			pr_err("Second client failed to start\n");
1187 			goto err_wedged;
1188 		}
1189 
1190 		igt_spinner_end(&b.spin);
1191 
1192 		if (engine->execlists.preempt_hang.count) {
1193 			pr_err("Preemption recorded x%d; should have been suppressed!\n",
1194 			       engine->execlists.preempt_hang.count);
1195 			err = -EINVAL;
1196 			goto err_wedged;
1197 		}
1198 
1199 		if (igt_flush_test(gt->i915))
1200 			goto err_wedged;
1201 	}
1202 
1203 	err = 0;
1204 err_client_b:
1205 	preempt_client_fini(&b);
1206 err_client_a:
1207 	preempt_client_fini(&a);
1208 	return err;
1209 
1210 err_wedged:
1211 	igt_spinner_end(&b.spin);
1212 	igt_spinner_end(&a.spin);
1213 	intel_gt_set_wedged(gt);
1214 	err = -EIO;
1215 	goto err_client_b;
1216 }
1217 
1218 struct live_preempt_cancel {
1219 	struct intel_engine_cs *engine;
1220 	struct preempt_client a, b;
1221 };
1222 
1223 static int __cancel_active0(struct live_preempt_cancel *arg)
1224 {
1225 	struct i915_request *rq;
1226 	struct igt_live_test t;
1227 	int err;
1228 
1229 	/* Preempt cancel of ELSP0 */
1230 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1231 	if (igt_live_test_begin(&t, arg->engine->i915,
1232 				__func__, arg->engine->name))
1233 		return -EIO;
1234 
1235 	rq = spinner_create_request(&arg->a.spin,
1236 				    arg->a.ctx, arg->engine,
1237 				    MI_ARB_CHECK);
1238 	if (IS_ERR(rq))
1239 		return PTR_ERR(rq);
1240 
1241 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
1242 	i915_request_get(rq);
1243 	i915_request_add(rq);
1244 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1245 		err = -EIO;
1246 		goto out;
1247 	}
1248 
1249 	intel_context_set_banned(rq->context);
1250 	err = intel_engine_pulse(arg->engine);
1251 	if (err)
1252 		goto out;
1253 
1254 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1255 		err = -EIO;
1256 		goto out;
1257 	}
1258 
1259 	if (rq->fence.error != -EIO) {
1260 		pr_err("Cancelled inflight0 request did not report -EIO\n");
1261 		err = -EINVAL;
1262 		goto out;
1263 	}
1264 
1265 out:
1266 	i915_request_put(rq);
1267 	if (igt_live_test_end(&t))
1268 		err = -EIO;
1269 	return err;
1270 }
1271 
1272 static int __cancel_active1(struct live_preempt_cancel *arg)
1273 {
1274 	struct i915_request *rq[2] = {};
1275 	struct igt_live_test t;
1276 	int err;
1277 
1278 	/* Preempt cancel of ELSP1 */
1279 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1280 	if (igt_live_test_begin(&t, arg->engine->i915,
1281 				__func__, arg->engine->name))
1282 		return -EIO;
1283 
1284 	rq[0] = spinner_create_request(&arg->a.spin,
1285 				       arg->a.ctx, arg->engine,
1286 				       MI_NOOP); /* no preemption */
1287 	if (IS_ERR(rq[0]))
1288 		return PTR_ERR(rq[0]);
1289 
1290 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1291 	i915_request_get(rq[0]);
1292 	i915_request_add(rq[0]);
1293 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1294 		err = -EIO;
1295 		goto out;
1296 	}
1297 
1298 	rq[1] = spinner_create_request(&arg->b.spin,
1299 				       arg->b.ctx, arg->engine,
1300 				       MI_ARB_CHECK);
1301 	if (IS_ERR(rq[1])) {
1302 		err = PTR_ERR(rq[1]);
1303 		goto out;
1304 	}
1305 
1306 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1307 	i915_request_get(rq[1]);
1308 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1309 	i915_request_add(rq[1]);
1310 	if (err)
1311 		goto out;
1312 
1313 	intel_context_set_banned(rq[1]->context);
1314 	err = intel_engine_pulse(arg->engine);
1315 	if (err)
1316 		goto out;
1317 
1318 	igt_spinner_end(&arg->a.spin);
1319 	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1320 		err = -EIO;
1321 		goto out;
1322 	}
1323 
1324 	if (rq[0]->fence.error != 0) {
1325 		pr_err("Normal inflight0 request did not complete\n");
1326 		err = -EINVAL;
1327 		goto out;
1328 	}
1329 
1330 	if (rq[1]->fence.error != -EIO) {
1331 		pr_err("Cancelled inflight1 request did not report -EIO\n");
1332 		err = -EINVAL;
1333 		goto out;
1334 	}
1335 
1336 out:
1337 	i915_request_put(rq[1]);
1338 	i915_request_put(rq[0]);
1339 	if (igt_live_test_end(&t))
1340 		err = -EIO;
1341 	return err;
1342 }
1343 
1344 static int __cancel_queued(struct live_preempt_cancel *arg)
1345 {
1346 	struct i915_request *rq[3] = {};
1347 	struct igt_live_test t;
1348 	int err;
1349 
1350 	/* Full ELSP and one in the wings */
1351 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1352 	if (igt_live_test_begin(&t, arg->engine->i915,
1353 				__func__, arg->engine->name))
1354 		return -EIO;
1355 
1356 	rq[0] = spinner_create_request(&arg->a.spin,
1357 				       arg->a.ctx, arg->engine,
1358 				       MI_ARB_CHECK);
1359 	if (IS_ERR(rq[0]))
1360 		return PTR_ERR(rq[0]);
1361 
1362 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1363 	i915_request_get(rq[0]);
1364 	i915_request_add(rq[0]);
1365 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1366 		err = -EIO;
1367 		goto out;
1368 	}
1369 
1370 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1371 	if (IS_ERR(rq[1])) {
1372 		err = PTR_ERR(rq[1]);
1373 		goto out;
1374 	}
1375 
1376 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1377 	i915_request_get(rq[1]);
1378 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1379 	i915_request_add(rq[1]);
1380 	if (err)
1381 		goto out;
1382 
1383 	rq[2] = spinner_create_request(&arg->b.spin,
1384 				       arg->a.ctx, arg->engine,
1385 				       MI_ARB_CHECK);
1386 	if (IS_ERR(rq[2])) {
1387 		err = PTR_ERR(rq[2]);
1388 		goto out;
1389 	}
1390 
1391 	i915_request_get(rq[2]);
1392 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1393 	i915_request_add(rq[2]);
1394 	if (err)
1395 		goto out;
1396 
1397 	intel_context_set_banned(rq[2]->context);
1398 	err = intel_engine_pulse(arg->engine);
1399 	if (err)
1400 		goto out;
1401 
1402 	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1403 		err = -EIO;
1404 		goto out;
1405 	}
1406 
1407 	if (rq[0]->fence.error != -EIO) {
1408 		pr_err("Cancelled inflight0 request did not report -EIO\n");
1409 		err = -EINVAL;
1410 		goto out;
1411 	}
1412 
1413 	if (rq[1]->fence.error != 0) {
1414 		pr_err("Normal inflight1 request did not complete\n");
1415 		err = -EINVAL;
1416 		goto out;
1417 	}
1418 
1419 	if (rq[2]->fence.error != -EIO) {
1420 		pr_err("Cancelled queued request did not report -EIO\n");
1421 		err = -EINVAL;
1422 		goto out;
1423 	}
1424 
1425 out:
1426 	i915_request_put(rq[2]);
1427 	i915_request_put(rq[1]);
1428 	i915_request_put(rq[0]);
1429 	if (igt_live_test_end(&t))
1430 		err = -EIO;
1431 	return err;
1432 }
1433 
1434 static int __cancel_hostile(struct live_preempt_cancel *arg)
1435 {
1436 	struct i915_request *rq;
1437 	int err;
1438 
1439 	/* Preempt cancel non-preemptible spinner in ELSP0 */
1440 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1441 		return 0;
1442 
1443 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1444 	rq = spinner_create_request(&arg->a.spin,
1445 				    arg->a.ctx, arg->engine,
1446 				    MI_NOOP); /* preemption disabled */
1447 	if (IS_ERR(rq))
1448 		return PTR_ERR(rq);
1449 
1450 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
1451 	i915_request_get(rq);
1452 	i915_request_add(rq);
1453 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1454 		err = -EIO;
1455 		goto out;
1456 	}
1457 
1458 	intel_context_set_banned(rq->context);
1459 	err = intel_engine_pulse(arg->engine); /* force reset */
1460 	if (err)
1461 		goto out;
1462 
1463 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1464 		err = -EIO;
1465 		goto out;
1466 	}
1467 
1468 	if (rq->fence.error != -EIO) {
1469 		pr_err("Cancelled inflight0 request did not report -EIO\n");
1470 		err = -EINVAL;
1471 		goto out;
1472 	}
1473 
1474 out:
1475 	i915_request_put(rq);
1476 	if (igt_flush_test(arg->engine->i915))
1477 		err = -EIO;
1478 	return err;
1479 }
1480 
1481 static int live_preempt_cancel(void *arg)
1482 {
1483 	struct intel_gt *gt = arg;
1484 	struct live_preempt_cancel data;
1485 	enum intel_engine_id id;
1486 	int err = -ENOMEM;
1487 
1488 	/*
1489 	 * To cancel an inflight context, we need to first remove it from the
1490 	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
1491 	 */
1492 
1493 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1494 		return 0;
1495 
1496 	if (preempt_client_init(gt, &data.a))
1497 		return -ENOMEM;
1498 	if (preempt_client_init(gt, &data.b))
1499 		goto err_client_a;
1500 
1501 	for_each_engine(data.engine, gt, id) {
1502 		if (!intel_engine_has_preemption(data.engine))
1503 			continue;
1504 
1505 		err = __cancel_active0(&data);
1506 		if (err)
1507 			goto err_wedged;
1508 
1509 		err = __cancel_active1(&data);
1510 		if (err)
1511 			goto err_wedged;
1512 
1513 		err = __cancel_queued(&data);
1514 		if (err)
1515 			goto err_wedged;
1516 
1517 		err = __cancel_hostile(&data);
1518 		if (err)
1519 			goto err_wedged;
1520 	}
1521 
1522 	err = 0;
1523 err_client_b:
1524 	preempt_client_fini(&data.b);
1525 err_client_a:
1526 	preempt_client_fini(&data.a);
1527 	return err;
1528 
1529 err_wedged:
1530 	GEM_TRACE_DUMP();
1531 	igt_spinner_end(&data.b.spin);
1532 	igt_spinner_end(&data.a.spin);
1533 	intel_gt_set_wedged(gt);
1534 	goto err_client_b;
1535 }
1536 
1537 static int live_suppress_self_preempt(void *arg)
1538 {
1539 	struct intel_gt *gt = arg;
1540 	struct intel_engine_cs *engine;
1541 	struct i915_sched_attr attr = {
1542 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
1543 	};
1544 	struct preempt_client a, b;
1545 	enum intel_engine_id id;
1546 	int err = -ENOMEM;
1547 
1548 	/*
1549 	 * Verify that if a preemption request does not cause a change in
1550 	 * the current execution order, the preempt-to-idle injection is
1551 	 * skipped and that we do not accidentally apply it after the CS
1552 	 * completion event.
1553 	 */
1554 
1555 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1556 		return 0;
1557 
1558 	if (USES_GUC_SUBMISSION(gt->i915))
		return 0; /* presume black box */
1560 
1561 	if (intel_vgpu_active(gt->i915))
1562 		return 0; /* GVT forces single port & request submission */
1563 
1564 	if (preempt_client_init(gt, &a))
1565 		return -ENOMEM;
1566 	if (preempt_client_init(gt, &b))
1567 		goto err_client_a;
1568 
1569 	for_each_engine(engine, gt, id) {
1570 		struct i915_request *rq_a, *rq_b;
1571 		int depth;
1572 
1573 		if (!intel_engine_has_preemption(engine))
1574 			continue;
1575 
1576 		if (igt_flush_test(gt->i915))
1577 			goto err_wedged;
1578 
1579 		intel_engine_pm_get(engine);
1580 		engine->execlists.preempt_hang.count = 0;
1581 
1582 		rq_a = spinner_create_request(&a.spin,
1583 					      a.ctx, engine,
1584 					      MI_NOOP);
1585 		if (IS_ERR(rq_a)) {
1586 			err = PTR_ERR(rq_a);
1587 			intel_engine_pm_put(engine);
1588 			goto err_client_b;
1589 		}
1590 
1591 		i915_request_add(rq_a);
1592 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1593 			pr_err("First client failed to start\n");
1594 			intel_engine_pm_put(engine);
1595 			goto err_wedged;
1596 		}
1597 
1598 		/* Keep postponing the timer to avoid premature slicing */
1599 		mod_timer(&engine->execlists.timer, jiffies + HZ);
1600 		for (depth = 0; depth < 8; depth++) {
1601 			rq_b = spinner_create_request(&b.spin,
1602 						      b.ctx, engine,
1603 						      MI_NOOP);
1604 			if (IS_ERR(rq_b)) {
1605 				err = PTR_ERR(rq_b);
1606 				intel_engine_pm_put(engine);
1607 				goto err_client_b;
1608 			}
1609 			i915_request_add(rq_b);
1610 
1611 			GEM_BUG_ON(i915_request_completed(rq_a));
1612 			engine->schedule(rq_a, &attr);
1613 			igt_spinner_end(&a.spin);
1614 
1615 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1616 				pr_err("Second client failed to start\n");
1617 				intel_engine_pm_put(engine);
1618 				goto err_wedged;
1619 			}
1620 
1621 			swap(a, b);
1622 			rq_a = rq_b;
1623 		}
1624 		igt_spinner_end(&a.spin);
1625 
1626 		if (engine->execlists.preempt_hang.count) {
1627 			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
1628 			       engine->name,
1629 			       engine->execlists.preempt_hang.count,
1630 			       depth);
1631 			intel_engine_pm_put(engine);
1632 			err = -EINVAL;
1633 			goto err_client_b;
1634 		}
1635 
1636 		intel_engine_pm_put(engine);
1637 		if (igt_flush_test(gt->i915))
1638 			goto err_wedged;
1639 	}
1640 
1641 	err = 0;
1642 err_client_b:
1643 	preempt_client_fini(&b);
1644 err_client_a:
1645 	preempt_client_fini(&a);
1646 	return err;
1647 
1648 err_wedged:
1649 	igt_spinner_end(&b.spin);
1650 	igt_spinner_end(&a.spin);
1651 	intel_gt_set_wedged(gt);
1652 	err = -EIO;
1653 	goto err_client_b;
1654 }
1655 
1656 static int __i915_sw_fence_call
1657 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
1658 {
1659 	return NOTIFY_DONE;
1660 }
1661 
1662 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
1663 {
1664 	struct i915_request *rq;
1665 
1666 	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
1667 	if (!rq)
1668 		return NULL;
1669 
1670 	rq->engine = engine;
1671 
1672 	spin_lock_init(&rq->lock);
1673 	INIT_LIST_HEAD(&rq->fence.cb_list);
1674 	rq->fence.lock = &rq->lock;
1675 	rq->fence.ops = &i915_fence_ops;
1676 
1677 	i915_sched_node_init(&rq->sched);
1678 
1679 	/* mark this request as permanently incomplete */
1680 	rq->fence.seqno = 1;
1681 	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
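	/* The "HWSP" seqno reads the upper 32 bits, which are always zero. */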
1682 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
1683 	GEM_BUG_ON(i915_request_completed(rq));
1684 
1685 	i915_sw_fence_init(&rq->submit, dummy_notify);
1686 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
1687 
1692 	return rq;
1693 }
1694 
1695 static void dummy_request_free(struct i915_request *dummy)
1696 {
1697 	/* We have to fake the CS interrupt to kick the next request */
1698 	i915_sw_fence_commit(&dummy->submit);
1699 
1700 	i915_request_mark_complete(dummy);
1701 	dma_fence_signal(&dummy->fence);
1702 
1703 	i915_sched_node_fini(&dummy->sched);
1704 	i915_sw_fence_fini(&dummy->submit);
1705 
1706 	dma_fence_free(&dummy->fence);
1707 }
1708 
1709 static int live_suppress_wait_preempt(void *arg)
1710 {
1711 	struct intel_gt *gt = arg;
1712 	struct preempt_client client[4];
1713 	struct i915_request *rq[ARRAY_SIZE(client)] = {};
1714 	struct intel_engine_cs *engine;
1715 	enum intel_engine_id id;
1716 	int err = -ENOMEM;
1717 	int i;
1718 
1719 	/*
1720 	 * Waiters are given a little priority nudge, but not enough
1721 	 * to actually cause any preemption. Double check that we do
1722 	 * not needlessly generate preempt-to-idle cycles.
1723 	 */
1724 
1725 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1726 		return 0;
1727 
1728 	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
1729 		return -ENOMEM;
1730 	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
1731 		goto err_client_0;
1732 	if (preempt_client_init(gt, &client[2])) /* head of queue */
1733 		goto err_client_1;
1734 	if (preempt_client_init(gt, &client[3])) /* bystander */
1735 		goto err_client_2;
1736 
1737 	for_each_engine(engine, gt, id) {
1738 		int depth;
1739 
1740 		if (!intel_engine_has_preemption(engine))
1741 			continue;
1742 
1743 		if (!engine->emit_init_breadcrumb)
1744 			continue;
1745 
1746 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
1747 			struct i915_request *dummy;
1748 
1749 			engine->execlists.preempt_hang.count = 0;
1750 
1751 			dummy = dummy_request(engine);
1752 			if (!dummy)
1753 				goto err_client_3;
1754 
1755 			for (i = 0; i < ARRAY_SIZE(client); i++) {
1756 				struct i915_request *this;
1757 
1758 				this = spinner_create_request(&client[i].spin,
1759 							      client[i].ctx, engine,
1760 							      MI_NOOP);
1761 				if (IS_ERR(this)) {
1762 					err = PTR_ERR(this);
1763 					goto err_wedged;
1764 				}
1765 
1766 				/* Disable NEWCLIENT promotion */
1767 				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
1768 							&dummy->fence);
1769 
1770 				rq[i] = i915_request_get(this);
1771 				i915_request_add(this);
1772 			}
1773 
1774 			dummy_request_free(dummy);
1775 
1776 			GEM_BUG_ON(i915_request_completed(rq[0]));
1777 			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
1778 				pr_err("%s: First client failed to start\n",
1779 				       engine->name);
1780 				goto err_wedged;
1781 			}
1782 			GEM_BUG_ON(!i915_request_started(rq[0]));
1783 
1784 			if (i915_request_wait(rq[depth],
1785 					      I915_WAIT_PRIORITY,
1786 					      1) != -ETIME) {
1787 				pr_err("%s: Waiter depth:%d completed!\n",
1788 				       engine->name, depth);
1789 				goto err_wedged;
1790 			}
1791 
1792 			for (i = 0; i < ARRAY_SIZE(client); i++) {
1793 				igt_spinner_end(&client[i].spin);
1794 				i915_request_put(rq[i]);
1795 				rq[i] = NULL;
1796 			}
1797 
1798 			if (igt_flush_test(gt->i915))
1799 				goto err_wedged;
1800 
1801 			if (engine->execlists.preempt_hang.count) {
1802 				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
1803 				       engine->name,
1804 				       engine->execlists.preempt_hang.count,
1805 				       depth);
1806 				err = -EINVAL;
1807 				goto err_client_3;
1808 			}
1809 		}
1810 	}
1811 
1812 	err = 0;
1813 err_client_3:
1814 	preempt_client_fini(&client[3]);
1815 err_client_2:
1816 	preempt_client_fini(&client[2]);
1817 err_client_1:
1818 	preempt_client_fini(&client[1]);
1819 err_client_0:
1820 	preempt_client_fini(&client[0]);
1821 	return err;
1822 
1823 err_wedged:
1824 	for (i = 0; i < ARRAY_SIZE(client); i++) {
1825 		igt_spinner_end(&client[i].spin);
1826 		i915_request_put(rq[i]);
1827 	}
1828 	intel_gt_set_wedged(gt);
1829 	err = -EIO;
1830 	goto err_client_3;
1831 }
1832 
1833 static int live_chain_preempt(void *arg)
1834 {
1835 	struct intel_gt *gt = arg;
1836 	struct intel_engine_cs *engine;
1837 	struct preempt_client hi, lo;
1838 	enum intel_engine_id id;
1839 	int err = -ENOMEM;
1840 
1841 	/*
1842 	 * Build a chain AB...BA between two contexts (A, B) and request
1843 	 * preemption of the last request. It should then complete before
1844 	 * the previously submitted spinner in B.
1845 	 */
1846 
1847 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1848 		return 0;
1849 
1850 	if (preempt_client_init(gt, &hi))
1851 		return -ENOMEM;
1852 
1853 	if (preempt_client_init(gt, &lo))
1854 		goto err_client_hi;
1855 
1856 	for_each_engine(engine, gt, id) {
1857 		struct i915_sched_attr attr = {
1858 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1859 		};
1860 		struct igt_live_test t;
1861 		struct i915_request *rq;
1862 		int ring_size, count, i;
1863 
1864 		if (!intel_engine_has_preemption(engine))
1865 			continue;
1866 
1867 		rq = spinner_create_request(&lo.spin,
1868 					    lo.ctx, engine,
1869 					    MI_ARB_CHECK);
1870 		if (IS_ERR(rq))
1871 			goto err_wedged;
1872 
1873 		i915_request_get(rq);
1874 		i915_request_add(rq);
1875 
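		/*
		 * Estimate how many requests of this size fit in the ring,
		 * so the chain below can be kept within available ring space.
		 */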
1876 		ring_size = rq->wa_tail - rq->head;
1877 		if (ring_size < 0)
1878 			ring_size += rq->ring->size;
1879 		ring_size = rq->ring->size / ring_size;
1880 		pr_debug("%s(%s): Using maximum of %d requests\n",
1881 			 __func__, engine->name, ring_size);
1882 
1883 		igt_spinner_end(&lo.spin);
1884 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1885 			pr_err("Timed out waiting to flush %s\n", engine->name);
1886 			i915_request_put(rq);
1887 			goto err_wedged;
1888 		}
1889 		i915_request_put(rq);
1890 
1891 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1892 			err = -EIO;
1893 			goto err_wedged;
1894 		}
1895 
1896 		for_each_prime_number_from(count, 1, ring_size) {
1897 			rq = spinner_create_request(&hi.spin,
1898 						    hi.ctx, engine,
1899 						    MI_ARB_CHECK);
1900 			if (IS_ERR(rq))
1901 				goto err_wedged;
1902 			i915_request_add(rq);
1903 			if (!igt_wait_for_spinner(&hi.spin, rq))
1904 				goto err_wedged;
1905 
1906 			rq = spinner_create_request(&lo.spin,
1907 						    lo.ctx, engine,
1908 						    MI_ARB_CHECK);
1909 			if (IS_ERR(rq))
1910 				goto err_wedged;
1911 			i915_request_add(rq);
1912 
1913 			for (i = 0; i < count; i++) {
1914 				rq = igt_request_alloc(lo.ctx, engine);
1915 				if (IS_ERR(rq))
1916 					goto err_wedged;
1917 				i915_request_add(rq);
1918 			}
1919 
1920 			rq = igt_request_alloc(hi.ctx, engine);
1921 			if (IS_ERR(rq))
1922 				goto err_wedged;
1923 
1924 			i915_request_get(rq);
1925 			i915_request_add(rq);
1926 			engine->schedule(rq, &attr);
1927 
1928 			igt_spinner_end(&hi.spin);
1929 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1930 				struct drm_printer p =
1931 					drm_info_printer(gt->i915->drm.dev);
1932 
1933 				pr_err("Failed to preempt over chain of %d\n",
1934 				       count);
1935 				intel_engine_dump(engine, &p,
1936 						  "%s\n", engine->name);
1937 				i915_request_put(rq);
1938 				goto err_wedged;
1939 			}
1940 			igt_spinner_end(&lo.spin);
1941 			i915_request_put(rq);
1942 
1943 			rq = igt_request_alloc(lo.ctx, engine);
1944 			if (IS_ERR(rq))
1945 				goto err_wedged;
1946 
1947 			i915_request_get(rq);
1948 			i915_request_add(rq);
1949 
1950 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1951 				struct drm_printer p =
1952 					drm_info_printer(gt->i915->drm.dev);
1953 
1954 				pr_err("Failed to flush low priority chain of %d requests\n",
1955 				       count);
1956 				intel_engine_dump(engine, &p,
1957 						  "%s\n", engine->name);
1958 
1959 				i915_request_put(rq);
1960 				goto err_wedged;
1961 			}
1962 			i915_request_put(rq);
1963 		}
1964 
1965 		if (igt_live_test_end(&t)) {
1966 			err = -EIO;
1967 			goto err_wedged;
1968 		}
1969 	}
1970 
1971 	err = 0;
1972 err_client_lo:
1973 	preempt_client_fini(&lo);
1974 err_client_hi:
1975 	preempt_client_fini(&hi);
1976 	return err;
1977 
1978 err_wedged:
1979 	igt_spinner_end(&hi.spin);
1980 	igt_spinner_end(&lo.spin);
1981 	intel_gt_set_wedged(gt);
1982 	err = -EIO;
1983 	goto err_client_lo;
1984 }
1985 
1986 static int create_gang(struct intel_engine_cs *engine,
1987 		       struct i915_request **prev)
1988 {
1989 	struct drm_i915_gem_object *obj;
1990 	struct intel_context *ce;
1991 	struct i915_request *rq;
1992 	struct i915_vma *vma;
1993 	u32 *cs;
1994 	int err;
1995 
1996 	ce = intel_context_create(engine);
1997 	if (IS_ERR(ce))
1998 		return PTR_ERR(ce);
1999 
2000 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2001 	if (IS_ERR(obj)) {
2002 		err = PTR_ERR(obj);
2003 		goto err_ce;
2004 	}
2005 
2006 	vma = i915_vma_instance(obj, ce->vm, NULL);
2007 	if (IS_ERR(vma)) {
2008 		err = PTR_ERR(vma);
2009 		goto err_obj;
2010 	}
2011 
2012 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2013 	if (err)
2014 		goto err_obj;
2015 
	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}
2019 
2020 	/* Semaphore target: spin until zero */
2021 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2022 
2023 	*cs++ = MI_SEMAPHORE_WAIT |
2024 		MI_SEMAPHORE_POLL |
2025 		MI_SEMAPHORE_SAD_EQ_SDD;
2026 	*cs++ = 0;
2027 	*cs++ = lower_32_bits(vma->node.start);
2028 	*cs++ = upper_32_bits(vma->node.start);
2029 
2030 	if (*prev) {
2031 		u64 offset = (*prev)->batch->node.start;
2032 
2033 		/* Terminate the spinner in the next lower priority batch. */
2034 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2035 		*cs++ = lower_32_bits(offset);
2036 		*cs++ = upper_32_bits(offset);
2037 		*cs++ = 0;
2038 	}
2039 
2040 	*cs++ = MI_BATCH_BUFFER_END;
2041 	i915_gem_object_flush_map(obj);
2042 	i915_gem_object_unpin_map(obj);
2043 
	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_obj;
	}
2047 
2048 	rq->batch = vma;
2049 	i915_request_get(rq);
2050 
2051 	i915_vma_lock(vma);
2052 	err = i915_request_await_object(rq, vma->obj, false);
2053 	if (!err)
2054 		err = i915_vma_move_to_active(vma, rq, 0);
2055 	if (!err)
2056 		err = rq->engine->emit_bb_start(rq,
2057 						vma->node.start,
2058 						PAGE_SIZE, 0);
2059 	i915_vma_unlock(vma);
2060 	i915_request_add(rq);
2061 	if (err)
2062 		goto err_rq;
2063 
2064 	i915_gem_object_put(obj);
2065 	intel_context_put(ce);
2066 
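	/*
	 * Chain the requests from newest to oldest via client_link; the
	 * oldest request's link resolves back to NULL via list_next_entry(),
	 * terminating the caller's walk.
	 */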
2067 	rq->client_link.next = &(*prev)->client_link;
2068 	*prev = rq;
2069 	return 0;
2070 
2071 err_rq:
2072 	i915_request_put(rq);
2073 err_obj:
2074 	i915_gem_object_put(obj);
2075 err_ce:
2076 	intel_context_put(ce);
2077 	return err;
2078 }
2079 
2080 static int live_preempt_gang(void *arg)
2081 {
2082 	struct intel_gt *gt = arg;
2083 	struct intel_engine_cs *engine;
2084 	enum intel_engine_id id;
2085 
2086 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2087 		return 0;
2088 
2089 	/*
2090 	 * Build as long a chain of preempters as we can, with each
2091 	 * request higher priority than the last. Once we are ready, we release
	 * the last batch, which then percolates down the chain, each releasing
2093 	 * the next oldest in turn. The intent is to simply push as hard as we
2094 	 * can with the number of preemptions, trying to exceed narrow HW
2095 	 * limits. At a minimum, we insist that we can sort all the user
2096 	 * high priority levels into execution order.
2097 	 */
2098 
2099 	for_each_engine(engine, gt, id) {
2100 		struct i915_request *rq = NULL;
2101 		struct igt_live_test t;
2102 		IGT_TIMEOUT(end_time);
2103 		int prio = 0;
2104 		int err = 0;
2105 		u32 *cs;
2106 
2107 		if (!intel_engine_has_preemption(engine))
2108 			continue;
2109 
2110 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2111 			return -EIO;
2112 
2113 		do {
2114 			struct i915_sched_attr attr = {
2115 				.priority = I915_USER_PRIORITY(prio++),
2116 			};
2117 
2118 			err = create_gang(engine, &rq);
2119 			if (err)
2120 				break;
2121 
2122 			/* Submit each spinner at increasing priority */
2123 			engine->schedule(rq, &attr);
2124 
2125 			if (prio <= I915_PRIORITY_MAX)
2126 				continue;
2127 
2128 			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2129 				break;
2130 
2131 			if (__igt_timeout(end_time, NULL))
2132 				break;
2133 		} while (1);
2134 		pr_debug("%s: Preempt chain of %d requests\n",
2135 			 engine->name, prio);
2136 
		/*
		 * Release the gang: the last spinner is the highest priority
		 * and should execute first. When that spinner completes, it
		 * will terminate the next lowest priority spinner until there
		 * are no more spinners and the gang is complete.
		 */
2143 		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2144 		if (!IS_ERR(cs)) {
2145 			*cs = 0;
2146 			i915_gem_object_unpin_map(rq->batch->obj);
2147 		} else {
2148 			err = PTR_ERR(cs);
2149 			intel_gt_set_wedged(gt);
2150 		}
2151 
2152 		while (rq) { /* wait for each rq from highest to lowest prio */
2153 			struct i915_request *n =
2154 				list_next_entry(rq, client_link);
2155 
2156 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2157 				struct drm_printer p =
2158 					drm_info_printer(engine->i915->drm.dev);
2159 
2160 				pr_err("Failed to flush chain of %d requests, at %d\n",
2161 				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2162 				intel_engine_dump(engine, &p,
2163 						  "%s\n", engine->name);
2164 
2165 				err = -ETIME;
2166 			}
2167 
2168 			i915_request_put(rq);
2169 			rq = n;
2170 		}
2171 
2172 		if (igt_live_test_end(&t))
2173 			err = -EIO;
2174 		if (err)
2175 			return err;
2176 	}
2177 
2178 	return 0;
2179 }
2180 
2181 static int live_preempt_hang(void *arg)
2182 {
2183 	struct intel_gt *gt = arg;
2184 	struct i915_gem_context *ctx_hi, *ctx_lo;
2185 	struct igt_spinner spin_hi, spin_lo;
2186 	struct intel_engine_cs *engine;
2187 	enum intel_engine_id id;
2188 	int err = -ENOMEM;
2189 
2190 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2191 		return 0;
2192 
2193 	if (!intel_has_reset_engine(gt))
2194 		return 0;
2195 
2196 	if (igt_spinner_init(&spin_hi, gt))
2197 		return -ENOMEM;
2198 
2199 	if (igt_spinner_init(&spin_lo, gt))
2200 		goto err_spin_hi;
2201 
2202 	ctx_hi = kernel_context(gt->i915);
2203 	if (!ctx_hi)
2204 		goto err_spin_lo;
2205 	ctx_hi->sched.priority =
2206 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2207 
2208 	ctx_lo = kernel_context(gt->i915);
2209 	if (!ctx_lo)
2210 		goto err_ctx_hi;
2211 	ctx_lo->sched.priority =
2212 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2213 
2214 	for_each_engine(engine, gt, id) {
2215 		struct i915_request *rq;
2216 
2217 		if (!intel_engine_has_preemption(engine))
2218 			continue;
2219 
2220 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2221 					    MI_ARB_CHECK);
2222 		if (IS_ERR(rq)) {
2223 			err = PTR_ERR(rq);
2224 			goto err_ctx_lo;
2225 		}
2226 
2227 		i915_request_add(rq);
2228 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
2229 			GEM_TRACE("lo spinner failed to start\n");
2230 			GEM_TRACE_DUMP();
2231 			intel_gt_set_wedged(gt);
2232 			err = -EIO;
2233 			goto err_ctx_lo;
2234 		}
2235 
2236 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
2237 					    MI_ARB_CHECK);
2238 		if (IS_ERR(rq)) {
2239 			igt_spinner_end(&spin_lo);
2240 			err = PTR_ERR(rq);
2241 			goto err_ctx_lo;
2242 		}
2243 
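		/*
		 * Arm the selftest hook to simulate a preemption that never
		 * completes; preempt_hang.completion is signalled once the
		 * preemption attempt is made, at which point we recover the
		 * engine with a manual reset below.
		 */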
2244 		init_completion(&engine->execlists.preempt_hang.completion);
2245 		engine->execlists.preempt_hang.inject_hang = true;
2246 
2247 		i915_request_add(rq);
2248 
2249 		if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
2250 						 HZ / 10)) {
			pr_err("Preemption did not occur within timeout!\n");
2252 			GEM_TRACE_DUMP();
2253 			intel_gt_set_wedged(gt);
2254 			err = -EIO;
2255 			goto err_ctx_lo;
2256 		}
2257 
2258 		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2259 		intel_engine_reset(engine, NULL);
2260 		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
2261 
2262 		engine->execlists.preempt_hang.inject_hang = false;
2263 
2264 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
2265 			GEM_TRACE("hi spinner failed to start\n");
2266 			GEM_TRACE_DUMP();
2267 			intel_gt_set_wedged(gt);
2268 			err = -EIO;
2269 			goto err_ctx_lo;
2270 		}
2271 
2272 		igt_spinner_end(&spin_hi);
2273 		igt_spinner_end(&spin_lo);
2274 		if (igt_flush_test(gt->i915)) {
2275 			err = -EIO;
2276 			goto err_ctx_lo;
2277 		}
2278 	}
2279 
2280 	err = 0;
2281 err_ctx_lo:
2282 	kernel_context_close(ctx_lo);
2283 err_ctx_hi:
2284 	kernel_context_close(ctx_hi);
2285 err_spin_lo:
2286 	igt_spinner_fini(&spin_lo);
2287 err_spin_hi:
2288 	igt_spinner_fini(&spin_hi);
2289 	return err;
2290 }
2291 
2292 static int live_preempt_timeout(void *arg)
2293 {
2294 	struct intel_gt *gt = arg;
2295 	struct i915_gem_context *ctx_hi, *ctx_lo;
2296 	struct igt_spinner spin_lo;
2297 	struct intel_engine_cs *engine;
2298 	enum intel_engine_id id;
2299 	int err = -ENOMEM;
2300 
2301 	/*
2302 	 * Check that we force preemption to occur by cancelling the previous
2303 	 * context if it refuses to yield the GPU.
2304 	 */
2305 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2306 		return 0;
2307 
2308 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2309 		return 0;
2310 
2311 	if (!intel_has_reset_engine(gt))
2312 		return 0;
2313 
2314 	if (igt_spinner_init(&spin_lo, gt))
2315 		return -ENOMEM;
2316 
2317 	ctx_hi = kernel_context(gt->i915);
2318 	if (!ctx_hi)
2319 		goto err_spin_lo;
2320 	ctx_hi->sched.priority =
2321 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2322 
2323 	ctx_lo = kernel_context(gt->i915);
2324 	if (!ctx_lo)
2325 		goto err_ctx_hi;
2326 	ctx_lo->sched.priority =
2327 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2328 
2329 	for_each_engine(engine, gt, id) {
2330 		unsigned long saved_timeout;
2331 		struct i915_request *rq;
2332 
2333 		if (!intel_engine_has_preemption(engine))
2334 			continue;
2335 
2336 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2337 					    MI_NOOP); /* preemption disabled */
2338 		if (IS_ERR(rq)) {
2339 			err = PTR_ERR(rq);
2340 			goto err_ctx_lo;
2341 		}
2342 
2343 		i915_request_add(rq);
2344 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
2345 			intel_gt_set_wedged(gt);
2346 			err = -EIO;
2347 			goto err_ctx_lo;
2348 		}
2349 
2350 		rq = igt_request_alloc(ctx_hi, engine);
2351 		if (IS_ERR(rq)) {
2352 			igt_spinner_end(&spin_lo);
2353 			err = PTR_ERR(rq);
2354 			goto err_ctx_lo;
2355 		}
2356 
2357 		/* Flush the previous CS ack before changing timeouts */
2358 		while (READ_ONCE(engine->execlists.pending[0]))
2359 			cpu_relax();
2360 
2361 		saved_timeout = engine->props.preempt_timeout_ms;
		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
2363 
2364 		i915_request_get(rq);
2365 		i915_request_add(rq);
2366 
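		/*
		 * Make sure the request has been passed to the backend, and
		 * so has sampled the shortened preempt timeout, before we
		 * restore the original value.
		 */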
2367 		intel_engine_flush_submission(engine);
2368 		engine->props.preempt_timeout_ms = saved_timeout;
2369 
2370 		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2371 			intel_gt_set_wedged(gt);
2372 			i915_request_put(rq);
2373 			err = -ETIME;
2374 			goto err_ctx_lo;
2375 		}
2376 
2377 		igt_spinner_end(&spin_lo);
2378 		i915_request_put(rq);
2379 	}
2380 
2381 	err = 0;
2382 err_ctx_lo:
2383 	kernel_context_close(ctx_lo);
2384 err_ctx_hi:
2385 	kernel_context_close(ctx_hi);
2386 err_spin_lo:
2387 	igt_spinner_fini(&spin_lo);
2388 	return err;
2389 }
2390 
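/* Pick a pseudorandom value in the range [min, max). */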
2391 static int random_range(struct rnd_state *rnd, int min, int max)
2392 {
2393 	return i915_prandom_u32_max_state(max - min, rnd) + min;
2394 }
2395 
2396 static int random_priority(struct rnd_state *rnd)
2397 {
2398 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2399 }
2400 
2401 struct preempt_smoke {
2402 	struct intel_gt *gt;
2403 	struct i915_gem_context **contexts;
2404 	struct intel_engine_cs *engine;
2405 	struct drm_i915_gem_object *batch;
2406 	unsigned int ncontext;
2407 	struct rnd_state prng;
2408 	unsigned long count;
2409 };
2410 
2411 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2412 {
2413 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2414 							  &smoke->prng)];
2415 }
2416 
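/*
 * Set @ctx to priority @prio and submit a single request from it to
 * smoke->engine, optionally executing @batch (a page of MI_ARB_CHECK,
 * providing frequent arbitration points at which it may be preempted).
 */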
2417 static int smoke_submit(struct preempt_smoke *smoke,
2418 			struct i915_gem_context *ctx, int prio,
2419 			struct drm_i915_gem_object *batch)
2420 {
2421 	struct i915_request *rq;
2422 	struct i915_vma *vma = NULL;
2423 	int err = 0;
2424 
2425 	if (batch) {
2426 		struct i915_address_space *vm;
2427 
2428 		vm = i915_gem_context_get_vm_rcu(ctx);
2429 		vma = i915_vma_instance(batch, vm, NULL);
2430 		i915_vm_put(vm);
2431 		if (IS_ERR(vma))
2432 			return PTR_ERR(vma);
2433 
2434 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
2435 		if (err)
2436 			return err;
2437 	}
2438 
2439 	ctx->sched.priority = prio;
2440 
2441 	rq = igt_request_alloc(ctx, smoke->engine);
2442 	if (IS_ERR(rq)) {
2443 		err = PTR_ERR(rq);
2444 		goto unpin;
2445 	}
2446 
2447 	if (vma) {
2448 		i915_vma_lock(vma);
2449 		err = i915_request_await_object(rq, vma->obj, false);
2450 		if (!err)
2451 			err = i915_vma_move_to_active(vma, rq, 0);
2452 		if (!err)
2453 			err = rq->engine->emit_bb_start(rq,
2454 							vma->node.start,
2455 							PAGE_SIZE, 0);
2456 		i915_vma_unlock(vma);
2457 	}
2458 
2459 	i915_request_add(rq);
2460 
2461 unpin:
2462 	if (vma)
2463 		i915_vma_unpin(vma);
2464 
2465 	return err;
2466 }
2467 
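/*
 * Per-engine thread for the crescendo phase: keep submitting requests from
 * random contexts with priorities that ramp towards I915_PRIORITY_MAX and
 * then wrap, so that each submission tends to preempt its predecessor.
 */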
2468 static int smoke_crescendo_thread(void *arg)
2469 {
2470 	struct preempt_smoke *smoke = arg;
2471 	IGT_TIMEOUT(end_time);
2472 	unsigned long count;
2473 
2474 	count = 0;
2475 	do {
2476 		struct i915_gem_context *ctx = smoke_context(smoke);
2477 		int err;
2478 
2479 		err = smoke_submit(smoke,
2480 				   ctx, count % I915_PRIORITY_MAX,
2481 				   smoke->batch);
2482 		if (err)
2483 			return err;
2484 
2485 		count++;
2486 	} while (!__igt_timeout(end_time, NULL));
2487 
2488 	smoke->count = count;
2489 	return 0;
2490 }
2491 
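/*
 * Run one crescendo thread per engine in parallel and report the total
 * number of requests submitted before the timeout expired.
 */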
2492 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2493 #define BATCH BIT(0)
2494 {
2495 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
2496 	struct preempt_smoke arg[I915_NUM_ENGINES];
2497 	struct intel_engine_cs *engine;
2498 	enum intel_engine_id id;
2499 	unsigned long count;
2500 	int err = 0;
2501 
2502 	for_each_engine(engine, smoke->gt, id) {
2503 		arg[id] = *smoke;
2504 		arg[id].engine = engine;
2505 		if (!(flags & BATCH))
2506 			arg[id].batch = NULL;
2507 		arg[id].count = 0;
2508 
		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2510 				      "igt/smoke:%d", id);
2511 		if (IS_ERR(tsk[id])) {
2512 			err = PTR_ERR(tsk[id]);
2513 			break;
2514 		}
2515 		get_task_struct(tsk[id]);
2516 	}
2517 
2518 	yield(); /* start all threads before we kthread_stop() */
2519 
2520 	count = 0;
2521 	for_each_engine(engine, smoke->gt, id) {
2522 		int status;
2523 
2524 		if (IS_ERR_OR_NULL(tsk[id]))
2525 			continue;
2526 
2527 		status = kthread_stop(tsk[id]);
2528 		if (status && !err)
2529 			err = status;
2530 
2531 		count += arg[id].count;
2532 
2533 		put_task_struct(tsk[id]);
2534 	}
2535 
2536 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2537 		count, flags,
2538 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
	return err;
2540 }
2541 
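/*
 * Single threaded phase: cycle through the engines, submitting each request
 * from a random context at a random priority.
 */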
2542 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2543 {
2544 	enum intel_engine_id id;
2545 	IGT_TIMEOUT(end_time);
2546 	unsigned long count;
2547 
2548 	count = 0;
2549 	do {
2550 		for_each_engine(smoke->engine, smoke->gt, id) {
2551 			struct i915_gem_context *ctx = smoke_context(smoke);
2552 			int err;
2553 
2554 			err = smoke_submit(smoke,
2555 					   ctx, random_priority(&smoke->prng),
2556 					   flags & BATCH ? smoke->batch : NULL);
2557 			if (err)
2558 				return err;
2559 
2560 			count++;
2561 		}
2562 	} while (!__igt_timeout(end_time, NULL));
2563 
2564 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
2565 		count, flags,
2566 		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2567 	return 0;
2568 }
2569 
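/*
 * Stress the preemption machinery by flooding all engines with requests
 * from a large pool of contexts, first with steadily increasing and then
 * with random priorities, checking that submission survives intact.
 */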
2570 static int live_preempt_smoke(void *arg)
2571 {
2572 	struct preempt_smoke smoke = {
2573 		.gt = arg,
2574 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
2575 		.ncontext = 1024,
2576 	};
2577 	const unsigned int phase[] = { 0, BATCH };
2578 	struct igt_live_test t;
2579 	int err = -ENOMEM;
2580 	u32 *cs;
2581 	int n;
2582 
2583 	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
2584 		return 0;
2585 
2586 	smoke.contexts = kmalloc_array(smoke.ncontext,
2587 				       sizeof(*smoke.contexts),
2588 				       GFP_KERNEL);
2589 	if (!smoke.contexts)
2590 		return -ENOMEM;
2591 
2592 	smoke.batch =
2593 		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
2594 	if (IS_ERR(smoke.batch)) {
2595 		err = PTR_ERR(smoke.batch);
2596 		goto err_free;
2597 	}
2598 
2599 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
2600 	if (IS_ERR(cs)) {
2601 		err = PTR_ERR(cs);
2602 		goto err_batch;
2603 	}
2604 	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
2605 		cs[n] = MI_ARB_CHECK;
2606 	cs[n] = MI_BATCH_BUFFER_END;
2607 	i915_gem_object_flush_map(smoke.batch);
2608 	i915_gem_object_unpin_map(smoke.batch);
2609 
2610 	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
2611 		err = -EIO;
2612 		goto err_batch;
2613 	}
2614 
2615 	for (n = 0; n < smoke.ncontext; n++) {
2616 		smoke.contexts[n] = kernel_context(smoke.gt->i915);
2617 		if (!smoke.contexts[n])
2618 			goto err_ctx;
2619 	}
2620 
2621 	for (n = 0; n < ARRAY_SIZE(phase); n++) {
2622 		err = smoke_crescendo(&smoke, phase[n]);
2623 		if (err)
2624 			goto err_ctx;
2625 
2626 		err = smoke_random(&smoke, phase[n]);
2627 		if (err)
2628 			goto err_ctx;
2629 	}
2630 
2631 err_ctx:
2632 	if (igt_live_test_end(&t))
2633 		err = -EIO;
2634 
2635 	for (n = 0; n < smoke.ncontext; n++) {
2636 		if (!smoke.contexts[n])
2637 			break;
2638 		kernel_context_close(smoke.contexts[n]);
2639 	}
2640 
2641 err_batch:
2642 	i915_gem_object_put(smoke.batch);
2643 err_free:
2644 	kfree(smoke.contexts);
2645 
2646 	return err;
2647 }
2648 
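/*
 * Measure the latency of submitting and retiring empty requests sent to
 * @nctx virtual engines built from @nsibling siblings, either interleaving
 * the contexts on each pass or (with CHAIN) submitting a whole chain per
 * context before moving on to the next.
 */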
2649 static int nop_virtual_engine(struct intel_gt *gt,
2650 			      struct intel_engine_cs **siblings,
2651 			      unsigned int nsibling,
2652 			      unsigned int nctx,
2653 			      unsigned int flags)
2654 #define CHAIN BIT(0)
2655 {
2656 	IGT_TIMEOUT(end_time);
2657 	struct i915_request *request[16] = {};
2658 	struct intel_context *ve[16];
2659 	unsigned long n, prime, nc;
2660 	struct igt_live_test t;
2661 	ktime_t times[2] = {};
2662 	int err;
2663 
2664 	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
2665 
2666 	for (n = 0; n < nctx; n++) {
2667 		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
2668 		if (IS_ERR(ve[n])) {
2669 			err = PTR_ERR(ve[n]);
2670 			nctx = n;
2671 			goto out;
2672 		}
2673 
2674 		err = intel_context_pin(ve[n]);
2675 		if (err) {
2676 			intel_context_put(ve[n]);
2677 			nctx = n;
2678 			goto out;
2679 		}
2680 	}
2681 
2682 	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
2683 	if (err)
2684 		goto out;
2685 
2686 	for_each_prime_number_from(prime, 1, 8192) {
2687 		times[1] = ktime_get_raw();
2688 
2689 		if (flags & CHAIN) {
2690 			for (nc = 0; nc < nctx; nc++) {
2691 				for (n = 0; n < prime; n++) {
2692 					struct i915_request *rq;
2693 
2694 					rq = i915_request_create(ve[nc]);
2695 					if (IS_ERR(rq)) {
2696 						err = PTR_ERR(rq);
2697 						goto out;
2698 					}
2699 
2700 					if (request[nc])
2701 						i915_request_put(request[nc]);
2702 					request[nc] = i915_request_get(rq);
2703 					i915_request_add(rq);
2704 				}
2705 			}
2706 		} else {
2707 			for (n = 0; n < prime; n++) {
2708 				for (nc = 0; nc < nctx; nc++) {
2709 					struct i915_request *rq;
2710 
2711 					rq = i915_request_create(ve[nc]);
2712 					if (IS_ERR(rq)) {
2713 						err = PTR_ERR(rq);
2714 						goto out;
2715 					}
2716 
2717 					if (request[nc])
2718 						i915_request_put(request[nc]);
2719 					request[nc] = i915_request_get(rq);
2720 					i915_request_add(rq);
2721 				}
2722 			}
2723 		}
2724 
2725 		for (nc = 0; nc < nctx; nc++) {
2726 			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
2727 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
2728 				       __func__, ve[0]->engine->name,
2729 				       request[nc]->fence.context,
2730 				       request[nc]->fence.seqno);
2731 
2732 				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2733 					  __func__, ve[0]->engine->name,
2734 					  request[nc]->fence.context,
2735 					  request[nc]->fence.seqno);
2736 				GEM_TRACE_DUMP();
2737 				intel_gt_set_wedged(gt);
2738 				break;
2739 			}
2740 		}
2741 
2742 		times[1] = ktime_sub(ktime_get_raw(), times[1]);
2743 		if (prime == 1)
2744 			times[0] = times[1];
2745 
2746 		for (nc = 0; nc < nctx; nc++) {
2747 			i915_request_put(request[nc]);
2748 			request[nc] = NULL;
2749 		}
2750 
2751 		if (__igt_timeout(end_time, NULL))
2752 			break;
2753 	}
2754 
2755 	err = igt_live_test_end(&t);
2756 	if (err)
2757 		goto out;
2758 
	pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
2760 		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
2761 		prime, div64_u64(ktime_to_ns(times[1]), prime));
2762 
2763 out:
2764 	if (igt_flush_test(gt->i915))
2765 		err = -EIO;
2766 
2767 	for (nc = 0; nc < nctx; nc++) {
2768 		i915_request_put(request[nc]);
2769 		intel_context_unpin(ve[nc]);
2770 		intel_context_put(ve[nc]);
2771 	}
2772 	return err;
2773 }
2774 
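/*
 * Smoke test virtual engine creation and submission: first wrap each
 * physical engine individually, then exercise every class that has two or
 * more siblings with an increasing number of contexts.
 */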
2775 static int live_virtual_engine(void *arg)
2776 {
2777 	struct intel_gt *gt = arg;
2778 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2779 	struct intel_engine_cs *engine;
2780 	enum intel_engine_id id;
2781 	unsigned int class, inst;
2782 	int err;
2783 
2784 	if (USES_GUC_SUBMISSION(gt->i915))
2785 		return 0;
2786 
2787 	for_each_engine(engine, gt, id) {
2788 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
2789 		if (err) {
2790 			pr_err("Failed to wrap engine %s: err=%d\n",
2791 			       engine->name, err);
2792 			return err;
2793 		}
2794 	}
2795 
2796 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2797 		int nsibling, n;
2798 
2799 		nsibling = 0;
2800 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2801 			if (!gt->engine_class[class][inst])
2802 				continue;
2803 
2804 			siblings[nsibling++] = gt->engine_class[class][inst];
2805 		}
2806 		if (nsibling < 2)
2807 			continue;
2808 
2809 		for (n = 1; n <= nsibling + 1; n++) {
2810 			err = nop_virtual_engine(gt, siblings, nsibling,
2811 						 n, 0);
2812 			if (err)
2813 				return err;
2814 		}
2815 
2816 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
2817 		if (err)
2818 			return err;
2819 	}
2820 
2821 	return 0;
2822 }
2823 
2824 static int mask_virtual_engine(struct intel_gt *gt,
2825 			       struct intel_engine_cs **siblings,
2826 			       unsigned int nsibling)
2827 {
2828 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
2829 	struct intel_context *ve;
2830 	struct igt_live_test t;
2831 	unsigned int n;
2832 	int err;
2833 
2834 	/*
2835 	 * Check that by setting the execution mask on a request, we can
2836 	 * restrict it to our desired engine within the virtual engine.
2837 	 */
2838 
2839 	ve = intel_execlists_create_virtual(siblings, nsibling);
2840 	if (IS_ERR(ve)) {
2841 		err = PTR_ERR(ve);
2842 		goto out_close;
2843 	}
2844 
2845 	err = intel_context_pin(ve);
2846 	if (err)
2847 		goto out_put;
2848 
2849 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2850 	if (err)
2851 		goto out_unpin;
2852 
2853 	for (n = 0; n < nsibling; n++) {
2854 		request[n] = i915_request_create(ve);
2855 		if (IS_ERR(request[n])) {
2856 			err = PTR_ERR(request[n]);
2857 			nsibling = n;
2858 			goto out;
2859 		}
2860 
2861 		/* Reverse order as it's more likely to be unnatural */
2862 		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
2863 
2864 		i915_request_get(request[n]);
2865 		i915_request_add(request[n]);
2866 	}
2867 
2868 	for (n = 0; n < nsibling; n++) {
2869 		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
2870 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
2871 			       __func__, ve->engine->name,
2872 			       request[n]->fence.context,
2873 			       request[n]->fence.seqno);
2874 
2875 			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
2876 				  __func__, ve->engine->name,
2877 				  request[n]->fence.context,
2878 				  request[n]->fence.seqno);
2879 			GEM_TRACE_DUMP();
2880 			intel_gt_set_wedged(gt);
2881 			err = -EIO;
2882 			goto out;
2883 		}
2884 
2885 		if (request[n]->engine != siblings[nsibling - n - 1]) {
2886 			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
2887 			       request[n]->engine->name,
2888 			       siblings[nsibling - n - 1]->name);
2889 			err = -EINVAL;
2890 			goto out;
2891 		}
2892 	}
2893 
2894 	err = igt_live_test_end(&t);
2895 out:
2896 	if (igt_flush_test(gt->i915))
2897 		err = -EIO;
2898 
2899 	for (n = 0; n < nsibling; n++)
2900 		i915_request_put(request[n]);
2901 
2902 out_unpin:
2903 	intel_context_unpin(ve);
2904 out_put:
2905 	intel_context_put(ve);
2906 out_close:
2907 	return err;
2908 }
2909 
2910 static int live_virtual_mask(void *arg)
2911 {
2912 	struct intel_gt *gt = arg;
2913 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
2914 	unsigned int class, inst;
2915 	int err;
2916 
2917 	if (USES_GUC_SUBMISSION(gt->i915))
2918 		return 0;
2919 
2920 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
2921 		unsigned int nsibling;
2922 
2923 		nsibling = 0;
2924 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
2925 			if (!gt->engine_class[class][inst])
2926 				break;
2927 
2928 			siblings[nsibling++] = gt->engine_class[class][inst];
2929 		}
2930 		if (nsibling < 2)
2931 			continue;
2932 
2933 		err = mask_virtual_engine(gt, siblings, nsibling);
2934 		if (err)
2935 			return err;
2936 	}
2937 
2938 	return 0;
2939 }
2940 
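/*
 * Bounce a chain of requests around the siblings of a virtual engine, each
 * request saving one CS_GPR to scratch and seeding the next, to check that
 * the user register state follows the context from engine to engine.
 */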
2941 static int preserved_virtual_engine(struct intel_gt *gt,
2942 				    struct intel_engine_cs **siblings,
2943 				    unsigned int nsibling)
2944 {
2945 	struct i915_request *last = NULL;
2946 	struct intel_context *ve;
2947 	struct i915_vma *scratch;
2948 	struct igt_live_test t;
2949 	unsigned int n;
2950 	int err = 0;
2951 	u32 *cs;
2952 
2953 	scratch = create_scratch(siblings[0]->gt);
2954 	if (IS_ERR(scratch))
2955 		return PTR_ERR(scratch);
2956 
2957 	ve = intel_execlists_create_virtual(siblings, nsibling);
2958 	if (IS_ERR(ve)) {
2959 		err = PTR_ERR(ve);
2960 		goto out_scratch;
2961 	}
2962 
2963 	err = intel_context_pin(ve);
2964 	if (err)
2965 		goto out_put;
2966 
2967 	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
2968 	if (err)
2969 		goto out_unpin;
2970 
2971 	for (n = 0; n < NUM_GPR_DW; n++) {
2972 		struct intel_engine_cs *engine = siblings[n % nsibling];
2973 		struct i915_request *rq;
2974 
2975 		rq = i915_request_create(ve);
2976 		if (IS_ERR(rq)) {
2977 			err = PTR_ERR(rq);
2978 			goto out_end;
2979 		}
2980 
2981 		i915_request_put(last);
2982 		last = i915_request_get(rq);
2983 
2984 		cs = intel_ring_begin(rq, 8);
2985 		if (IS_ERR(cs)) {
2986 			i915_request_add(rq);
2987 			err = PTR_ERR(cs);
2988 			goto out_end;
2989 		}
2990 
2991 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
2992 		*cs++ = CS_GPR(engine, n);
2993 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
2994 		*cs++ = 0;
2995 
2996 		*cs++ = MI_LOAD_REGISTER_IMM(1);
2997 		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
2998 		*cs++ = n + 1;
2999 
3000 		*cs++ = MI_NOOP;
3001 		intel_ring_advance(rq, cs);
3002 
3003 		/* Restrict this request to run on a particular engine */
3004 		rq->execution_mask = engine->mask;
3005 		i915_request_add(rq);
3006 	}
3007 
3008 	if (i915_request_wait(last, 0, HZ / 5) < 0) {
3009 		err = -ETIME;
3010 		goto out_end;
3011 	}
3012 
3013 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3014 	if (IS_ERR(cs)) {
3015 		err = PTR_ERR(cs);
3016 		goto out_end;
3017 	}
3018 
3019 	for (n = 0; n < NUM_GPR_DW; n++) {
3020 		if (cs[n] != n) {
3021 			pr_err("Incorrect value[%d] found for GPR[%d]\n",
3022 			       cs[n], n);
3023 			err = -EINVAL;
3024 			break;
3025 		}
3026 	}
3027 
3028 	i915_gem_object_unpin_map(scratch->obj);
3029 
3030 out_end:
3031 	if (igt_live_test_end(&t))
3032 		err = -EIO;
3033 	i915_request_put(last);
3034 out_unpin:
3035 	intel_context_unpin(ve);
3036 out_put:
3037 	intel_context_put(ve);
3038 out_scratch:
3039 	i915_vma_unpin_and_release(&scratch, 0);
3040 	return err;
3041 }
3042 
3043 static int live_virtual_preserved(void *arg)
3044 {
3045 	struct intel_gt *gt = arg;
3046 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3047 	unsigned int class, inst;
3048 
3049 	/*
3050 	 * Check that the context image retains non-privileged (user) registers
3051 	 * from one engine to the next. For this we check that the CS_GPR
	 * values are preserved.
3053 	 */
3054 
3055 	if (USES_GUC_SUBMISSION(gt->i915))
3056 		return 0;
3057 
3058 	/* As we use CS_GPR we cannot run before they existed on all engines. */
3059 	if (INTEL_GEN(gt->i915) < 9)
3060 		return 0;
3061 
3062 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3063 		int nsibling, err;
3064 
3065 		nsibling = 0;
3066 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3067 			if (!gt->engine_class[class][inst])
3068 				continue;
3069 
3070 			siblings[nsibling++] = gt->engine_class[class][inst];
3071 		}
3072 		if (nsibling < 2)
3073 			continue;
3074 
3075 		err = preserved_virtual_engine(gt, siblings, nsibling);
3076 		if (err)
3077 			return err;
3078 	}
3079 
3080 	return 0;
3081 }
3082 
3083 static int bond_virtual_engine(struct intel_gt *gt,
3084 			       unsigned int class,
3085 			       struct intel_engine_cs **siblings,
3086 			       unsigned int nsibling,
3087 			       unsigned int flags)
3088 #define BOND_SCHEDULE BIT(0)
3089 {
3090 	struct intel_engine_cs *master;
3091 	struct i915_request *rq[16];
3092 	enum intel_engine_id id;
3093 	struct igt_spinner spin;
3094 	unsigned long n;
3095 	int err;
3096 
3097 	/*
3098 	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per engine
3100 	 * and a magic fence to schedule each of the bonded requests
3101 	 * at the same time. A consequence of our current scheduler is that
3102 	 * we only move requests to the HW ready queue when the request
3103 	 * becomes ready, that is when all of its prerequisite fences have
3104 	 * been signaled. As one of those fences is the master submit fence,
3105 	 * there is a delay on all secondary fences as the HW may be
3106 	 * currently busy. Equally, as all the requests are independent,
3107 	 * they may have other fences that delay individual request
3108 	 * submission to HW. Ergo, we do not guarantee that all requests are
3109 	 * immediately submitted to HW at the same time, just that if the
3110 	 * rules are abided by, they are ready at the same time as the
3111 	 * first is submitted. Userspace can embed semaphores in its batch
3112 	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally we are asked whether the scheduler should instead
3114 	 * take care of parallel execution, even across preemption events on
3115 	 * different HW. (The proper answer is of course "lalalala".)
3116 	 *
3117 	 * With the submit-fence, we have identified three possible phases
3118 	 * of synchronisation depending on the master fence: queued (not
3119 	 * ready), executing, and signaled. The first two are quite simple
3120 	 * and checked below. However, the signaled master fence handling is
3121 	 * contentious. Currently we do not distinguish between a signaled
3122 	 * fence and an expired fence, as once signaled it does not convey
3123 	 * any information about the previous execution. It may even be freed
	 * and hence may no longer exist by the time we check. Ergo we currently
3125 	 * do not apply the bonding constraint for an already signaled fence,
3126 	 * as our expectation is that it should not constrain the secondaries
3127 	 * and is outside of the scope of the bonded request API (i.e. all
3128 	 * userspace requests are meant to be running in parallel). As
3129 	 * it imposes no constraint, and is effectively a no-op, we do not
3130 	 * check below as normal execution flows are checked extensively above.
3131 	 *
3132 	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
3134 	 */
3135 
3136 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3137 
3138 	if (igt_spinner_init(&spin, gt))
3139 		return -ENOMEM;
3140 
3141 	err = 0;
3142 	rq[0] = ERR_PTR(-ENOMEM);
3143 	for_each_engine(master, gt, id) {
3144 		struct i915_sw_fence fence = {};
3145 
3146 		if (master->class == class)
3147 			continue;
3148 
3149 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3150 
3151 		rq[0] = igt_spinner_create_request(&spin,
3152 						   master->kernel_context,
3153 						   MI_NOOP);
3154 		if (IS_ERR(rq[0])) {
3155 			err = PTR_ERR(rq[0]);
3156 			goto out;
3157 		}
3158 		i915_request_get(rq[0]);
3159 
3160 		if (flags & BOND_SCHEDULE) {
3161 			onstack_fence_init(&fence);
3162 			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3163 							       &fence,
3164 							       GFP_KERNEL);
3165 		}
3166 
3167 		i915_request_add(rq[0]);
3168 		if (err < 0)
3169 			goto out;
3170 
3171 		if (!(flags & BOND_SCHEDULE) &&
3172 		    !igt_wait_for_spinner(&spin, rq[0])) {
3173 			err = -EIO;
3174 			goto out;
3175 		}
3176 
3177 		for (n = 0; n < nsibling; n++) {
3178 			struct intel_context *ve;
3179 
3180 			ve = intel_execlists_create_virtual(siblings, nsibling);
3181 			if (IS_ERR(ve)) {
3182 				err = PTR_ERR(ve);
3183 				onstack_fence_fini(&fence);
3184 				goto out;
3185 			}
3186 
3187 			err = intel_virtual_engine_attach_bond(ve->engine,
3188 							       master,
3189 							       siblings[n]);
3190 			if (err) {
3191 				intel_context_put(ve);
3192 				onstack_fence_fini(&fence);
3193 				goto out;
3194 			}
3195 
3196 			err = intel_context_pin(ve);
3197 			intel_context_put(ve);
3198 			if (err) {
3199 				onstack_fence_fini(&fence);
3200 				goto out;
3201 			}
3202 
3203 			rq[n + 1] = i915_request_create(ve);
3204 			intel_context_unpin(ve);
3205 			if (IS_ERR(rq[n + 1])) {
3206 				err = PTR_ERR(rq[n + 1]);
3207 				onstack_fence_fini(&fence);
3208 				goto out;
3209 			}
3210 			i915_request_get(rq[n + 1]);
3211 
3212 			err = i915_request_await_execution(rq[n + 1],
3213 							   &rq[0]->fence,
3214 							   ve->engine->bond_execute);
3215 			i915_request_add(rq[n + 1]);
3216 			if (err < 0) {
3217 				onstack_fence_fini(&fence);
3218 				goto out;
3219 			}
3220 		}
3221 		onstack_fence_fini(&fence);
3222 		intel_engine_flush_submission(master);
3223 		igt_spinner_end(&spin);
3224 
3225 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3226 			pr_err("Master request did not execute (on %s)!\n",
3227 			       rq[0]->engine->name);
3228 			err = -EIO;
3229 			goto out;
3230 		}
3231 
3232 		for (n = 0; n < nsibling; n++) {
3233 			if (i915_request_wait(rq[n + 1], 0,
3234 					      MAX_SCHEDULE_TIMEOUT) < 0) {
3235 				err = -EIO;
3236 				goto out;
3237 			}
3238 
3239 			if (rq[n + 1]->engine != siblings[n]) {
3240 				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3241 				       siblings[n]->name,
3242 				       rq[n + 1]->engine->name,
3243 				       rq[0]->engine->name);
3244 				err = -EINVAL;
3245 				goto out;
3246 			}
3247 		}
3248 
3249 		for (n = 0; !IS_ERR(rq[n]); n++)
3250 			i915_request_put(rq[n]);
3251 		rq[0] = ERR_PTR(-ENOMEM);
3252 	}
3253 
3254 out:
3255 	for (n = 0; !IS_ERR(rq[n]); n++)
3256 		i915_request_put(rq[n]);
3257 	if (igt_flush_test(gt->i915))
3258 		err = -EIO;
3259 
3260 	igt_spinner_fini(&spin);
3261 	return err;
3262 }
3263 
3264 static int live_virtual_bond(void *arg)
3265 {
3266 	static const struct phase {
3267 		const char *name;
3268 		unsigned int flags;
3269 	} phases[] = {
3270 		{ "", 0 },
3271 		{ "schedule", BOND_SCHEDULE },
3272 		{ },
3273 	};
3274 	struct intel_gt *gt = arg;
3275 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3276 	unsigned int class, inst;
3277 	int err;
3278 
3279 	if (USES_GUC_SUBMISSION(gt->i915))
3280 		return 0;
3281 
3282 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3283 		const struct phase *p;
3284 		int nsibling;
3285 
3286 		nsibling = 0;
3287 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3288 			if (!gt->engine_class[class][inst])
3289 				break;
3290 
3291 			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3292 			siblings[nsibling++] = gt->engine_class[class][inst];
3293 		}
3294 		if (nsibling < 2)
3295 			continue;
3296 
3297 		for (p = phases; p->name; p++) {
3298 			err = bond_virtual_engine(gt,
3299 						  class, siblings, nsibling,
3300 						  p->flags);
3301 			if (err) {
3302 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3303 				       __func__, p->name, class, nsibling, err);
3304 				return err;
3305 			}
3306 		}
3307 	}
3308 
3309 	return 0;
3310 }
3311 
3312 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3313 {
3314 	static const struct i915_subtest tests[] = {
3315 		SUBTEST(live_sanitycheck),
3316 		SUBTEST(live_unlite_switch),
3317 		SUBTEST(live_unlite_preempt),
3318 		SUBTEST(live_timeslice_preempt),
3319 		SUBTEST(live_timeslice_queue),
3320 		SUBTEST(live_busywait_preempt),
3321 		SUBTEST(live_preempt),
3322 		SUBTEST(live_late_preempt),
3323 		SUBTEST(live_nopreempt),
3324 		SUBTEST(live_preempt_cancel),
3325 		SUBTEST(live_suppress_self_preempt),
3326 		SUBTEST(live_suppress_wait_preempt),
3327 		SUBTEST(live_chain_preempt),
3328 		SUBTEST(live_preempt_gang),
3329 		SUBTEST(live_preempt_hang),
3330 		SUBTEST(live_preempt_timeout),
3331 		SUBTEST(live_preempt_smoke),
3332 		SUBTEST(live_virtual_engine),
3333 		SUBTEST(live_virtual_mask),
3334 		SUBTEST(live_virtual_preserved),
3335 		SUBTEST(live_virtual_bond),
3336 	};
3337 
3338 	if (!HAS_EXECLISTS(i915))
3339 		return 0;
3340 
3341 	if (intel_gt_is_wedged(&i915->gt))
3342 		return 0;
3343 
3344 	return intel_gt_live_subtests(tests, &i915->gt);
3345 }
3346 
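/* Dump a buffer as hex dwords, eliding repeated rows like hexdump(1). */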
3347 static void hexdump(const void *buf, size_t len)
3348 {
3349 	const size_t rowsize = 8 * sizeof(u32);
3350 	const void *prev = NULL;
3351 	bool skip = false;
3352 	size_t pos;
3353 
3354 	for (pos = 0; pos < len; pos += rowsize) {
3355 		char line[128];
3356 
3357 		if (prev && !memcmp(prev, buf + pos, rowsize)) {
3358 			if (!skip) {
3359 				pr_info("*\n");
3360 				skip = true;
3361 			}
3362 			continue;
3363 		}
3364 
3365 		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3366 						rowsize, sizeof(u32),
3367 						line, sizeof(line),
3368 						false) >= sizeof(line));
3369 		pr_info("[%04zx] %s\n", pos, line);
3370 
3371 		prev = buf + pos;
3372 		skip = false;
3373 	}
3374 }
3375 
3376 static int live_lrc_layout(void *arg)
3377 {
3378 	struct intel_gt *gt = arg;
3379 	struct intel_engine_cs *engine;
3380 	enum intel_engine_id id;
3381 	u32 *lrc;
3382 	int err;
3383 
3384 	/*
3385 	 * Check the registers offsets we use to create the initial reg state
3386 	 * match the layout saved by HW.
3387 	 */
3388 
3389 	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
3390 	if (!lrc)
3391 		return -ENOMEM;
3392 
3393 	err = 0;
3394 	for_each_engine(engine, gt, id) {
3395 		u32 *hw;
3396 		int dw;
3397 
3398 		if (!engine->default_state)
3399 			continue;
3400 
3401 		hw = i915_gem_object_pin_map(engine->default_state,
3402 					     I915_MAP_WB);
3403 		if (IS_ERR(hw)) {
3404 			err = PTR_ERR(hw);
3405 			break;
3406 		}
3407 		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3408 
3409 		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
3410 					 engine->kernel_context,
3411 					 engine,
3412 					 engine->kernel_context->ring,
3413 					 true);
3414 
3415 		dw = 0;
3416 		do {
3417 			u32 lri = hw[dw];
3418 
3419 			if (lri == 0) {
3420 				dw++;
3421 				continue;
3422 			}
3423 
3424 			if (lrc[dw] == 0) {
3425 				pr_debug("%s: skipped instruction %x at dword %d\n",
3426 					 engine->name, lri, dw);
3427 				dw++;
3428 				continue;
3429 			}
3430 
3431 			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
3432 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
3433 				       engine->name, dw, lri);
3434 				err = -EINVAL;
3435 				break;
3436 			}
3437 
3438 			if (lrc[dw] != lri) {
3439 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
3440 				       engine->name, dw, lri, lrc[dw]);
3441 				err = -EINVAL;
3442 				break;
3443 			}
3444 
3445 			lri &= 0x7f;
3446 			lri++;
3447 			dw++;
3448 
3449 			while (lri) {
3450 				if (hw[dw] != lrc[dw]) {
3451 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
3452 					       engine->name, dw, hw[dw], lrc[dw]);
3453 					err = -EINVAL;
3454 					break;
3455 				}
3456 
3457 				/*
3458 				 * Skip over the actual register value as we
3459 				 * expect that to differ.
3460 				 */
3461 				dw += 2;
3462 				lri -= 2;
3463 			}
3464 		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
3465 
3466 		if (err) {
3467 			pr_info("%s: HW register image:\n", engine->name);
3468 			hexdump(hw, PAGE_SIZE);
3469 
3470 			pr_info("%s: SW register image:\n", engine->name);
3471 			hexdump(lrc, PAGE_SIZE);
3472 		}
3473 
3474 		i915_gem_object_unpin_map(engine->default_state);
3475 		if (err)
3476 			break;
3477 	}
3478 
3479 	kfree(lrc);
3480 	return err;
3481 }
3482 
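/* Scan a page of the context image for the first dword matching @offset. */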
3483 static int find_offset(const u32 *lri, u32 offset)
3484 {
3485 	int i;
3486 
3487 	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
3488 		if (lri[i] == offset)
3489 			return i;
3490 
3491 	return -1;
3492 }
3493 
3494 static int live_lrc_fixed(void *arg)
3495 {
3496 	struct intel_gt *gt = arg;
3497 	struct intel_engine_cs *engine;
3498 	enum intel_engine_id id;
3499 	int err = 0;
3500 
3501 	/*
3502 	 * Check the assumed register offsets match the actual locations in
3503 	 * the context image.
3504 	 */
3505 
3506 	for_each_engine(engine, gt, id) {
3507 		const struct {
3508 			u32 reg;
3509 			u32 offset;
3510 			const char *name;
3511 		} tbl[] = {
3512 			{
3513 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
3514 				CTX_RING_START - 1,
3515 				"RING_START"
3516 			},
3517 			{
3518 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
3519 				CTX_RING_CTL - 1,
3520 				"RING_CTL"
3521 			},
3522 			{
3523 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
3524 				CTX_RING_HEAD - 1,
3525 				"RING_HEAD"
3526 			},
3527 			{
3528 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
3529 				CTX_RING_TAIL - 1,
3530 				"RING_TAIL"
3531 			},
3532 			{
3533 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
3534 				lrc_ring_mi_mode(engine),
3535 				"RING_MI_MODE"
3536 			},
3537 			{
3538 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
3539 				CTX_BB_STATE - 1,
3540 				"BB_STATE"
3541 			},
3542 			{ },
3543 		}, *t;
3544 		u32 *hw;
3545 
3546 		if (!engine->default_state)
3547 			continue;
3548 
3549 		hw = i915_gem_object_pin_map(engine->default_state,
3550 					     I915_MAP_WB);
3551 		if (IS_ERR(hw)) {
3552 			err = PTR_ERR(hw);
3553 			break;
3554 		}
3555 		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
3556 
3557 		for (t = tbl; t->name; t++) {
3558 			int dw = find_offset(hw, t->reg);
3559 
3560 			if (dw != t->offset) {
3561 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
3562 				       engine->name,
3563 				       t->name,
3564 				       t->reg,
3565 				       dw,
3566 				       t->offset);
3567 				err = -EINVAL;
3568 			}
3569 		}
3570 
3571 		i915_gem_object_unpin_map(engine->default_state);
3572 	}
3573 
3574 	return err;
3575 }
3576 
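/*
 * From within a request, use SRM to read back RING_START and RING_TAIL and
 * compare them against the values we expect the context to be using.
 */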
3577 static int __live_lrc_state(struct intel_engine_cs *engine,
3578 			    struct i915_vma *scratch)
3579 {
3580 	struct intel_context *ce;
3581 	struct i915_request *rq;
3582 	enum {
3583 		RING_START_IDX = 0,
3584 		RING_TAIL_IDX,
3585 		MAX_IDX
3586 	};
3587 	u32 expected[MAX_IDX];
3588 	u32 *cs;
3589 	int err;
3590 	int n;
3591 
3592 	ce = intel_context_create(engine);
3593 	if (IS_ERR(ce))
3594 		return PTR_ERR(ce);
3595 
3596 	err = intel_context_pin(ce);
3597 	if (err)
3598 		goto err_put;
3599 
3600 	rq = i915_request_create(ce);
3601 	if (IS_ERR(rq)) {
3602 		err = PTR_ERR(rq);
3603 		goto err_unpin;
3604 	}
3605 
3606 	cs = intel_ring_begin(rq, 4 * MAX_IDX);
3607 	if (IS_ERR(cs)) {
3608 		err = PTR_ERR(cs);
3609 		i915_request_add(rq);
3610 		goto err_unpin;
3611 	}
3612 
3613 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3614 	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
3615 	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
3616 	*cs++ = 0;
3617 
3618 	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
3619 
3620 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3621 	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
3622 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
3623 	*cs++ = 0;
3624 
3625 	i915_request_get(rq);
3626 	i915_request_add(rq);
3627 
3628 	intel_engine_flush_submission(engine);
3629 	expected[RING_TAIL_IDX] = ce->ring->tail;
3630 
3631 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3632 		err = -ETIME;
3633 		goto err_rq;
3634 	}
3635 
3636 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3637 	if (IS_ERR(cs)) {
3638 		err = PTR_ERR(cs);
3639 		goto err_rq;
3640 	}
3641 
3642 	for (n = 0; n < MAX_IDX; n++) {
3643 		if (cs[n] != expected[n]) {
3644 			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
3645 			       engine->name, n, cs[n], expected[n]);
3646 			err = -EINVAL;
3647 			break;
3648 		}
3649 	}
3650 
3651 	i915_gem_object_unpin_map(scratch->obj);
3652 
3653 err_rq:
3654 	i915_request_put(rq);
3655 err_unpin:
3656 	intel_context_unpin(ce);
3657 err_put:
3658 	intel_context_put(ce);
3659 	return err;
3660 }
3661 
3662 static int live_lrc_state(void *arg)
3663 {
3664 	struct intel_gt *gt = arg;
3665 	struct intel_engine_cs *engine;
3666 	struct i915_vma *scratch;
3667 	enum intel_engine_id id;
3668 	int err = 0;
3669 
3670 	/*
3671 	 * Check the live register state matches what we expect for this
3672 	 * intel_context.
3673 	 */
3674 
3675 	scratch = create_scratch(gt);
3676 	if (IS_ERR(scratch))
3677 		return PTR_ERR(scratch);
3678 
3679 	for_each_engine(engine, gt, id) {
3680 		err = __live_lrc_state(engine, scratch);
3681 		if (err)
3682 			break;
3683 	}
3684 
3685 	if (igt_flush_test(gt->i915))
3686 		err = -EIO;
3687 
3688 	i915_vma_unpin_and_release(&scratch, 0);
3689 	return err;
3690 }
3691 
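/*
 * Fill every CS_GPR on @engine with a non-zero value (STACK_MAGIC) from the
 * kernel context, so that a later context can prove it did not inherit them.
 */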
3692 static int gpr_make_dirty(struct intel_engine_cs *engine)
3693 {
3694 	struct i915_request *rq;
3695 	u32 *cs;
3696 	int n;
3697 
3698 	rq = intel_engine_create_kernel_request(engine);
3699 	if (IS_ERR(rq))
3700 		return PTR_ERR(rq);
3701 
3702 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
3703 	if (IS_ERR(cs)) {
3704 		i915_request_add(rq);
3705 		return PTR_ERR(cs);
3706 	}
3707 
3708 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
3709 	for (n = 0; n < NUM_GPR_DW; n++) {
3710 		*cs++ = CS_GPR(engine, n);
3711 		*cs++ = STACK_MAGIC;
3712 	}
3713 	*cs++ = MI_NOOP;
3714 
3715 	intel_ring_advance(rq, cs);
3716 	i915_request_add(rq);
3717 
3718 	return 0;
3719 }
3720 
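/*
 * Having dirtied the GPR in another context, create a fresh context and read
 * the CS_GPR back to scratch, expecting every value to be zero.
 */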
3721 static int __live_gpr_clear(struct intel_engine_cs *engine,
3722 			    struct i915_vma *scratch)
3723 {
3724 	struct intel_context *ce;
3725 	struct i915_request *rq;
3726 	u32 *cs;
3727 	int err;
3728 	int n;
3729 
3730 	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
3731 		return 0; /* GPR only on rcs0 for gen8 */
3732 
3733 	err = gpr_make_dirty(engine);
3734 	if (err)
3735 		return err;
3736 
3737 	ce = intel_context_create(engine);
3738 	if (IS_ERR(ce))
3739 		return PTR_ERR(ce);
3740 
3741 	rq = intel_context_create_request(ce);
3742 	if (IS_ERR(rq)) {
3743 		err = PTR_ERR(rq);
3744 		goto err_put;
3745 	}
3746 
3747 	cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
3748 	if (IS_ERR(cs)) {
3749 		err = PTR_ERR(cs);
3750 		i915_request_add(rq);
3751 		goto err_put;
3752 	}
3753 
3754 	for (n = 0; n < NUM_GPR_DW; n++) {
3755 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3756 		*cs++ = CS_GPR(engine, n);
3757 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3758 		*cs++ = 0;
3759 	}
3760 
3761 	i915_request_get(rq);
3762 	i915_request_add(rq);
3763 
3764 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3765 		err = -ETIME;
3766 		goto err_rq;
3767 	}
3768 
3769 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3770 	if (IS_ERR(cs)) {
3771 		err = PTR_ERR(cs);
3772 		goto err_rq;
3773 	}
3774 
3775 	for (n = 0; n < NUM_GPR_DW; n++) {
3776 		if (cs[n]) {
3777 			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
3778 			       engine->name,
3779 			       n / 2, n & 1 ? "udw" : "ldw",
3780 			       cs[n]);
3781 			err = -EINVAL;
3782 			break;
3783 		}
3784 	}
3785 
3786 	i915_gem_object_unpin_map(scratch->obj);
3787 
3788 err_rq:
3789 	i915_request_put(rq);
3790 err_put:
3791 	intel_context_put(ce);
3792 	return err;
3793 }
3794 
3795 static int live_gpr_clear(void *arg)
3796 {
3797 	struct intel_gt *gt = arg;
3798 	struct intel_engine_cs *engine;
3799 	struct i915_vma *scratch;
3800 	enum intel_engine_id id;
3801 	int err = 0;
3802 
3803 	/*
3804 	 * Check that GPR registers are cleared in new contexts as we need
3805 	 * to avoid leaking any information from previous contexts.
3806 	 */
3807 
3808 	scratch = create_scratch(gt);
3809 	if (IS_ERR(scratch))
3810 		return PTR_ERR(scratch);
3811 
3812 	for_each_engine(engine, gt, id) {
3813 		err = __live_gpr_clear(engine, scratch);
3814 		if (err)
3815 			break;
3816 	}
3817 
3818 	if (igt_flush_test(gt->i915))
3819 		err = -EIO;
3820 
3821 	i915_vma_unpin_and_release(&scratch, 0);
3822 	return err;
3823 }
3824 
3825 int intel_lrc_live_selftests(struct drm_i915_private *i915)
3826 {
3827 	static const struct i915_subtest tests[] = {
3828 		SUBTEST(live_lrc_layout),
3829 		SUBTEST(live_lrc_fixed),
3830 		SUBTEST(live_lrc_state),
3831 		SUBTEST(live_gpr_clear),
3832 	};
3833 
3834 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
3835 		return 0;
3836 
3837 	return intel_gt_live_subtests(tests, &i915->gt);
3838 }
3839