// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_internal.h"

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"
#include "shmem_utils.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

#define LRI_HEADER MI_INSTR(0x22, 0)
#define LRI_LENGTH_MASK GENMASK(7, 0)

static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
}

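/* Has the request been submitted to, or already begun executing on, the HW? */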
static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}

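/*
 * Kick the submission tasklet and poll until the HW has acknowledged the
 * submission (no execlists write still pending and the request is active),
 * the request has completed, or the timeout expires.
 */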
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	/* Ignore our own attempts to suppress excess tasklets */
	tasklet_hi_schedule(&engine->sched_engine->tasklet);

	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

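/* Write 1 into the semaphore @slot in the status page to release any waiters. */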
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);
	return 0;
}

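/*
 * Submit a kernel context request ordered after the last request on @ce's
 * timeline and wait for it, so that @ce has been switched out and its
 * context image written back to memory before we inspect it.
 */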
static int context_flush(struct intel_context *ce, long timeout)
{
	struct i915_request *rq;
	struct dma_fence *fence;
	int err = 0;

	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	fence = i915_active_fence_get(&ce->timeline->last_request);
	if (fence) {
		i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	rq = i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, timeout) < 0)
		err = -ETIME;
	i915_request_put(rq);

	rmb(); /* We know the request is written, make sure all state is too! */
	return err;
}

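/*
 * Mask of register-offset bits the HW compares for an LRI; with the
 * CS_MMIO (engine-relative addressing) bit set, only the low bits are
 * significant. See the note in live_lrc_layout().
 */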
static u32 get_lri_mask(struct intel_engine_cs *engine, u32 lri)
{
	if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
		return ~0u;

	if (GRAPHICS_VER(engine->i915) < 12)
		return 0xfff;

	switch (engine->class) {
	default:
	case RENDER_CLASS:
	case COMPUTE_CLASS:
		return 0x07ff;
	case COPY_ENGINE_CLASS:
		return 0x0fff;
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
		return 0x3fff;
	}
}

static int live_lrc_layout(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 *lrc;
	int err;

	/*
	 * Check the register offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */

	lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
	if (!lrc)
		return -ENOMEM;
	GEM_BUG_ON(offset_in_page(lrc));

	err = 0;
	for_each_engine(engine, gt, id) {
		u32 *hw;
		int dw;

		if (!engine->default_state)
			continue;

		hw = shmem_pin_map(engine->default_state);
		if (!hw) {
			err = -ENOMEM;
			break;
		}
		hw += LRC_STATE_OFFSET / sizeof(*hw);

		__lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
				engine->kernel_context, engine, true);

		dw = 0;
		do {
			u32 lri = READ_ONCE(hw[dw]);
			u32 lri_mask;

			if (lri == 0) {
				dw++;
				continue;
			}

			if (lrc[dw] == 0) {
				pr_debug("%s: skipped instruction %x at dword %d\n",
					 engine->name, lri, dw);
				dw++;
				continue;
			}

			if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
				       engine->name, dw, lri);
				err = -EINVAL;
				break;
			}

			if (lrc[dw] != lri) {
				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
				       engine->name, dw, lri, lrc[dw]);
				err = -EINVAL;
				break;
			}

			/*
			 * When bit 19 of the MI_LOAD_REGISTER_IMM instruction
			 * opcode is set on Gen12+ devices, HW does not
			 * care about certain register address offsets, and
			 * instead checks only the following bits for valid
			 * address ranges on specific engines:
			 * RCS && CCS: BITS(0 - 10)
			 * BCS: BITS(0 - 11)
			 * VECS && VCS: BITS(0 - 13)
			 */
			lri_mask = get_lri_mask(engine, lri);

			lri &= 0x7f;
			lri++;
			dw++;

			while (lri) {
				u32 offset = READ_ONCE(hw[dw]);

				if ((offset ^ lrc[dw]) & lri_mask) {
					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
					       engine->name, dw, offset, lrc[dw]);
					err = -EINVAL;
					break;
				}

				/*
				 * Skip over the actual register value as we
				 * expect that to differ.
				 */
				dw += 2;
				lri -= 2;
			}
		} while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

		if (err) {
			pr_info("%s: HW register image:\n", engine->name);
			igt_hexdump(hw, PAGE_SIZE);

			pr_info("%s: SW register image:\n", engine->name);
			igt_hexdump(lrc, PAGE_SIZE);
		}

		shmem_unpin_map(engine->default_state, hw);
		if (err)
			break;
	}

	free_page((unsigned long)lrc);
	return err;
}

static int find_offset(const u32 *lri, u32 offset)
{
	int i;

	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
		if (lri[i] == offset)
			return i;

	return -1;
}

static int live_lrc_fixed(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the assumed register offsets match the actual locations in
	 * the context image.
	 */

	for_each_engine(engine, gt, id) {
		const struct {
			u32 reg;
			u32 offset;
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{
				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
				lrc_ring_wa_bb_per_ctx(engine),
				"RING_BB_PER_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
				lrc_ring_indirect_ptr(engine),
				"RING_INDIRECT_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
				lrc_ring_indirect_offset(engine),
				"RING_INDIRECT_CTX_OFFSET"
			},
			{
				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
				CTX_TIMESTAMP - 1,
				"RING_CTX_TIMESTAMP"
			},
			{
				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
				lrc_ring_gpr0(engine),
				"RING_CS_GPR0"
			},
			{
				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
				lrc_ring_cmd_buf_cctl(engine),
				"RING_CMD_BUF_CCTL"
			},
			{
				i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
				lrc_ring_bb_offset(engine),
				"RING_BB_OFFSET"
			},
			{ },
		}, *t;
		u32 *hw;

		if (!engine->default_state)
			continue;

		hw = shmem_pin_map(engine->default_state);
		if (!hw) {
			err = -ENOMEM;
			break;
		}
		hw += LRC_STATE_OFFSET / sizeof(*hw);

		for (t = tbl; t->name; t++) {
			int dw = find_offset(hw, t->reg);

			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		shmem_unpin_map(engine->default_state, hw);
	}

	return err;
}

static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(scratch->obj, &ww);
	if (!err)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_context_put(ce);
	return err;
}

static int live_lrc_state(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the live register state matches what we expect for this
	 * intel_context.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		err = __live_lrc_state(engine, scratch);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

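/* Fill every CS_GPR register of @ce's engine with a known non-zero value. */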
static int gpr_make_dirty(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	return 0;
}

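/*
 * Build a request that waits on the semaphore @slot and then dumps every
 * CS_GPR register into @scratch with MI_STORE_REGISTER_MEM.
 */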
static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return ERR_CAST(cs);
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

	return rq;
}

static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (preempt) {
		err = gpr_make_dirty(engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(engine->kernel_context, slot);
		if (err)
			goto err_rq;

		err = wait_for_submit(engine, rq, HZ / 2);
		if (err)
			goto err_rq;
	} else {
		slot[0] = 1;
		wmb();
	}

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	memset32(&slot[0], -1, 4);
	wmb();
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}

static int live_lrc_gpr(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that GPR registers are cleared in new contexts as we need
	 * to avoid leaking any information from previous contexts.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		st_engine_heartbeat_disable(engine);

		err = __live_lrc_gpr(engine, scratch, false);
		if (err)
			goto err;

		err = __live_lrc_gpr(engine, scratch, true);
		if (err)
			goto err;

err:
		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

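/*
 * Build a request that waits on the semaphore @slot and then stores the
 * current CTX_TIMESTAMP into the status page at dword @idx.
 */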
static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

struct lrc_timestamp {
	struct intel_engine_cs *engine;
	struct intel_context *ce[2];
	u32 poison;
};

static bool timestamp_advanced(u32 start, u32 end)
{
	return (s32)(end - start) > 0;
}

static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
	struct i915_request *rq;
	u32 timestamp;
	int err = 0;

	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(arg->ce[0], slot, 1);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	err = wait_for_submit(rq->engine, rq, HZ / 2);
	if (err)
		goto err;

	if (preempt) {
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(arg->ce[1], slot);
		if (err)
			goto err;
	} else {
		slot[0] = 1;
		wmb();
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(arg->ce[0], HZ / 2);
	if (err)
		goto err;

	if (!timestamp_advanced(arg->poison, slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(slot[1], timestamp)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       slot[1], timestamp);
		err = -EINVAL;
	}

err:
	memset32(slot, -1, 4);
	i915_request_put(rq);
	return err;
}

static int live_lrc_timestamp(void *arg)
{
	struct lrc_timestamp data = {};
	struct intel_gt *gt = arg;
	enum intel_engine_id id;
	const u32 poison[] = {
		0,
		S32_MAX,
		(u32)S32_MAX + 1,
		U32_MAX,
	};

	/*
	 * We want to verify that the timestamp is saved and restored across
	 * context switches and is monotonic.
	 *
	 * So we do this with a little bit of LRC poisoning to check various
	 * boundary conditions, and see what happens if we preempt the context
	 * with a second request (carrying more poison into the timestamp).
	 */

	for_each_engine(data.engine, gt, id) {
		int i, err = 0;

		st_engine_heartbeat_disable(data.engine);

		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			struct intel_context *tmp;

			tmp = intel_context_create(data.engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err;
			}

			data.ce[i] = tmp;
		}

		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			data.poison = poison[i];

			err = __lrc_timestamp(&data, false);
			if (err)
				break;

			err = __lrc_timestamp(&data, true);
			if (err)
				break;
		}

err:
		st_engine_heartbeat_enable(data.engine);
		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			if (!data.ce[i])
				break;

			intel_context_unpin(data.ce[i]);
			intel_context_put(data.ce[i]);
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

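/* Create and pin an internal object into @vm for use as a GPU-visible buffer. */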
static struct i915_vma *
create_user_vma(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

static u32 safe_poison(u32 offset, u32 poison)
{
	/*
	 * Do not enable predication as it will nop all subsequent commands,
	 * not only disabling the tests (by preventing all the other SRM) but
	 * also preventing the arbitration events at the end of the request.
	 */
	if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
		poison &= ~REG_BIT(0);

	return poison;
}

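/*
 * Build a user batch that walks the LRIs of the default context image and
 * emits an SRM for each listed register, dumping its value into @scratch.
 */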
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/*
		 * Keep it simple, skip parsing complex commands
		 *
		 * At present, there are no more MI_LOAD_REGISTER_IMM
		 * commands after the first 3D state command. Rather
		 * than include a table (see i915_cmd_parser.c) of all
		 * the possible commands and their instruction lengths
		 * (or mask for variable length instructions), assume
		 * we have gathered the complete list of registers and
		 * bail out.
		 */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			/* Assume all other MI commands match LRI length mask */
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       ce->engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(scratch->node.start + x);
			*cs++ = upper_32_bits(scratch->node.start + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, flags);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

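/*
 * Record the context registers into @before, then stall on @sema (giving
 * another context the chance to run and tamper), and record them again
 * into @after once the semaphore is released.
 */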
static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}

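/*
 * Build a user batch that reloads every register listed in the default
 * context image LRIs with @poison (filtered through safe_poison()).
 */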
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       ce->engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
								  MI_LRI_LRM_CS_MMIO),
					    poison);
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

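/*
 * Run the poisoning batch from @ce and then raise @sema to release the
 * victim context waiting on the semaphore.
 */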
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(batch->node.start);
	*cs++ = upper_32_bits(batch->node.start);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}

static bool is_moving(u32 a, u32 b)
{
	return a != b;
}

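/*
 * Compare the reference and result register dumps, ignoring registers that
 * were already changing between the two reference samples (e.g. RING_HEAD,
 * RING_TAIL), and complain about any value that did not survive the poison.
 */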
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	u32 *defaults;
	int err = 0;

	A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
					       i915_coherent_map_type(engine->i915,
								      ce->state->obj,
								      false));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_OFFSET / sizeof(*hw);

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		err = -ENOMEM;
		goto err_lrc;
	}

	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	shmem_unpin_map(ce->engine->default_state, defaults);
err_lrc:
	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

static struct i915_vma *
create_result_vma(struct i915_address_space *vm, unsigned long sz)
{
	struct i915_vma *vma;
	void *ptr;

	vma = create_user_vma(vm, sz);
	if (IS_ERR(vma))
		return vma;

	/* Set the results to a known value distinct from the poison */
	ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
	if (IS_ERR(ptr)) {
		i915_vma_put(vma);
		return ERR_CAST(ptr);
	}

	memset(ptr, POISON_INUSE, vma->size);
	i915_gem_object_flush_map(vma->obj);
	i915_gem_object_unpin_map(vma->obj);

	return vma;
}

static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	err = poison_registers(B, poison, sema);
	if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
		pr_err("%s(%s): wait for results timed out\n",
		       __func__, engine->name);
		err = -ETIME;
	}

	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
	WRITE_ONCE(*sema, -1);
	i915_request_put(rq);
	if (err)
		goto err_result1;

	err = compare_isolation(engine, ref, result, A, poison);

err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}

static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
		return true;

	return false;
}

static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};
	int err = 0;

	/*
	 * Our goal is to try to verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			int result;

			result = __lrc_isolation(engine, poison[i]);
			if (result && !err)
				err = result;

			result = __lrc_isolation(engine, ~poison[i]);
			if (result && !err)
				err = result;
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int indirect_ctx_submit_req(struct intel_context *ce)
{
	struct i915_request *rq;
	int err = 0;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;

	i915_request_put(rq);

	return err;
}

#define CTX_BB_CANARY_OFFSET (3 * 1024)
#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))

static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_START(0));
	*cs++ = i915_ggtt_offset(ce->state) +
		context_wa_bb_offset(ce) +
		CTX_BB_CANARY_OFFSET;
	*cs++ = 0;

	return cs;
}

static void
indirect_ctx_bb_setup(struct intel_context *ce)
{
	u32 *cs = context_indirect_bb(ce);

	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;

	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}

static bool check_ring_start(struct intel_context *ce)
{
	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
		LRC_STATE_OFFSET + context_wa_bb_offset(ce);

	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
		return true;

	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
	       ctx_bb[CTX_BB_CANARY_INDEX],
	       ce->lrc_reg_state[CTX_RING_START]);

	return false;
}

static int indirect_ctx_bb_check(struct intel_context *ce)
{
	int err;

	err = indirect_ctx_submit_req(ce);
	if (err)
		return err;

	if (!check_ring_start(ce))
		return -EINVAL;

	return 0;
}

static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
{
	struct intel_context *a, *b;
	int err;

	a = intel_context_create(engine);
	if (IS_ERR(a))
		return PTR_ERR(a);
	err = intel_context_pin(a);
	if (err)
		goto put_a;

	b = intel_context_create(engine);
	if (IS_ERR(b)) {
		err = PTR_ERR(b);
		goto unpin_a;
	}
	err = intel_context_pin(b);
	if (err)
		goto put_b;

	/* We use the already reserved extra page in context state */
	if (!a->wa_bb_page) {
		GEM_BUG_ON(b->wa_bb_page);
		GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
		goto unpin_b;
	}

	/*
	 * In order to test that our per-context bb is truly per context,
	 * and executes at the intended spot in the context restore process,
	 * make the batch store the ring start value to memory.
	 * As ring start is restored prior to starting the indirect ctx bb,
	 * and as it will be different for each context, it fits this purpose.
	 */
	indirect_ctx_bb_setup(a);
	indirect_ctx_bb_setup(b);

	err = indirect_ctx_bb_check(a);
	if (err)
		goto unpin_b;

	err = indirect_ctx_bb_check(b);

unpin_b:
	intel_context_unpin(b);
put_b:
	intel_context_put(b);
unpin_a:
	intel_context_unpin(a);
put_a:
	intel_context_put(a);

	return err;
}

static int live_lrc_indirect_ctx_bb(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_lrc_indirect_ctx_bb(engine);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;

		if (err)
			break;
	}

	return err;
}

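/*
 * Perform a manual engine reset under the reset lock (if the hang has not
 * already been handled) to recover from the corrupted context.
 */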
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	local_bh_disable();
	if (!test_and_set_bit(bit, lock)) {
		tasklet_disable(&engine->sched_engine->tasklet);

		if (!rq->fence.error)
			__intel_engine_reset_bh(engine, NULL);

		tasklet_enable(&engine->sched_engine->tasklet);
		clear_and_wake_up_bit(bit, lock);
	}
	local_bh_enable();
}

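/*
 * Scribble random bytes over @ce's register state and submit a request on
 * the now-corrupted context.
 */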
static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_OFFSET);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}

static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->stats.runtime.num_underflow = 0;
	ce->stats.runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->stats.runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->stats.runtime.num_underflow,
		       ce->stats.runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that the cumulative context runtime, as stored in the
	 * pphwsp[16], is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
		SUBTEST(live_lrc_indirect_ctx_bb),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}