1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_internal.h"
9 
10 #include "i915_selftest.h"
11 #include "intel_engine_heartbeat.h"
12 #include "intel_engine_pm.h"
13 #include "intel_reset.h"
14 #include "intel_ring.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftests/i915_random.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_live_test.h"
19 #include "selftests/igt_spinner.h"
20 #include "selftests/lib_sw_fence.h"
21 #include "shmem_utils.h"
22 
23 #include "gem/selftests/igt_gem_utils.h"
24 #include "gem/selftests/mock_context.h"
25 
26 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
27 #define NUM_GPR 16
28 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
29 
30 #define LRI_HEADER MI_INSTR(0x22, 0)
31 #define LRI_LENGTH_MASK GENMASK(7, 0)
32 
33 static struct i915_vma *create_scratch(struct intel_gt *gt)
34 {
35 	return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
36 }
37 
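/*
 * Treat @rq as having reached the HW if it is on the execution lists, has
 * been placed on hold, or (if it emits an initial breadcrumb) has already
 * been observed to start.
 */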
38 static bool is_active(struct i915_request *rq)
39 {
40 	if (i915_request_is_active(rq))
41 		return true;
42 
43 	if (i915_request_on_hold(rq))
44 		return true;
45 
46 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
47 		return true;
48 
49 	return false;
50 }
51 
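/*
 * Kick the submission tasklet and then poll until the HW has acknowledged
 * the submission of @rq (no ELSP write still pending and the request is
 * active), returning -ETIME if that does not happen within @timeout jiffies.
 */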
52 static int wait_for_submit(struct intel_engine_cs *engine,
53 			   struct i915_request *rq,
54 			   unsigned long timeout)
55 {
56 	/* Ignore our own attempts to suppress excess tasklets */
57 	tasklet_hi_schedule(&engine->sched_engine->tasklet);
58 
59 	timeout += jiffies;
60 	do {
61 		bool done = time_after(jiffies, timeout);
62 
63 		if (i915_request_completed(rq)) /* that was quick! */
64 			return 0;
65 
66 		/* Wait until the HW has acknowledged the submission (or err) */
67 		intel_engine_flush_submission(engine);
68 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
69 			return 0;
70 
71 		if (done)
72 			return -ETIME;
73 
74 		cond_resched();
75 	} while (1);
76 }
77 
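/*
 * Submit a barrier-priority request on @ce that writes 1 into the given
 * status page @slot, releasing any MI_SEMAPHORE_WAIT polling on it.
 */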
78 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
79 {
80 	const u32 offset =
81 		i915_ggtt_offset(ce->engine->status_page.vma) +
82 		offset_in_page(slot);
83 	struct i915_request *rq;
84 	u32 *cs;
85 
86 	rq = intel_context_create_request(ce);
87 	if (IS_ERR(rq))
88 		return PTR_ERR(rq);
89 
90 	cs = intel_ring_begin(rq, 4);
91 	if (IS_ERR(cs)) {
92 		i915_request_add(rq);
93 		return PTR_ERR(cs);
94 	}
95 
96 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
97 	*cs++ = offset;
98 	*cs++ = 0;
99 	*cs++ = 1;
100 
101 	intel_ring_advance(rq, cs);
102 
103 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
104 	i915_request_add(rq);
105 	return 0;
106 }
107 
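/*
 * Submit a kernel-context request ordered after the last request on @ce's
 * timeline and wait for it, ensuring @ce has been switched out and its
 * context image written back to memory.
 */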
108 static int context_flush(struct intel_context *ce, long timeout)
109 {
110 	struct i915_request *rq;
111 	struct dma_fence *fence;
112 	int err = 0;
113 
114 	rq = intel_engine_create_kernel_request(ce->engine);
115 	if (IS_ERR(rq))
116 		return PTR_ERR(rq);
117 
118 	fence = i915_active_fence_get(&ce->timeline->last_request);
119 	if (fence) {
120 		i915_request_await_dma_fence(rq, fence);
121 		dma_fence_put(fence);
122 	}
123 
124 	rq = i915_request_get(rq);
125 	i915_request_add(rq);
126 	if (i915_request_wait(rq, 0, timeout) < 0)
127 		err = -ETIME;
128 	i915_request_put(rq);
129 
130 	rmb(); /* We know the request is written, make sure all state is too! */
131 	return err;
132 }
133 
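/*
 * Return the mask of register offset bits that the HW compares when bit 19
 * (MI_LRI_LRM_CS_MMIO) is set in the LRI header; see the comment in
 * live_lrc_layout() for the per-engine address ranges.
 */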
134 static u32 get_lri_mask(struct intel_engine_cs *engine, u32 lri)
135 {
136 	if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
137 		return ~0u;
138 
139 	if (GRAPHICS_VER(engine->i915) < 12)
140 		return 0xfff;
141 
142 	switch (engine->class) {
143 	default:
144 	case RENDER_CLASS:
145 	case COMPUTE_CLASS:
146 		return 0x07ff;
147 	case COPY_ENGINE_CLASS:
148 		return 0x0fff;
149 	case VIDEO_DECODE_CLASS:
150 	case VIDEO_ENHANCEMENT_CLASS:
151 		return 0x3fff;
152 	}
153 }
154 
155 static int live_lrc_layout(void *arg)
156 {
157 	struct intel_gt *gt = arg;
158 	struct intel_engine_cs *engine;
159 	enum intel_engine_id id;
160 	u32 *lrc;
161 	int err;
162 
163 	/*
164 	 * Check that the register offsets we use to create the initial reg state
165 	 * match the layout saved by HW.
166 	 */
167 
168 	lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
169 	if (!lrc)
170 		return -ENOMEM;
171 	GEM_BUG_ON(offset_in_page(lrc));
172 
173 	err = 0;
174 	for_each_engine(engine, gt, id) {
175 		u32 *hw;
176 		int dw;
177 
178 		if (!engine->default_state)
179 			continue;
180 
181 		hw = shmem_pin_map(engine->default_state);
182 		if (!hw) {
183 			err = -ENOMEM;
184 			break;
185 		}
186 		hw += LRC_STATE_OFFSET / sizeof(*hw);
187 
188 		__lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
189 				engine->kernel_context, engine, true);
190 
191 		dw = 0;
192 		do {
193 			u32 lri = READ_ONCE(hw[dw]);
194 			u32 lri_mask;
195 
196 			if (lri == 0) {
197 				dw++;
198 				continue;
199 			}
200 
201 			if (lrc[dw] == 0) {
202 				pr_debug("%s: skipped instruction %x at dword %d\n",
203 					 engine->name, lri, dw);
204 				dw++;
205 				continue;
206 			}
207 
208 			if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
209 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
210 				       engine->name, dw, lri);
211 				err = -EINVAL;
212 				break;
213 			}
214 
215 			if (lrc[dw] != lri) {
216 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
217 				       engine->name, dw, lri, lrc[dw]);
218 				err = -EINVAL;
219 				break;
220 			}
221 
222 			/*
223 			 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
224 			 * opcode is set on Gen12+ devices, HW does not
225 			 * care about certain register address offsets, and
226 			 * instead checks the following for valid address
227 			 * ranges on specific engines:
228 			 * RCS && CCS: BITS(0 - 10)
229 			 * BCS: BITS(0 - 11)
230 			 * VECS && VCS: BITS(0 - 13)
231 			 */
232 			lri_mask = get_lri_mask(engine, lri);
233 
234 			lri &= 0x7f;
235 			lri++;
236 			dw++;
237 
238 			while (lri) {
239 				u32 offset = READ_ONCE(hw[dw]);
240 
241 				if ((offset ^ lrc[dw]) & lri_mask) {
242 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
243 					       engine->name, dw, offset, lrc[dw]);
244 					err = -EINVAL;
245 					break;
246 				}
247 
248 				/*
249 				 * Skip over the actual register value as we
250 				 * expect that to differ.
251 				 */
252 				dw += 2;
253 				lri -= 2;
254 			}
255 		} while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
256 
257 		if (err) {
258 			pr_info("%s: HW register image:\n", engine->name);
259 			igt_hexdump(hw, PAGE_SIZE);
260 
261 			pr_info("%s: SW register image:\n", engine->name);
262 			igt_hexdump(lrc, PAGE_SIZE);
263 		}
264 
265 		shmem_unpin_map(engine->default_state, hw);
266 		if (err)
267 			break;
268 	}
269 
270 	free_page((unsigned long)lrc);
271 	return err;
272 }
273 
274 static int find_offset(const u32 *lri, u32 offset)
275 {
276 	int i;
277 
278 	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
279 		if (lri[i] == offset)
280 			return i;
281 
282 	return -1;
283 }
284 
285 static int live_lrc_fixed(void *arg)
286 {
287 	struct intel_gt *gt = arg;
288 	struct intel_engine_cs *engine;
289 	enum intel_engine_id id;
290 	int err = 0;
291 
292 	/*
293 	 * Check the assumed register offsets match the actual locations in
294 	 * the context image.
295 	 */
296 
297 	for_each_engine(engine, gt, id) {
298 		const struct {
299 			u32 reg;
300 			u32 offset;
301 			const char *name;
302 		} tbl[] = {
303 			{
304 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
305 				CTX_RING_START - 1,
306 				"RING_START"
307 			},
308 			{
309 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
310 				CTX_RING_CTL - 1,
311 				"RING_CTL"
312 			},
313 			{
314 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
315 				CTX_RING_HEAD - 1,
316 				"RING_HEAD"
317 			},
318 			{
319 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
320 				CTX_RING_TAIL - 1,
321 				"RING_TAIL"
322 			},
323 			{
324 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
325 				lrc_ring_mi_mode(engine),
326 				"RING_MI_MODE"
327 			},
328 			{
329 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
330 				CTX_BB_STATE - 1,
331 				"BB_STATE"
332 			},
333 			{
334 				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
335 				lrc_ring_wa_bb_per_ctx(engine),
336 				"RING_BB_PER_CTX_PTR"
337 			},
338 			{
339 				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
340 				lrc_ring_indirect_ptr(engine),
341 				"RING_INDIRECT_CTX_PTR"
342 			},
343 			{
344 				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
345 				lrc_ring_indirect_offset(engine),
346 				"RING_INDIRECT_CTX_OFFSET"
347 			},
348 			{
349 				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
350 				CTX_TIMESTAMP - 1,
351 				"RING_CTX_TIMESTAMP"
352 			},
353 			{
354 				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
355 				lrc_ring_gpr0(engine),
356 				"RING_CS_GPR0"
357 			},
358 			{
359 				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
360 				lrc_ring_cmd_buf_cctl(engine),
361 				"RING_CMD_BUF_CCTL"
362 			},
363 			{
364 				i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
365 				lrc_ring_bb_offset(engine),
366 				"RING_BB_OFFSET"
367 			},
368 			{ },
369 		}, *t;
370 		u32 *hw;
371 
372 		if (!engine->default_state)
373 			continue;
374 
375 		hw = shmem_pin_map(engine->default_state);
376 		if (!hw) {
377 			err = -ENOMEM;
378 			break;
379 		}
380 		hw += LRC_STATE_OFFSET / sizeof(*hw);
381 
382 		for (t = tbl; t->name; t++) {
383 			int dw = find_offset(hw, t->reg);
384 
385 			if (dw != t->offset) {
386 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
387 				       engine->name,
388 				       t->name,
389 				       t->reg,
390 				       dw,
391 				       t->offset);
392 				err = -EINVAL;
393 			}
394 		}
395 
396 		shmem_unpin_map(engine->default_state, hw);
397 	}
398 
399 	return err;
400 }
401 
402 static int __live_lrc_state(struct intel_engine_cs *engine,
403 			    struct i915_vma *scratch)
404 {
405 	struct intel_context *ce;
406 	struct i915_request *rq;
407 	struct i915_gem_ww_ctx ww;
408 	enum {
409 		RING_START_IDX = 0,
410 		RING_TAIL_IDX,
411 		MAX_IDX
412 	};
413 	u32 expected[MAX_IDX];
414 	u32 *cs;
415 	int err;
416 	int n;
417 
418 	ce = intel_context_create(engine);
419 	if (IS_ERR(ce))
420 		return PTR_ERR(ce);
421 
422 	i915_gem_ww_ctx_init(&ww, false);
423 retry:
424 	err = i915_gem_object_lock(scratch->obj, &ww);
425 	if (!err)
426 		err = intel_context_pin_ww(ce, &ww);
427 	if (err)
428 		goto err_put;
429 
430 	rq = i915_request_create(ce);
431 	if (IS_ERR(rq)) {
432 		err = PTR_ERR(rq);
433 		goto err_unpin;
434 	}
435 
436 	cs = intel_ring_begin(rq, 4 * MAX_IDX);
437 	if (IS_ERR(cs)) {
438 		err = PTR_ERR(cs);
439 		i915_request_add(rq);
440 		goto err_unpin;
441 	}
442 
443 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
444 	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
445 	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
446 	*cs++ = 0;
447 
448 	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
449 
450 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
451 	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
452 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
453 	*cs++ = 0;
454 
455 	err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
456 
457 	i915_request_get(rq);
458 	i915_request_add(rq);
459 	if (err)
460 		goto err_rq;
461 
462 	intel_engine_flush_submission(engine);
463 	expected[RING_TAIL_IDX] = ce->ring->tail;
464 
465 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
466 		err = -ETIME;
467 		goto err_rq;
468 	}
469 
470 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
471 	if (IS_ERR(cs)) {
472 		err = PTR_ERR(cs);
473 		goto err_rq;
474 	}
475 
476 	for (n = 0; n < MAX_IDX; n++) {
477 		if (cs[n] != expected[n]) {
478 			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
479 			       engine->name, n, cs[n], expected[n]);
480 			err = -EINVAL;
481 			break;
482 		}
483 	}
484 
485 	i915_gem_object_unpin_map(scratch->obj);
486 
487 err_rq:
488 	i915_request_put(rq);
489 err_unpin:
490 	intel_context_unpin(ce);
491 err_put:
492 	if (err == -EDEADLK) {
493 		err = i915_gem_ww_ctx_backoff(&ww);
494 		if (!err)
495 			goto retry;
496 	}
497 	i915_gem_ww_ctx_fini(&ww);
498 	intel_context_put(ce);
499 	return err;
500 }
501 
502 static int live_lrc_state(void *arg)
503 {
504 	struct intel_gt *gt = arg;
505 	struct intel_engine_cs *engine;
506 	struct i915_vma *scratch;
507 	enum intel_engine_id id;
508 	int err = 0;
509 
510 	/*
511 	 * Check the live register state matches what we expect for this
512 	 * intel_context.
513 	 */
514 
515 	scratch = create_scratch(gt);
516 	if (IS_ERR(scratch))
517 		return PTR_ERR(scratch);
518 
519 	for_each_engine(engine, gt, id) {
520 		err = __live_lrc_state(engine, scratch);
521 		if (err)
522 			break;
523 	}
524 
525 	if (igt_flush_test(gt->i915))
526 		err = -EIO;
527 
528 	i915_vma_unpin_and_release(&scratch, 0);
529 	return err;
530 }
531 
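/* Load a non-zero value (STACK_MAGIC) into every CS_GPR dword on @ce. */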
532 static int gpr_make_dirty(struct intel_context *ce)
533 {
534 	struct i915_request *rq;
535 	u32 *cs;
536 	int n;
537 
538 	rq = intel_context_create_request(ce);
539 	if (IS_ERR(rq))
540 		return PTR_ERR(rq);
541 
542 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
543 	if (IS_ERR(cs)) {
544 		i915_request_add(rq);
545 		return PTR_ERR(cs);
546 	}
547 
548 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
549 	for (n = 0; n < NUM_GPR_DW; n++) {
550 		*cs++ = CS_GPR(ce->engine, n);
551 		*cs++ = STACK_MAGIC;
552 	}
553 	*cs++ = MI_NOOP;
554 
555 	intel_ring_advance(rq, cs);
556 
557 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
558 	i915_request_add(rq);
559 
560 	return 0;
561 }
562 
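/*
 * Build a request on @ce that blocks on a status-page semaphore and then
 * stores every CS_GPR dword out to @scratch for later inspection.
 */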
563 static struct i915_request *
564 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
565 {
566 	const u32 offset =
567 		i915_ggtt_offset(ce->engine->status_page.vma) +
568 		offset_in_page(slot);
569 	struct i915_request *rq;
570 	u32 *cs;
571 	int err;
572 	int n;
573 
574 	rq = intel_context_create_request(ce);
575 	if (IS_ERR(rq))
576 		return rq;
577 
578 	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
579 	if (IS_ERR(cs)) {
580 		i915_request_add(rq);
581 		return ERR_CAST(cs);
582 	}
583 
584 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
585 	*cs++ = MI_NOOP;
586 
587 	*cs++ = MI_SEMAPHORE_WAIT |
588 		MI_SEMAPHORE_GLOBAL_GTT |
589 		MI_SEMAPHORE_POLL |
590 		MI_SEMAPHORE_SAD_NEQ_SDD;
591 	*cs++ = 0;
592 	*cs++ = offset;
593 	*cs++ = 0;
594 
595 	for (n = 0; n < NUM_GPR_DW; n++) {
596 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
597 		*cs++ = CS_GPR(ce->engine, n);
598 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
599 		*cs++ = 0;
600 	}
601 
602 	i915_vma_lock(scratch);
603 	err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
604 	i915_vma_unlock(scratch);
605 
606 	i915_request_get(rq);
607 	i915_request_add(rq);
608 	if (err) {
609 		i915_request_put(rq);
610 		rq = ERR_PTR(err);
611 	}
612 
613 	return rq;
614 }
615 
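/*
 * Dirty the GPRs from the kernel context and then read them back from a
 * fresh context. With @preempt, a second kernel-context request dirties
 * them again and signals the semaphore at barrier priority, forcing a
 * context switch while the reader is blocked; either way the new context
 * must observe zeroed GPRs.
 */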
616 static int __live_lrc_gpr(struct intel_engine_cs *engine,
617 			  struct i915_vma *scratch,
618 			  bool preempt)
619 {
620 	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
621 	struct intel_context *ce;
622 	struct i915_request *rq;
623 	u32 *cs;
624 	int err;
625 	int n;
626 
627 	if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
628 		return 0; /* GPR only on rcs0 for gen8 */
629 
630 	err = gpr_make_dirty(engine->kernel_context);
631 	if (err)
632 		return err;
633 
634 	ce = intel_context_create(engine);
635 	if (IS_ERR(ce))
636 		return PTR_ERR(ce);
637 
638 	rq = __gpr_read(ce, scratch, slot);
639 	if (IS_ERR(rq)) {
640 		err = PTR_ERR(rq);
641 		goto err_put;
642 	}
643 
644 	err = wait_for_submit(engine, rq, HZ / 2);
645 	if (err)
646 		goto err_rq;
647 
648 	if (preempt) {
649 		err = gpr_make_dirty(engine->kernel_context);
650 		if (err)
651 			goto err_rq;
652 
653 		err = emit_semaphore_signal(engine->kernel_context, slot);
654 		if (err)
655 			goto err_rq;
656 
657 		err = wait_for_submit(engine, rq, HZ / 2);
658 		if (err)
659 			goto err_rq;
660 	} else {
661 		slot[0] = 1;
662 		wmb();
663 	}
664 
665 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
666 		err = -ETIME;
667 		goto err_rq;
668 	}
669 
670 	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
671 	if (IS_ERR(cs)) {
672 		err = PTR_ERR(cs);
673 		goto err_rq;
674 	}
675 
676 	for (n = 0; n < NUM_GPR_DW; n++) {
677 		if (cs[n]) {
678 			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
679 			       engine->name,
680 			       n / 2, n & 1 ? "udw" : "ldw",
681 			       cs[n]);
682 			err = -EINVAL;
683 			break;
684 		}
685 	}
686 
687 	i915_gem_object_unpin_map(scratch->obj);
688 
689 err_rq:
690 	memset32(&slot[0], -1, 4);
691 	wmb();
692 	i915_request_put(rq);
693 err_put:
694 	intel_context_put(ce);
695 	return err;
696 }
697 
698 static int live_lrc_gpr(void *arg)
699 {
700 	struct intel_gt *gt = arg;
701 	struct intel_engine_cs *engine;
702 	struct i915_vma *scratch;
703 	enum intel_engine_id id;
704 	int err = 0;
705 
706 	/*
707 	 * Check that the GPRs are cleared in new contexts, as we need
708 	 * to avoid leaking any information from previous contexts.
709 	 */
710 
711 	scratch = create_scratch(gt);
712 	if (IS_ERR(scratch))
713 		return PTR_ERR(scratch);
714 
715 	for_each_engine(engine, gt, id) {
716 		st_engine_heartbeat_disable(engine);
717 
718 		err = __live_lrc_gpr(engine, scratch, false);
719 		if (err)
720 			goto err;
721 
722 		err = __live_lrc_gpr(engine, scratch, true);
723 		if (err)
724 			goto err;
725 
726 err:
727 		st_engine_heartbeat_enable(engine);
728 		if (igt_flush_test(gt->i915))
729 			err = -EIO;
730 		if (err)
731 			break;
732 	}
733 
734 	i915_vma_unpin_and_release(&scratch, 0);
735 	return err;
736 }
737 
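/*
 * Build a request on @ce that blocks on a status-page semaphore and then
 * stores RING_CTX_TIMESTAMP into status-page slot @idx.
 */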
738 static struct i915_request *
739 create_timestamp(struct intel_context *ce, void *slot, int idx)
740 {
741 	const u32 offset =
742 		i915_ggtt_offset(ce->engine->status_page.vma) +
743 		offset_in_page(slot);
744 	struct i915_request *rq;
745 	u32 *cs;
746 	int err;
747 
748 	rq = intel_context_create_request(ce);
749 	if (IS_ERR(rq))
750 		return rq;
751 
752 	cs = intel_ring_begin(rq, 10);
753 	if (IS_ERR(cs)) {
754 		err = PTR_ERR(cs);
755 		goto err;
756 	}
757 
758 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
759 	*cs++ = MI_NOOP;
760 
761 	*cs++ = MI_SEMAPHORE_WAIT |
762 		MI_SEMAPHORE_GLOBAL_GTT |
763 		MI_SEMAPHORE_POLL |
764 		MI_SEMAPHORE_SAD_NEQ_SDD;
765 	*cs++ = 0;
766 	*cs++ = offset;
767 	*cs++ = 0;
768 
769 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
770 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
771 	*cs++ = offset + idx * sizeof(u32);
772 	*cs++ = 0;
773 
774 	intel_ring_advance(rq, cs);
775 
776 	err = 0;
777 err:
778 	i915_request_get(rq);
779 	i915_request_add(rq);
780 	if (err) {
781 		i915_request_put(rq);
782 		return ERR_PTR(err);
783 	}
784 
785 	return rq;
786 }
787 
788 struct lrc_timestamp {
789 	struct intel_engine_cs *engine;
790 	struct intel_context *ce[2];
791 	u32 poison;
792 };
793 
794 static bool timestamp_advanced(u32 start, u32 end)
795 {
796 	return (s32)(end - start) > 0;
797 }
798 
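/*
 * Poison CTX_TIMESTAMP in the context image, let a request sample the
 * restored value, then check that the timestamp advanced across both the
 * restore (poison -> sample) and the save back to the context image.
 */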
799 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
800 {
801 	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
802 	struct i915_request *rq;
803 	u32 timestamp;
804 	int err = 0;
805 
806 	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
807 	rq = create_timestamp(arg->ce[0], slot, 1);
808 	if (IS_ERR(rq))
809 		return PTR_ERR(rq);
810 
811 	err = wait_for_submit(rq->engine, rq, HZ / 2);
812 	if (err)
813 		goto err;
814 
815 	if (preempt) {
816 		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
817 		err = emit_semaphore_signal(arg->ce[1], slot);
818 		if (err)
819 			goto err;
820 	} else {
821 		slot[0] = 1;
822 		wmb();
823 	}
824 
825 	/* And wait for switch to kernel (to save our context to memory) */
826 	err = context_flush(arg->ce[0], HZ / 2);
827 	if (err)
828 		goto err;
829 
830 	if (!timestamp_advanced(arg->poison, slot[1])) {
831 		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
832 		       arg->engine->name, preempt ? "preempt" : "simple",
833 		       arg->poison, slot[1]);
834 		err = -EINVAL;
835 	}
836 
837 	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
838 	if (!timestamp_advanced(slot[1], timestamp)) {
839 		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
840 		       arg->engine->name, preempt ? "preempt" : "simple",
841 		       slot[1], timestamp);
842 		err = -EINVAL;
843 	}
844 
845 err:
846 	memset32(slot, -1, 4);
847 	i915_request_put(rq);
848 	return err;
849 }
850 
851 static int live_lrc_timestamp(void *arg)
852 {
853 	struct lrc_timestamp data = {};
854 	struct intel_gt *gt = arg;
855 	enum intel_engine_id id;
856 	const u32 poison[] = {
857 		0,
858 		S32_MAX,
859 		(u32)S32_MAX + 1,
860 		U32_MAX,
861 	};
862 
863 	/*
864 	 * We want to verify that the timestamp is saved and restored across
865 	 * context switches and is monotonic.
866 	 *
867 	 * So we do this with a little bit of LRC poisoning to check various
868 	 * boundary conditions, and see what happens if we preempt the context
869 	 * with a second request (carrying more poison into the timestamp).
870 	 */
871 
872 	for_each_engine(data.engine, gt, id) {
873 		int i, err = 0;
874 
875 		st_engine_heartbeat_disable(data.engine);
876 
877 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
878 			struct intel_context *tmp;
879 
880 			tmp = intel_context_create(data.engine);
881 			if (IS_ERR(tmp)) {
882 				err = PTR_ERR(tmp);
883 				goto err;
884 			}
885 
886 			err = intel_context_pin(tmp);
887 			if (err) {
888 				intel_context_put(tmp);
889 				goto err;
890 			}
891 
892 			data.ce[i] = tmp;
893 		}
894 
895 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
896 			data.poison = poison[i];
897 
898 			err = __lrc_timestamp(&data, false);
899 			if (err)
900 				break;
901 
902 			err = __lrc_timestamp(&data, true);
903 			if (err)
904 				break;
905 		}
906 
907 err:
908 		st_engine_heartbeat_enable(data.engine);
909 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
910 			if (!data.ce[i])
911 				break;
912 
913 			intel_context_unpin(data.ce[i]);
914 			intel_context_put(data.ce[i]);
915 		}
916 
917 		if (igt_flush_test(gt->i915))
918 			err = -EIO;
919 		if (err)
920 			return err;
921 	}
922 
923 	return 0;
924 }
925 
926 static struct i915_vma *
927 create_user_vma(struct i915_address_space *vm, unsigned long size)
928 {
929 	struct drm_i915_gem_object *obj;
930 	struct i915_vma *vma;
931 	int err;
932 
933 	obj = i915_gem_object_create_internal(vm->i915, size);
934 	if (IS_ERR(obj))
935 		return ERR_CAST(obj);
936 
937 	vma = i915_vma_instance(obj, vm, NULL);
938 	if (IS_ERR(vma)) {
939 		i915_gem_object_put(obj);
940 		return vma;
941 	}
942 
943 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
944 	if (err) {
945 		i915_gem_object_put(obj);
946 		return ERR_PTR(err);
947 	}
948 
949 	return vma;
950 }
951 
952 static u32 safe_poison(u32 offset, u32 poison)
953 {
954 	/*
955 	 * Do not enable predication as it will nop all subsequent commands,
956 	 * not only disabling the tests (by preventing all the other SRMs) but
957 	 * also preventing the arbitration events at the end of the request.
958 	 */
959 	if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
960 		poison &= ~REG_BIT(0);
961 
962 	return poison;
963 }
964 
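/*
 * Build a user batch that walks the LRI list of the default context image
 * and emits an SRM for every register it names, dumping the live values
 * into @scratch.
 */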
965 static struct i915_vma *
966 store_context(struct intel_context *ce, struct i915_vma *scratch)
967 {
968 	struct i915_vma *batch;
969 	u32 dw, x, *cs, *hw;
970 	u32 *defaults;
971 
972 	batch = create_user_vma(ce->vm, SZ_64K);
973 	if (IS_ERR(batch))
974 		return batch;
975 
976 	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
977 	if (IS_ERR(cs)) {
978 		i915_vma_put(batch);
979 		return ERR_CAST(cs);
980 	}
981 
982 	defaults = shmem_pin_map(ce->engine->default_state);
983 	if (!defaults) {
984 		i915_gem_object_unpin_map(batch->obj);
985 		i915_vma_put(batch);
986 		return ERR_PTR(-ENOMEM);
987 	}
988 
989 	x = 0;
990 	dw = 0;
991 	hw = defaults;
992 	hw += LRC_STATE_OFFSET / sizeof(*hw);
993 	do {
994 		u32 len = hw[dw] & LRI_LENGTH_MASK;
995 
996 		/*
997 		 * Keep it simple, skip parsing complex commands
998 		 *
999 		 * At present, there are no more MI_LOAD_REGISTER_IMM
1000 		 * commands after the first 3D state command. Rather
1001 		 * than include a table (see i915_cmd_parser.c) of all
1002 		 * the possible commands and their instruction lengths
1003 		 * (or mask for variable length instructions), assume
1004 		 * we have gathered the complete list of registers and
1005 		 * bail out.
1006 		 */
1007 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1008 			break;
1009 
1010 		if (hw[dw] == 0) {
1011 			dw++;
1012 			continue;
1013 		}
1014 
1015 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1016 			/* Assume all other MI commands match LRI length mask */
1017 			dw += len + 2;
1018 			continue;
1019 		}
1020 
1021 		if (!len) {
1022 			pr_err("%s: invalid LRI found in context image\n",
1023 			       ce->engine->name);
1024 			igt_hexdump(defaults, PAGE_SIZE);
1025 			break;
1026 		}
1027 
1028 		dw++;
1029 		len = (len + 1) / 2;
1030 		while (len--) {
1031 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
1032 			*cs++ = hw[dw];
1033 			*cs++ = lower_32_bits(scratch->node.start + x);
1034 			*cs++ = upper_32_bits(scratch->node.start + x);
1035 
1036 			dw += 2;
1037 			x += 4;
1038 		}
1039 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1040 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1041 
1042 	*cs++ = MI_BATCH_BUFFER_END;
1043 
1044 	shmem_unpin_map(ce->engine->default_state, defaults);
1045 
1046 	i915_gem_object_flush_map(batch->obj);
1047 	i915_gem_object_unpin_map(batch->obj);
1048 
1049 	return batch;
1050 }
1051 
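/*
 * Submit a request on @ce that dumps its registers into @before, then
 * waits on @sema with arbitration enabled (so another context may run and
 * attempt to poison those registers), and finally dumps them into @after.
 */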
1052 static struct i915_request *
1053 record_registers(struct intel_context *ce,
1054 		 struct i915_vma *before,
1055 		 struct i915_vma *after,
1056 		 u32 *sema)
1057 {
1058 	struct i915_vma *b_before, *b_after;
1059 	struct i915_request *rq;
1060 	u32 *cs;
1061 	int err;
1062 
1063 	b_before = store_context(ce, before);
1064 	if (IS_ERR(b_before))
1065 		return ERR_CAST(b_before);
1066 
1067 	b_after = store_context(ce, after);
1068 	if (IS_ERR(b_after)) {
1069 		rq = ERR_CAST(b_after);
1070 		goto err_before;
1071 	}
1072 
1073 	rq = intel_context_create_request(ce);
1074 	if (IS_ERR(rq))
1075 		goto err_after;
1076 
1077 	err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE);
1078 	if (err)
1079 		goto err_rq;
1080 
1081 	err = igt_vma_move_to_active_unlocked(b_before, rq, 0);
1082 	if (err)
1083 		goto err_rq;
1084 
1085 	err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE);
1086 	if (err)
1087 		goto err_rq;
1088 
1089 	err = igt_vma_move_to_active_unlocked(b_after, rq, 0);
1090 	if (err)
1091 		goto err_rq;
1092 
1093 	cs = intel_ring_begin(rq, 14);
1094 	if (IS_ERR(cs)) {
1095 		err = PTR_ERR(cs);
1096 		goto err_rq;
1097 	}
1098 
1099 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1100 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1101 	*cs++ = lower_32_bits(b_before->node.start);
1102 	*cs++ = upper_32_bits(b_before->node.start);
1103 
1104 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1105 	*cs++ = MI_SEMAPHORE_WAIT |
1106 		MI_SEMAPHORE_GLOBAL_GTT |
1107 		MI_SEMAPHORE_POLL |
1108 		MI_SEMAPHORE_SAD_NEQ_SDD;
1109 	*cs++ = 0;
1110 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1111 		offset_in_page(sema);
1112 	*cs++ = 0;
1113 	*cs++ = MI_NOOP;
1114 
1115 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1116 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1117 	*cs++ = lower_32_bits(b_after->node.start);
1118 	*cs++ = upper_32_bits(b_after->node.start);
1119 
1120 	intel_ring_advance(rq, cs);
1121 
1122 	WRITE_ONCE(*sema, 0);
1123 	i915_request_get(rq);
1124 	i915_request_add(rq);
1125 err_after:
1126 	i915_vma_put(b_after);
1127 err_before:
1128 	i915_vma_put(b_before);
1129 	return rq;
1130 
1131 err_rq:
1132 	i915_request_add(rq);
1133 	rq = ERR_PTR(err);
1134 	goto err_after;
1135 }
1136 
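/*
 * Build a user batch that replays the LRI list of the default context
 * image, loading @poison into every register it names (filtered through
 * safe_poison() to avoid enabling predication).
 */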
1137 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
1138 {
1139 	struct i915_vma *batch;
1140 	u32 dw, *cs, *hw;
1141 	u32 *defaults;
1142 
1143 	batch = create_user_vma(ce->vm, SZ_64K);
1144 	if (IS_ERR(batch))
1145 		return batch;
1146 
1147 	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1148 	if (IS_ERR(cs)) {
1149 		i915_vma_put(batch);
1150 		return ERR_CAST(cs);
1151 	}
1152 
1153 	defaults = shmem_pin_map(ce->engine->default_state);
1154 	if (!defaults) {
1155 		i915_gem_object_unpin_map(batch->obj);
1156 		i915_vma_put(batch);
1157 		return ERR_PTR(-ENOMEM);
1158 	}
1159 
1160 	dw = 0;
1161 	hw = defaults;
1162 	hw += LRC_STATE_OFFSET / sizeof(*hw);
1163 	do {
1164 		u32 len = hw[dw] & LRI_LENGTH_MASK;
1165 
1166 		/* For simplicity, break parsing at the first complex command */
1167 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1168 			break;
1169 
1170 		if (hw[dw] == 0) {
1171 			dw++;
1172 			continue;
1173 		}
1174 
1175 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1176 			dw += len + 2;
1177 			continue;
1178 		}
1179 
1180 		if (!len) {
1181 			pr_err("%s: invalid LRI found in context image\n",
1182 			       ce->engine->name);
1183 			igt_hexdump(defaults, PAGE_SIZE);
1184 			break;
1185 		}
1186 
1187 		dw++;
1188 		len = (len + 1) / 2;
1189 		*cs++ = MI_LOAD_REGISTER_IMM(len);
1190 		while (len--) {
1191 			*cs++ = hw[dw];
1192 			*cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
1193 								  MI_LRI_LRM_CS_MMIO),
1194 					    poison);
1195 			dw += 2;
1196 		}
1197 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1198 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1199 
1200 	*cs++ = MI_BATCH_BUFFER_END;
1201 
1202 	shmem_unpin_map(ce->engine->default_state, defaults);
1203 
1204 	i915_gem_object_flush_map(batch->obj);
1205 	i915_gem_object_unpin_map(batch->obj);
1206 
1207 	return batch;
1208 }
1209 
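/*
 * From context @ce, run the poisoning batch and then write 1 into @sema so
 * that the recording context blocked on it may resume.
 */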
1210 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
1211 {
1212 	struct i915_request *rq;
1213 	struct i915_vma *batch;
1214 	u32 *cs;
1215 	int err;
1216 
1217 	batch = load_context(ce, poison);
1218 	if (IS_ERR(batch))
1219 		return PTR_ERR(batch);
1220 
1221 	rq = intel_context_create_request(ce);
1222 	if (IS_ERR(rq)) {
1223 		err = PTR_ERR(rq);
1224 		goto err_batch;
1225 	}
1226 
1227 	err = igt_vma_move_to_active_unlocked(batch, rq, 0);
1228 	if (err)
1229 		goto err_rq;
1230 
1231 	cs = intel_ring_begin(rq, 8);
1232 	if (IS_ERR(cs)) {
1233 		err = PTR_ERR(cs);
1234 		goto err_rq;
1235 	}
1236 
1237 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1238 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1239 	*cs++ = lower_32_bits(batch->node.start);
1240 	*cs++ = upper_32_bits(batch->node.start);
1241 
1242 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1243 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1244 		offset_in_page(sema);
1245 	*cs++ = 0;
1246 	*cs++ = 1;
1247 
1248 	intel_ring_advance(rq, cs);
1249 
1250 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1251 err_rq:
1252 	i915_request_add(rq);
1253 err_batch:
1254 	i915_vma_put(batch);
1255 	return err;
1256 }
1257 
1258 static bool is_moving(u32 a, u32 b)
1259 {
1260 	return a != b;
1261 }
1262 
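/*
 * Compare the register dumps taken around the poisoning attempt against
 * the undisturbed reference dumps: any register that was stable across the
 * reference dumps must hold the same values in the result dumps, with
 * RING_HEAD and RING_TAIL excepted.
 */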
1263 static int compare_isolation(struct intel_engine_cs *engine,
1264 			     struct i915_vma *ref[2],
1265 			     struct i915_vma *result[2],
1266 			     struct intel_context *ce,
1267 			     u32 poison)
1268 {
1269 	u32 x, dw, *hw, *lrc;
1270 	u32 *A[2], *B[2];
1271 	u32 *defaults;
1272 	int err = 0;
1273 
1274 	A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
1275 	if (IS_ERR(A[0]))
1276 		return PTR_ERR(A[0]);
1277 
1278 	A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
1279 	if (IS_ERR(A[1])) {
1280 		err = PTR_ERR(A[1]);
1281 		goto err_A0;
1282 	}
1283 
1284 	B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
1285 	if (IS_ERR(B[0])) {
1286 		err = PTR_ERR(B[0]);
1287 		goto err_A1;
1288 	}
1289 
1290 	B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
1291 	if (IS_ERR(B[1])) {
1292 		err = PTR_ERR(B[1]);
1293 		goto err_B0;
1294 	}
1295 
1296 	lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
1297 					       i915_coherent_map_type(engine->i915,
1298 								      ce->state->obj,
1299 								      false));
1300 	if (IS_ERR(lrc)) {
1301 		err = PTR_ERR(lrc);
1302 		goto err_B1;
1303 	}
1304 	lrc += LRC_STATE_OFFSET / sizeof(*hw);
1305 
1306 	defaults = shmem_pin_map(ce->engine->default_state);
1307 	if (!defaults) {
1308 		err = -ENOMEM;
1309 		goto err_lrc;
1310 	}
1311 
1312 	x = 0;
1313 	dw = 0;
1314 	hw = defaults;
1315 	hw += LRC_STATE_OFFSET / sizeof(*hw);
1316 	do {
1317 		u32 len = hw[dw] & LRI_LENGTH_MASK;
1318 
1319 		/* For simplicity, break parsing at the first complex command */
1320 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1321 			break;
1322 
1323 		if (hw[dw] == 0) {
1324 			dw++;
1325 			continue;
1326 		}
1327 
1328 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1329 			dw += len + 2;
1330 			continue;
1331 		}
1332 
1333 		if (!len) {
1334 			pr_err("%s: invalid LRI found in context image\n",
1335 			       engine->name);
1336 			igt_hexdump(defaults, PAGE_SIZE);
1337 			break;
1338 		}
1339 
1340 		dw++;
1341 		len = (len + 1) / 2;
1342 		while (len--) {
1343 			if (!is_moving(A[0][x], A[1][x]) &&
1344 			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
1345 				switch (hw[dw] & 4095) {
1346 				case 0x30: /* RING_HEAD */
1347 				case 0x34: /* RING_TAIL */
1348 					break;
1349 
1350 				default:
1351 					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
1352 					       engine->name, dw,
1353 					       hw[dw], hw[dw + 1],
1354 					       A[0][x], B[0][x], B[1][x],
1355 					       poison, lrc[dw + 1]);
1356 					err = -EINVAL;
1357 				}
1358 			}
1359 			dw += 2;
1360 			x++;
1361 		}
1362 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1363 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1364 
1365 	shmem_unpin_map(ce->engine->default_state, defaults);
1366 err_lrc:
1367 	i915_gem_object_unpin_map(ce->state->obj);
1368 err_B1:
1369 	i915_gem_object_unpin_map(result[1]->obj);
1370 err_B0:
1371 	i915_gem_object_unpin_map(result[0]->obj);
1372 err_A1:
1373 	i915_gem_object_unpin_map(ref[1]->obj);
1374 err_A0:
1375 	i915_gem_object_unpin_map(ref[0]->obj);
1376 	return err;
1377 }
1378 
1379 static struct i915_vma *
1380 create_result_vma(struct i915_address_space *vm, unsigned long sz)
1381 {
1382 	struct i915_vma *vma;
1383 	void *ptr;
1384 
1385 	vma = create_user_vma(vm, sz);
1386 	if (IS_ERR(vma))
1387 		return vma;
1388 
1389 	/* Set the results to a known value distinct from the poison */
1390 	ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
1391 	if (IS_ERR(ptr)) {
1392 		i915_vma_put(vma);
1393 		return ERR_CAST(ptr);
1394 	}
1395 
1396 	memset(ptr, POISON_INUSE, vma->size);
1397 	i915_gem_object_flush_map(vma->obj);
1398 	i915_gem_object_unpin_map(vma->obj);
1399 
1400 	return vma;
1401 }
1402 
1403 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
1404 {
1405 	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
1406 	struct i915_vma *ref[2], *result[2];
1407 	struct intel_context *A, *B;
1408 	struct i915_request *rq;
1409 	int err;
1410 
1411 	A = intel_context_create(engine);
1412 	if (IS_ERR(A))
1413 		return PTR_ERR(A);
1414 
1415 	B = intel_context_create(engine);
1416 	if (IS_ERR(B)) {
1417 		err = PTR_ERR(B);
1418 		goto err_A;
1419 	}
1420 
1421 	ref[0] = create_result_vma(A->vm, SZ_64K);
1422 	if (IS_ERR(ref[0])) {
1423 		err = PTR_ERR(ref[0]);
1424 		goto err_B;
1425 	}
1426 
1427 	ref[1] = create_result_vma(A->vm, SZ_64K);
1428 	if (IS_ERR(ref[1])) {
1429 		err = PTR_ERR(ref[1]);
1430 		goto err_ref0;
1431 	}
1432 
1433 	rq = record_registers(A, ref[0], ref[1], sema);
1434 	if (IS_ERR(rq)) {
1435 		err = PTR_ERR(rq);
1436 		goto err_ref1;
1437 	}
1438 
1439 	WRITE_ONCE(*sema, 1);
1440 	wmb();
1441 
1442 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1443 		i915_request_put(rq);
1444 		err = -ETIME;
1445 		goto err_ref1;
1446 	}
1447 	i915_request_put(rq);
1448 
1449 	result[0] = create_result_vma(A->vm, SZ_64K);
1450 	if (IS_ERR(result[0])) {
1451 		err = PTR_ERR(result[0]);
1452 		goto err_ref1;
1453 	}
1454 
1455 	result[1] = create_result_vma(A->vm, SZ_64K);
1456 	if (IS_ERR(result[1])) {
1457 		err = PTR_ERR(result[1]);
1458 		goto err_result0;
1459 	}
1460 
1461 	rq = record_registers(A, result[0], result[1], sema);
1462 	if (IS_ERR(rq)) {
1463 		err = PTR_ERR(rq);
1464 		goto err_result1;
1465 	}
1466 
1467 	err = poison_registers(B, poison, sema);
1468 	if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
1469 		pr_err("%s(%s): wait for results timed out\n",
1470 		       __func__, engine->name);
1471 		err = -ETIME;
1472 	}
1473 
1474 	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
1475 	WRITE_ONCE(*sema, -1);
1476 	i915_request_put(rq);
1477 	if (err)
1478 		goto err_result1;
1479 
1480 	err = compare_isolation(engine, ref, result, A, poison);
1481 
1482 err_result1:
1483 	i915_vma_put(result[1]);
1484 err_result0:
1485 	i915_vma_put(result[0]);
1486 err_ref1:
1487 	i915_vma_put(ref[1]);
1488 err_ref0:
1489 	i915_vma_put(ref[0]);
1490 err_B:
1491 	intel_context_put(B);
1492 err_A:
1493 	intel_context_put(A);
1494 	return err;
1495 }
1496 
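/*
 * Engines for which the isolation check is skipped unless
 * CONFIG_DRM_I915_SELFTEST_BROKEN is enabled.
 */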
1497 static bool skip_isolation(const struct intel_engine_cs *engine)
1498 {
1499 	if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1500 		return true;
1501 
1502 	if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1503 		return true;
1504 
1505 	return false;
1506 }
1507 
1508 static int live_lrc_isolation(void *arg)
1509 {
1510 	struct intel_gt *gt = arg;
1511 	struct intel_engine_cs *engine;
1512 	enum intel_engine_id id;
1513 	const u32 poison[] = {
1514 		STACK_MAGIC,
1515 		0x3a3a3a3a,
1516 		0x5c5c5c5c,
1517 		0xffffffff,
1518 		0xffff0000,
1519 	};
1520 	int err = 0;
1521 
1522 	/*
1523 	 * Our goal is to verify that per-context state cannot be
1524 	 * tampered with by another non-privileged client.
1525 	 *
1526 	 * We take the list of context registers from the LRI in the default
1527 	 * context image and attempt to modify that list from a remote context.
1528 	 */
1529 
1530 	for_each_engine(engine, gt, id) {
1531 		int i;
1532 
1533 		/* Just don't even ask */
1534 		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1535 		    skip_isolation(engine))
1536 			continue;
1537 
1538 		intel_engine_pm_get(engine);
1539 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
1540 			int result;
1541 
1542 			result = __lrc_isolation(engine, poison[i]);
1543 			if (result && !err)
1544 				err = result;
1545 
1546 			result = __lrc_isolation(engine, ~poison[i]);
1547 			if (result && !err)
1548 				err = result;
1549 		}
1550 		intel_engine_pm_put(engine);
1551 		if (igt_flush_test(gt->i915)) {
1552 			err = -EIO;
1553 			break;
1554 		}
1555 	}
1556 
1557 	return err;
1558 }
1559 
1560 static int indirect_ctx_submit_req(struct intel_context *ce)
1561 {
1562 	struct i915_request *rq;
1563 	int err = 0;
1564 
1565 	rq = intel_context_create_request(ce);
1566 	if (IS_ERR(rq))
1567 		return PTR_ERR(rq);
1568 
1569 	i915_request_get(rq);
1570 	i915_request_add(rq);
1571 
1572 	if (i915_request_wait(rq, 0, HZ / 5) < 0)
1573 		err = -ETIME;
1574 
1575 	i915_request_put(rq);
1576 
1577 	return err;
1578 }
1579 
1580 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1581 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
1582 
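/*
 * The canary batch stores RING_START into a spare dword of the context's
 * wa_bb page so we can later verify that the indirect ctx bb ran with this
 * context's state loaded.
 */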
1583 static u32 *
1584 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1585 {
1586 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1587 		MI_SRM_LRM_GLOBAL_GTT |
1588 		MI_LRI_LRM_CS_MMIO;
1589 	*cs++ = i915_mmio_reg_offset(RING_START(0));
1590 	*cs++ = i915_ggtt_offset(ce->state) +
1591 		context_wa_bb_offset(ce) +
1592 		CTX_BB_CANARY_OFFSET;
1593 	*cs++ = 0;
1594 
1595 	return cs;
1596 }
1597 
1598 static void
1599 indirect_ctx_bb_setup(struct intel_context *ce)
1600 {
1601 	u32 *cs = context_indirect_bb(ce);
1602 
1603 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1604 
1605 	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1606 }
1607 
1608 static bool check_ring_start(struct intel_context *ce)
1609 {
1610 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1611 		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
1612 
1613 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1614 		return true;
1615 
1616 	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1617 	       ctx_bb[CTX_BB_CANARY_INDEX],
1618 	       ce->lrc_reg_state[CTX_RING_START]);
1619 
1620 	return false;
1621 }
1622 
1623 static int indirect_ctx_bb_check(struct intel_context *ce)
1624 {
1625 	int err;
1626 
1627 	err = indirect_ctx_submit_req(ce);
1628 	if (err)
1629 		return err;
1630 
1631 	if (!check_ring_start(ce))
1632 		return -EINVAL;
1633 
1634 	return 0;
1635 }
1636 
1637 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
1638 {
1639 	struct intel_context *a, *b;
1640 	int err;
1641 
1642 	a = intel_context_create(engine);
1643 	if (IS_ERR(a))
1644 		return PTR_ERR(a);
1645 	err = intel_context_pin(a);
1646 	if (err)
1647 		goto put_a;
1648 
1649 	b = intel_context_create(engine);
1650 	if (IS_ERR(b)) {
1651 		err = PTR_ERR(b);
1652 		goto unpin_a;
1653 	}
1654 	err = intel_context_pin(b);
1655 	if (err)
1656 		goto put_b;
1657 
1658 	/* We use the already reserved extra page in context state */
1659 	if (!a->wa_bb_page) {
1660 		GEM_BUG_ON(b->wa_bb_page);
1661 		GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1662 		goto unpin_b;
1663 	}
1664 
1665 	/*
1666 	 * In order to test that our per-context bb is truly per context,
1667 	 * and executes at the intended point of the context restore process,
1668 	 * make the batch store the ring start value to memory.
1669 	 * As ring start is restored prior to running the indirect ctx bb, and
1670 	 * as it will be different for each context, it suits this purpose.
1671 	 */
1672 	indirect_ctx_bb_setup(a);
1673 	indirect_ctx_bb_setup(b);
1674 
1675 	err = indirect_ctx_bb_check(a);
1676 	if (err)
1677 		goto unpin_b;
1678 
1679 	err = indirect_ctx_bb_check(b);
1680 
1681 unpin_b:
1682 	intel_context_unpin(b);
1683 put_b:
1684 	intel_context_put(b);
1685 unpin_a:
1686 	intel_context_unpin(a);
1687 put_a:
1688 	intel_context_put(a);
1689 
1690 	return err;
1691 }
1692 
1693 static int live_lrc_indirect_ctx_bb(void *arg)
1694 {
1695 	struct intel_gt *gt = arg;
1696 	struct intel_engine_cs *engine;
1697 	enum intel_engine_id id;
1698 	int err = 0;
1699 
1700 	for_each_engine(engine, gt, id) {
1701 		intel_engine_pm_get(engine);
1702 		err = __live_lrc_indirect_ctx_bb(engine);
1703 		intel_engine_pm_put(engine);
1704 
1705 		if (igt_flush_test(gt->i915))
1706 			err = -EIO;
1707 
1708 		if (err)
1709 			break;
1710 	}
1711 
1712 	return err;
1713 }
1714 
1715 static void garbage_reset(struct intel_engine_cs *engine,
1716 			  struct i915_request *rq)
1717 {
1718 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
1719 	unsigned long *lock = &engine->gt->reset.flags;
1720 
1721 	local_bh_disable();
1722 	if (!test_and_set_bit(bit, lock)) {
1723 		tasklet_disable(&engine->sched_engine->tasklet);
1724 
1725 		if (!rq->fence.error)
1726 			__intel_engine_reset_bh(engine, NULL);
1727 
1728 		tasklet_enable(&engine->sched_engine->tasklet);
1729 		clear_and_wake_up_bit(bit, lock);
1730 	}
1731 	local_bh_enable();
1732 }
1733 
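/*
 * Scribble random bytes over the pinned context register state and submit
 * a request on it, returning the request (with a reference held) so the
 * caller can watch the fallout.
 */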
1734 static struct i915_request *garbage(struct intel_context *ce,
1735 				    struct rnd_state *prng)
1736 {
1737 	struct i915_request *rq;
1738 	int err;
1739 
1740 	err = intel_context_pin(ce);
1741 	if (err)
1742 		return ERR_PTR(err);
1743 
1744 	prandom_bytes_state(prng,
1745 			    ce->lrc_reg_state,
1746 			    ce->engine->context_size -
1747 			    LRC_STATE_OFFSET);
1748 
1749 	rq = intel_context_create_request(ce);
1750 	if (IS_ERR(rq)) {
1751 		err = PTR_ERR(rq);
1752 		goto err_unpin;
1753 	}
1754 
1755 	i915_request_get(rq);
1756 	i915_request_add(rq);
1757 	return rq;
1758 
1759 err_unpin:
1760 	intel_context_unpin(ce);
1761 	return ERR_PTR(err);
1762 }
1763 
1764 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1765 {
1766 	struct intel_context *ce;
1767 	struct i915_request *hang;
1768 	int err = 0;
1769 
1770 	ce = intel_context_create(engine);
1771 	if (IS_ERR(ce))
1772 		return PTR_ERR(ce);
1773 
1774 	hang = garbage(ce, prng);
1775 	if (IS_ERR(hang)) {
1776 		err = PTR_ERR(hang);
1777 		goto err_ce;
1778 	}
1779 
1780 	if (wait_for_submit(engine, hang, HZ / 2)) {
1781 		i915_request_put(hang);
1782 		err = -ETIME;
1783 		goto err_ce;
1784 	}
1785 
1786 	intel_context_set_banned(ce);
1787 	garbage_reset(engine, hang);
1788 
1789 	intel_engine_flush_submission(engine);
1790 	if (!hang->fence.error) {
1791 		i915_request_put(hang);
1792 		pr_err("%s: corrupted context was not reset\n",
1793 		       engine->name);
1794 		err = -EINVAL;
1795 		goto err_ce;
1796 	}
1797 
1798 	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1799 		pr_err("%s: corrupted context did not recover\n",
1800 		       engine->name);
1801 		i915_request_put(hang);
1802 		err = -EIO;
1803 		goto err_ce;
1804 	}
1805 	i915_request_put(hang);
1806 
1807 err_ce:
1808 	intel_context_put(ce);
1809 	return err;
1810 }
1811 
1812 static int live_lrc_garbage(void *arg)
1813 {
1814 	struct intel_gt *gt = arg;
1815 	struct intel_engine_cs *engine;
1816 	enum intel_engine_id id;
1817 
1818 	/*
1819 	 * Verify that we can recover if one context state is completely
1820 	 * corrupted.
1821 	 */
1822 
1823 	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1824 		return 0;
1825 
1826 	for_each_engine(engine, gt, id) {
1827 		I915_RND_STATE(prng);
1828 		int err = 0, i;
1829 
1830 		if (!intel_has_reset_engine(engine->gt))
1831 			continue;
1832 
1833 		intel_engine_pm_get(engine);
1834 		for (i = 0; i < 3; i++) {
1835 			err = __lrc_garbage(engine, &prng);
1836 			if (err)
1837 				break;
1838 		}
1839 		intel_engine_pm_put(engine);
1840 
1841 		if (igt_flush_test(gt->i915))
1842 			err = -EIO;
1843 		if (err)
1844 			return err;
1845 	}
1846 
1847 	return 0;
1848 }
1849 
1850 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1851 {
1852 	struct intel_context *ce;
1853 	struct i915_request *rq;
1854 	IGT_TIMEOUT(end_time);
1855 	int err;
1856 
1857 	ce = intel_context_create(engine);
1858 	if (IS_ERR(ce))
1859 		return PTR_ERR(ce);
1860 
1861 	ce->stats.runtime.num_underflow = 0;
1862 	ce->stats.runtime.max_underflow = 0;
1863 
1864 	do {
1865 		unsigned int loop = 1024;
1866 
1867 		while (loop) {
1868 			rq = intel_context_create_request(ce);
1869 			if (IS_ERR(rq)) {
1870 				err = PTR_ERR(rq);
1871 				goto err_rq;
1872 			}
1873 
1874 			if (--loop == 0)
1875 				i915_request_get(rq);
1876 
1877 			i915_request_add(rq);
1878 		}
1879 
1880 		if (__igt_timeout(end_time, NULL))
1881 			break;
1882 
1883 		i915_request_put(rq);
1884 	} while (1);
1885 
1886 	err = i915_request_wait(rq, 0, HZ / 5);
1887 	if (err < 0) {
1888 		pr_err("%s: request not completed!\n", engine->name);
1889 		goto err_wait;
1890 	}
1891 
1892 	igt_flush_test(engine->i915);
1893 
1894 	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1895 		engine->name,
1896 		intel_context_get_total_runtime_ns(ce),
1897 		intel_context_get_avg_runtime_ns(ce));
1898 
1899 	err = 0;
1900 	if (ce->stats.runtime.num_underflow) {
1901 		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1902 		       engine->name,
1903 		       ce->stats.runtime.num_underflow,
1904 		       ce->stats.runtime.max_underflow);
1905 		GEM_TRACE_DUMP();
1906 		err = -EOVERFLOW;
1907 	}
1908 
1909 err_wait:
1910 	i915_request_put(rq);
1911 err_rq:
1912 	intel_context_put(ce);
1913 	return err;
1914 }
1915 
1916 static int live_pphwsp_runtime(void *arg)
1917 {
1918 	struct intel_gt *gt = arg;
1919 	struct intel_engine_cs *engine;
1920 	enum intel_engine_id id;
1921 	int err = 0;
1922 
1923 	/*
1924 	 * Check that the cumulative context runtime, as stored in the pphwsp[16],
1925 	 * is monotonic.
1926 	 */
1927 
1928 	for_each_engine(engine, gt, id) {
1929 		err = __live_pphwsp_runtime(engine);
1930 		if (err)
1931 			break;
1932 	}
1933 
1934 	if (igt_flush_test(gt->i915))
1935 		err = -EIO;
1936 
1937 	return err;
1938 }
1939 
1940 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1941 {
1942 	static const struct i915_subtest tests[] = {
1943 		SUBTEST(live_lrc_layout),
1944 		SUBTEST(live_lrc_fixed),
1945 		SUBTEST(live_lrc_state),
1946 		SUBTEST(live_lrc_gpr),
1947 		SUBTEST(live_lrc_isolation),
1948 		SUBTEST(live_lrc_timestamp),
1949 		SUBTEST(live_lrc_garbage),
1950 		SUBTEST(live_pphwsp_runtime),
1951 		SUBTEST(live_lrc_indirect_ctx_bb),
1952 	};
1953 
1954 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1955 		return 0;
1956 
1957 	return intel_gt_live_subtests(tests, to_gt(i915));
1958 }
1959