1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2018 Intel Corporation
4 */
5
6 #include <linux/prime_numbers.h>
7
8 #include "gem/i915_gem_internal.h"
9
10 #include "i915_selftest.h"
11 #include "intel_engine_heartbeat.h"
12 #include "intel_engine_pm.h"
13 #include "intel_reset.h"
14 #include "intel_ring.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftests/i915_random.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_live_test.h"
19 #include "selftests/igt_spinner.h"
20 #include "selftests/lib_sw_fence.h"
21 #include "shmem_utils.h"
22
23 #include "gem/selftests/igt_gem_utils.h"
24 #include "gem/selftests/mock_context.h"
25
/* MMIO offset of dword @n within the GPR bank at mmio_base + 0x600 */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
/* Number of general purpose registers exercised by the tests below */
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

/* Command header used to recognise LRI packets in a context image */
#define LRI_HEADER MI_INSTR(0x22, 0)
/* Bits of a command dword that hold its length field */
#define LRI_LENGTH_MASK GENMASK(7, 0)
32
create_scratch(struct intel_gt * gt)33 static struct i915_vma *create_scratch(struct intel_gt *gt)
34 {
35 return __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE);
36 }
37
is_active(struct i915_request * rq)38 static bool is_active(struct i915_request *rq)
39 {
40 if (i915_request_is_active(rq))
41 return true;
42
43 if (i915_request_on_hold(rq))
44 return true;
45
46 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
47 return true;
48
49 return false;
50 }
51
wait_for_submit(struct intel_engine_cs * engine,struct i915_request * rq,unsigned long timeout)52 static int wait_for_submit(struct intel_engine_cs *engine,
53 struct i915_request *rq,
54 unsigned long timeout)
55 {
56 /* Ignore our own attempts to suppress excess tasklets */
57 tasklet_hi_schedule(&engine->sched_engine->tasklet);
58
59 timeout += jiffies;
60 do {
61 bool done = time_after(jiffies, timeout);
62
63 if (i915_request_completed(rq)) /* that was quick! */
64 return 0;
65
66 /* Wait until the HW has acknowleged the submission (or err) */
67 intel_engine_flush_submission(engine);
68 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
69 return 0;
70
71 if (done)
72 return -ETIME;
73
74 cond_resched();
75 } while (1);
76 }
77
emit_semaphore_signal(struct intel_context * ce,void * slot)78 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
79 {
80 const u32 offset =
81 i915_ggtt_offset(ce->engine->status_page.vma) +
82 offset_in_page(slot);
83 struct i915_request *rq;
84 u32 *cs;
85
86 rq = intel_context_create_request(ce);
87 if (IS_ERR(rq))
88 return PTR_ERR(rq);
89
90 cs = intel_ring_begin(rq, 4);
91 if (IS_ERR(cs)) {
92 i915_request_add(rq);
93 return PTR_ERR(cs);
94 }
95
96 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
97 *cs++ = offset;
98 *cs++ = 0;
99 *cs++ = 1;
100
101 intel_ring_advance(rq, cs);
102
103 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
104 i915_request_add(rq);
105 return 0;
106 }
107
context_flush(struct intel_context * ce,long timeout)108 static int context_flush(struct intel_context *ce, long timeout)
109 {
110 struct i915_request *rq;
111 struct dma_fence *fence;
112 int err = 0;
113
114 rq = intel_engine_create_kernel_request(ce->engine);
115 if (IS_ERR(rq))
116 return PTR_ERR(rq);
117
118 fence = i915_active_fence_get(&ce->timeline->last_request);
119 if (fence) {
120 i915_request_await_dma_fence(rq, fence);
121 dma_fence_put(fence);
122 }
123
124 rq = i915_request_get(rq);
125 i915_request_add(rq);
126 if (i915_request_wait(rq, 0, timeout) < 0)
127 err = -ETIME;
128 i915_request_put(rq);
129
130 rmb(); /* We know the request is written, make sure all state is too! */
131 return err;
132 }
133
/*
 * Return the mask of register-offset bits that are compared for an LRI
 * whose header dword is @lri.
 *
 * Without MI_LRI_LRM_CS_MMIO set every bit is significant. With it set,
 * only the low 12 bits matter before graphics version 12; from version 12
 * the mask depends on the engine class.
 */
static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
{
	if (!(lri & MI_LRI_LRM_CS_MMIO))
		return ~0u;

	if (GRAPHICS_VER(engine->i915) < 12)
		return 0xfff;

	switch (engine->class) {
	case COPY_ENGINE_CLASS:
		return 0x0fff;
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
		return 0x3fff;
	case RENDER_CLASS:
	case COMPUTE_CLASS:
	default:
		return 0x07ff;
	}
}
154
live_lrc_layout(void * arg)155 static int live_lrc_layout(void *arg)
156 {
157 struct intel_gt *gt = arg;
158 struct intel_engine_cs *engine;
159 enum intel_engine_id id;
160 u32 *lrc;
161 int err;
162
163 /*
164 * Check the registers offsets we use to create the initial reg state
165 * match the layout saved by HW.
166 */
167
168 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
169 if (!lrc)
170 return -ENOMEM;
171 GEM_BUG_ON(offset_in_page(lrc));
172
173 err = 0;
174 for_each_engine(engine, gt, id) {
175 u32 *hw;
176 int dw;
177
178 if (!engine->default_state)
179 continue;
180
181 hw = shmem_pin_map(engine->default_state);
182 if (!hw) {
183 err = -ENOMEM;
184 break;
185 }
186 hw += LRC_STATE_OFFSET / sizeof(*hw);
187
188 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
189 engine->kernel_context, engine, true);
190
191 dw = 0;
192 do {
193 u32 lri = READ_ONCE(hw[dw]);
194 u32 lri_mask;
195
196 if (lri == 0) {
197 dw++;
198 continue;
199 }
200
201 if (lrc[dw] == 0) {
202 pr_debug("%s: skipped instruction %x at dword %d\n",
203 engine->name, lri, dw);
204 dw++;
205 continue;
206 }
207
208 if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
209 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
210 engine->name, dw, lri);
211 err = -EINVAL;
212 break;
213 }
214
215 if (lrc[dw] != lri) {
216 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
217 engine->name, dw, lri, lrc[dw]);
218 err = -EINVAL;
219 break;
220 }
221
222 /*
223 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
224 * opcode is set on Gen12+ devices, HW does not
225 * care about certain register address offsets, and
226 * instead check the following for valid address
227 * ranges on specific engines:
228 * RCS && CCS: BITS(0 - 10)
229 * BCS: BITS(0 - 11)
230 * VECS && VCS: BITS(0 - 13)
231 */
232 lri_mask = get_lri_mask(engine, lri);
233
234 lri &= 0x7f;
235 lri++;
236 dw++;
237
238 while (lri) {
239 u32 offset = READ_ONCE(hw[dw]);
240
241 if ((offset ^ lrc[dw]) & lri_mask) {
242 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
243 engine->name, dw, offset, lrc[dw]);
244 err = -EINVAL;
245 break;
246 }
247
248 /*
249 * Skip over the actual register value as we
250 * expect that to differ.
251 */
252 dw += 2;
253 lri -= 2;
254 }
255 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
256
257 if (err) {
258 pr_info("%s: HW register image:\n", engine->name);
259 igt_hexdump(hw, PAGE_SIZE);
260
261 pr_info("%s: SW register image:\n", engine->name);
262 igt_hexdump(lrc, PAGE_SIZE);
263 }
264
265 shmem_unpin_map(engine->default_state, hw);
266 if (err)
267 break;
268 }
269
270 free_page((unsigned long)lrc);
271 return err;
272 }
273
/*
 * Search the first page of dwords at @lri for the value @offset.
 *
 * Returns the index of the first matching dword, or -1 if none matches.
 */
static int find_offset(const u32 *lri, u32 offset)
{
	const int count = PAGE_SIZE / sizeof(u32);
	int idx = 0;

	while (idx < count) {
		if (lri[idx] == offset)
			return idx;
		idx++;
	}

	return -1;
}
284
live_lrc_fixed(void * arg)285 static int live_lrc_fixed(void *arg)
286 {
287 struct intel_gt *gt = arg;
288 struct intel_engine_cs *engine;
289 enum intel_engine_id id;
290 int err = 0;
291
292 /*
293 * Check the assumed register offsets match the actual locations in
294 * the context image.
295 */
296
297 for_each_engine(engine, gt, id) {
298 const struct {
299 u32 reg;
300 u32 offset;
301 const char *name;
302 } tbl[] = {
303 {
304 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
305 CTX_RING_START - 1,
306 "RING_START"
307 },
308 {
309 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
310 CTX_RING_CTL - 1,
311 "RING_CTL"
312 },
313 {
314 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
315 CTX_RING_HEAD - 1,
316 "RING_HEAD"
317 },
318 {
319 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
320 CTX_RING_TAIL - 1,
321 "RING_TAIL"
322 },
323 {
324 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
325 lrc_ring_mi_mode(engine),
326 "RING_MI_MODE"
327 },
328 {
329 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
330 CTX_BB_STATE - 1,
331 "BB_STATE"
332 },
333 {
334 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
335 lrc_ring_wa_bb_per_ctx(engine),
336 "RING_BB_PER_CTX_PTR"
337 },
338 {
339 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
340 lrc_ring_indirect_ptr(engine),
341 "RING_INDIRECT_CTX_PTR"
342 },
343 {
344 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
345 lrc_ring_indirect_offset(engine),
346 "RING_INDIRECT_CTX_OFFSET"
347 },
348 {
349 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
350 CTX_TIMESTAMP - 1,
351 "RING_CTX_TIMESTAMP"
352 },
353 {
354 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
355 lrc_ring_gpr0(engine),
356 "RING_CS_GPR0"
357 },
358 {
359 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
360 lrc_ring_cmd_buf_cctl(engine),
361 "RING_CMD_BUF_CCTL"
362 },
363 {
364 i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
365 lrc_ring_bb_offset(engine),
366 "RING_BB_OFFSET"
367 },
368 { },
369 }, *t;
370 u32 *hw;
371
372 if (!engine->default_state)
373 continue;
374
375 hw = shmem_pin_map(engine->default_state);
376 if (!hw) {
377 err = -ENOMEM;
378 break;
379 }
380 hw += LRC_STATE_OFFSET / sizeof(*hw);
381
382 for (t = tbl; t->name; t++) {
383 int dw = find_offset(hw, t->reg);
384
385 if (dw != t->offset) {
386 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
387 engine->name,
388 t->name,
389 t->reg,
390 dw,
391 t->offset);
392 err = -EINVAL;
393 }
394 }
395
396 shmem_unpin_map(engine->default_state, hw);
397 }
398
399 return err;
400 }
401
__live_lrc_state(struct intel_engine_cs * engine,struct i915_vma * scratch)402 static int __live_lrc_state(struct intel_engine_cs *engine,
403 struct i915_vma *scratch)
404 {
405 struct intel_context *ce;
406 struct i915_request *rq;
407 struct i915_gem_ww_ctx ww;
408 enum {
409 RING_START_IDX = 0,
410 RING_TAIL_IDX,
411 MAX_IDX
412 };
413 u32 expected[MAX_IDX];
414 u32 *cs;
415 int err;
416 int n;
417
418 ce = intel_context_create(engine);
419 if (IS_ERR(ce))
420 return PTR_ERR(ce);
421
422 i915_gem_ww_ctx_init(&ww, false);
423 retry:
424 err = i915_gem_object_lock(scratch->obj, &ww);
425 if (!err)
426 err = intel_context_pin_ww(ce, &ww);
427 if (err)
428 goto err_put;
429
430 rq = i915_request_create(ce);
431 if (IS_ERR(rq)) {
432 err = PTR_ERR(rq);
433 goto err_unpin;
434 }
435
436 cs = intel_ring_begin(rq, 4 * MAX_IDX);
437 if (IS_ERR(cs)) {
438 err = PTR_ERR(cs);
439 i915_request_add(rq);
440 goto err_unpin;
441 }
442
443 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
444 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
445 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
446 *cs++ = 0;
447
448 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
449
450 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
451 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
452 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
453 *cs++ = 0;
454
455 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
456
457 i915_request_get(rq);
458 i915_request_add(rq);
459 if (err)
460 goto err_rq;
461
462 intel_engine_flush_submission(engine);
463 expected[RING_TAIL_IDX] = ce->ring->tail;
464
465 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
466 err = -ETIME;
467 goto err_rq;
468 }
469
470 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
471 if (IS_ERR(cs)) {
472 err = PTR_ERR(cs);
473 goto err_rq;
474 }
475
476 for (n = 0; n < MAX_IDX; n++) {
477 if (cs[n] != expected[n]) {
478 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
479 engine->name, n, cs[n], expected[n]);
480 err = -EINVAL;
481 break;
482 }
483 }
484
485 i915_gem_object_unpin_map(scratch->obj);
486
487 err_rq:
488 i915_request_put(rq);
489 err_unpin:
490 intel_context_unpin(ce);
491 err_put:
492 if (err == -EDEADLK) {
493 err = i915_gem_ww_ctx_backoff(&ww);
494 if (!err)
495 goto retry;
496 }
497 i915_gem_ww_ctx_fini(&ww);
498 intel_context_put(ce);
499 return err;
500 }
501
live_lrc_state(void * arg)502 static int live_lrc_state(void *arg)
503 {
504 struct intel_gt *gt = arg;
505 struct intel_engine_cs *engine;
506 struct i915_vma *scratch;
507 enum intel_engine_id id;
508 int err = 0;
509
510 /*
511 * Check the live register state matches what we expect for this
512 * intel_context.
513 */
514
515 scratch = create_scratch(gt);
516 if (IS_ERR(scratch))
517 return PTR_ERR(scratch);
518
519 for_each_engine(engine, gt, id) {
520 err = __live_lrc_state(engine, scratch);
521 if (err)
522 break;
523 }
524
525 if (igt_flush_test(gt->i915))
526 err = -EIO;
527
528 i915_vma_unpin_and_release(&scratch, 0);
529 return err;
530 }
531
gpr_make_dirty(struct intel_context * ce)532 static int gpr_make_dirty(struct intel_context *ce)
533 {
534 struct i915_request *rq;
535 u32 *cs;
536 int n;
537
538 rq = intel_context_create_request(ce);
539 if (IS_ERR(rq))
540 return PTR_ERR(rq);
541
542 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
543 if (IS_ERR(cs)) {
544 i915_request_add(rq);
545 return PTR_ERR(cs);
546 }
547
548 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
549 for (n = 0; n < NUM_GPR_DW; n++) {
550 *cs++ = CS_GPR(ce->engine, n);
551 *cs++ = STACK_MAGIC;
552 }
553 *cs++ = MI_NOOP;
554
555 intel_ring_advance(rq, cs);
556
557 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
558 i915_request_add(rq);
559
560 return 0;
561 }
562
563 static struct i915_request *
__gpr_read(struct intel_context * ce,struct i915_vma * scratch,u32 * slot)564 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
565 {
566 const u32 offset =
567 i915_ggtt_offset(ce->engine->status_page.vma) +
568 offset_in_page(slot);
569 struct i915_request *rq;
570 u32 *cs;
571 int err;
572 int n;
573
574 rq = intel_context_create_request(ce);
575 if (IS_ERR(rq))
576 return rq;
577
578 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
579 if (IS_ERR(cs)) {
580 i915_request_add(rq);
581 return ERR_CAST(cs);
582 }
583
584 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
585 *cs++ = MI_NOOP;
586
587 *cs++ = MI_SEMAPHORE_WAIT |
588 MI_SEMAPHORE_GLOBAL_GTT |
589 MI_SEMAPHORE_POLL |
590 MI_SEMAPHORE_SAD_NEQ_SDD;
591 *cs++ = 0;
592 *cs++ = offset;
593 *cs++ = 0;
594
595 for (n = 0; n < NUM_GPR_DW; n++) {
596 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
597 *cs++ = CS_GPR(ce->engine, n);
598 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
599 *cs++ = 0;
600 }
601
602 err = igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE);
603
604 i915_request_get(rq);
605 i915_request_add(rq);
606 if (err) {
607 i915_request_put(rq);
608 rq = ERR_PTR(err);
609 }
610
611 return rq;
612 }
613
/*
 * Dirty the GPRs from the kernel context, then read them back from a fresh
 * context on @engine and check that every dword reads as zero.
 *
 * With @preempt set, the reader is released by a second dirtying request
 * followed by a semaphore signal from the kernel context; otherwise the
 * CPU writes the semaphore directly.
 *
 * Returns 0 on success (or when the engine is skipped), -ETIME on timeout,
 * -EINVAL if a GPR dword was non-zero, or another negative error code.
 */
static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *slot;
	u32 *vaddr;
	int err;
	int dw;

	/* Semaphore: four zeroed dwords at dword 1000 of the status page */
	slot = memset32(engine->status_page.addr + 1000, 0, 4);

	if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (!preempt) {
		/* Release the reader directly from the CPU */
		slot[0] = 1;
		wmb();
	} else {
		err = gpr_make_dirty(engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(engine->kernel_context, slot);
		if (err)
			goto err_rq;

		err = wait_for_submit(engine, rq, HZ / 2);
		if (err)
			goto err_rq;
	}

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	vaddr = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_rq;
	}

	for (dw = 0; dw < NUM_GPR_DW; dw++) {
		if (!vaddr[dw])
			continue;

		pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
		       engine->name,
		       dw / 2, dw & 1 ? "udw" : "ldw",
		       vaddr[dw]);
		err = -EINVAL;
		break;
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	/* Fill the semaphore with non-zero values before dropping the request */
	memset32(&slot[0], -1, 4);
	wmb();
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}
695
live_lrc_gpr(void * arg)696 static int live_lrc_gpr(void *arg)
697 {
698 struct intel_gt *gt = arg;
699 struct intel_engine_cs *engine;
700 struct i915_vma *scratch;
701 enum intel_engine_id id;
702 int err = 0;
703
704 /*
705 * Check that GPR registers are cleared in new contexts as we need
706 * to avoid leaking any information from previous contexts.
707 */
708
709 scratch = create_scratch(gt);
710 if (IS_ERR(scratch))
711 return PTR_ERR(scratch);
712
713 for_each_engine(engine, gt, id) {
714 st_engine_heartbeat_disable(engine);
715
716 err = __live_lrc_gpr(engine, scratch, false);
717 if (err)
718 goto err;
719
720 err = __live_lrc_gpr(engine, scratch, true);
721 if (err)
722 goto err;
723
724 err:
725 st_engine_heartbeat_enable(engine);
726 if (igt_flush_test(gt->i915))
727 err = -EIO;
728 if (err)
729 break;
730 }
731
732 i915_vma_unpin_and_release(&scratch, 0);
733 return err;
734 }
735
736 static struct i915_request *
create_timestamp(struct intel_context * ce,void * slot,int idx)737 create_timestamp(struct intel_context *ce, void *slot, int idx)
738 {
739 const u32 offset =
740 i915_ggtt_offset(ce->engine->status_page.vma) +
741 offset_in_page(slot);
742 struct i915_request *rq;
743 u32 *cs;
744 int err;
745
746 rq = intel_context_create_request(ce);
747 if (IS_ERR(rq))
748 return rq;
749
750 cs = intel_ring_begin(rq, 10);
751 if (IS_ERR(cs)) {
752 err = PTR_ERR(cs);
753 goto err;
754 }
755
756 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
757 *cs++ = MI_NOOP;
758
759 *cs++ = MI_SEMAPHORE_WAIT |
760 MI_SEMAPHORE_GLOBAL_GTT |
761 MI_SEMAPHORE_POLL |
762 MI_SEMAPHORE_SAD_NEQ_SDD;
763 *cs++ = 0;
764 *cs++ = offset;
765 *cs++ = 0;
766
767 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
768 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
769 *cs++ = offset + idx * sizeof(u32);
770 *cs++ = 0;
771
772 intel_ring_advance(rq, cs);
773
774 err = 0;
775 err:
776 i915_request_get(rq);
777 i915_request_add(rq);
778 if (err) {
779 i915_request_put(rq);
780 return ERR_PTR(err);
781 }
782
783 return rq;
784 }
785
786 struct lrc_timestamp {
787 struct intel_engine_cs *engine;
788 struct intel_context *ce[2];
789 u32 poison;
790 };
791
timestamp_advanced(u32 start,u32 end)792 static bool timestamp_advanced(u32 start, u32 end)
793 {
794 return (s32)(end - start) > 0;
795 }
796
/*
 * Poison the saved CTX_TIMESTAMP of arg->ce[0], run a request that samples
 * the live timestamp, then check that the timestamp advanced both from the
 * poison to the sample and from the sample to the value saved back into
 * the context image.
 *
 * With @preempt set, the sampling request is released by a semaphore
 * signal submitted on arg->ce[1]; otherwise the CPU writes the semaphore.
 *
 * Returns 0 on success, -EINVAL if either check fails, or a negative error
 * code from submission or waiting.
 */
static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	const char *mode = preempt ? "preempt" : "simple";
	struct i915_request *rq;
	u32 saved;
	u32 *slot;
	int err;

	/* slot[0] is the semaphore, slot[1] receives the sampled timestamp */
	slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);

	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(arg->ce[0], slot, 1);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	err = wait_for_submit(rq->engine, rq, HZ / 2);
	if (err)
		goto out;

	if (!preempt) {
		slot[0] = 1;
		wmb();
	} else {
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(arg->ce[1], slot);
		if (err)
			goto out;
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(arg->ce[0], HZ / 2);
	if (err)
		goto out;

	if (!timestamp_advanced(arg->poison, slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, mode,
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	saved = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(slot[1], saved)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, mode,
		       slot[1], saved);
		err = -EINVAL;
	}

out:
	/* Fill the semaphore with non-zero values before dropping the request */
	memset32(slot, -1, 4);
	i915_request_put(rq);
	return err;
}
848
live_lrc_timestamp(void * arg)849 static int live_lrc_timestamp(void *arg)
850 {
851 struct lrc_timestamp data = {};
852 struct intel_gt *gt = arg;
853 enum intel_engine_id id;
854 const u32 poison[] = {
855 0,
856 S32_MAX,
857 (u32)S32_MAX + 1,
858 U32_MAX,
859 };
860
861 /*
862 * We want to verify that the timestamp is saved and restore across
863 * context switches and is monotonic.
864 *
865 * So we do this with a little bit of LRC poisoning to check various
866 * boundary conditions, and see what happens if we preempt the context
867 * with a second request (carrying more poison into the timestamp).
868 */
869
870 for_each_engine(data.engine, gt, id) {
871 int i, err = 0;
872
873 st_engine_heartbeat_disable(data.engine);
874
875 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
876 struct intel_context *tmp;
877
878 tmp = intel_context_create(data.engine);
879 if (IS_ERR(tmp)) {
880 err = PTR_ERR(tmp);
881 goto err;
882 }
883
884 err = intel_context_pin(tmp);
885 if (err) {
886 intel_context_put(tmp);
887 goto err;
888 }
889
890 data.ce[i] = tmp;
891 }
892
893 for (i = 0; i < ARRAY_SIZE(poison); i++) {
894 data.poison = poison[i];
895
896 err = __lrc_timestamp(&data, false);
897 if (err)
898 break;
899
900 err = __lrc_timestamp(&data, true);
901 if (err)
902 break;
903 }
904
905 err:
906 st_engine_heartbeat_enable(data.engine);
907 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
908 if (!data.ce[i])
909 break;
910
911 intel_context_unpin(data.ce[i]);
912 intel_context_put(data.ce[i]);
913 }
914
915 if (igt_flush_test(gt->i915))
916 err = -EIO;
917 if (err)
918 return err;
919 }
920
921 return 0;
922 }
923
924 static struct i915_vma *
create_user_vma(struct i915_address_space * vm,unsigned long size)925 create_user_vma(struct i915_address_space *vm, unsigned long size)
926 {
927 struct drm_i915_gem_object *obj;
928 struct i915_vma *vma;
929 int err;
930
931 obj = i915_gem_object_create_internal(vm->i915, size);
932 if (IS_ERR(obj))
933 return ERR_CAST(obj);
934
935 vma = i915_vma_instance(obj, vm, NULL);
936 if (IS_ERR(vma)) {
937 i915_gem_object_put(obj);
938 return vma;
939 }
940
941 err = i915_vma_pin(vma, 0, 0, PIN_USER);
942 if (err) {
943 i915_gem_object_put(obj);
944 return ERR_PTR(err);
945 }
946
947 return vma;
948 }
949
/*
 * Return the value to write into the register at @offset: @poison, with
 * bit 0 cleared when @offset equals RING_PREDICATE_RESULT(0).
 */
static u32 safe_poison(u32 offset, u32 poison)
{
	/*
	 * Do not enable predication as it will nop all subsequent commands,
	 * not only disabling the tests (by preventing all the other SRM) but
	 * also preventing the arbitration events at the end of the request.
	 */
	if (offset != i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
		return poison;

	return poison & ~REG_BIT(0);
}
962
963 static struct i915_vma *
store_context(struct intel_context * ce,struct i915_vma * scratch)964 store_context(struct intel_context *ce, struct i915_vma *scratch)
965 {
966 struct i915_vma *batch;
967 u32 dw, x, *cs, *hw;
968 u32 *defaults;
969
970 batch = create_user_vma(ce->vm, SZ_64K);
971 if (IS_ERR(batch))
972 return batch;
973
974 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
975 if (IS_ERR(cs)) {
976 i915_vma_put(batch);
977 return ERR_CAST(cs);
978 }
979
980 defaults = shmem_pin_map(ce->engine->default_state);
981 if (!defaults) {
982 i915_gem_object_unpin_map(batch->obj);
983 i915_vma_put(batch);
984 return ERR_PTR(-ENOMEM);
985 }
986
987 x = 0;
988 dw = 0;
989 hw = defaults;
990 hw += LRC_STATE_OFFSET / sizeof(*hw);
991 do {
992 u32 len = hw[dw] & LRI_LENGTH_MASK;
993
994 /*
995 * Keep it simple, skip parsing complex commands
996 *
997 * At present, there are no more MI_LOAD_REGISTER_IMM
998 * commands after the first 3D state command. Rather
999 * than include a table (see i915_cmd_parser.c) of all
1000 * the possible commands and their instruction lengths
1001 * (or mask for variable length instructions), assume
1002 * we have gathered the complete list of registers and
1003 * bail out.
1004 */
1005 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1006 break;
1007
1008 if (hw[dw] == 0) {
1009 dw++;
1010 continue;
1011 }
1012
1013 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1014 /* Assume all other MI commands match LRI length mask */
1015 dw += len + 2;
1016 continue;
1017 }
1018
1019 if (!len) {
1020 pr_err("%s: invalid LRI found in context image\n",
1021 ce->engine->name);
1022 igt_hexdump(defaults, PAGE_SIZE);
1023 break;
1024 }
1025
1026 dw++;
1027 len = (len + 1) / 2;
1028 while (len--) {
1029 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
1030 *cs++ = hw[dw];
1031 *cs++ = lower_32_bits(i915_vma_offset(scratch) + x);
1032 *cs++ = upper_32_bits(i915_vma_offset(scratch) + x);
1033
1034 dw += 2;
1035 x += 4;
1036 }
1037 } while (dw < PAGE_SIZE / sizeof(u32) &&
1038 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1039
1040 *cs++ = MI_BATCH_BUFFER_END;
1041
1042 shmem_unpin_map(ce->engine->default_state, defaults);
1043
1044 i915_gem_object_flush_map(batch->obj);
1045 i915_gem_object_unpin_map(batch->obj);
1046
1047 return batch;
1048 }
1049
1050 static struct i915_request *
record_registers(struct intel_context * ce,struct i915_vma * before,struct i915_vma * after,u32 * sema)1051 record_registers(struct intel_context *ce,
1052 struct i915_vma *before,
1053 struct i915_vma *after,
1054 u32 *sema)
1055 {
1056 struct i915_vma *b_before, *b_after;
1057 struct i915_request *rq;
1058 u32 *cs;
1059 int err;
1060
1061 b_before = store_context(ce, before);
1062 if (IS_ERR(b_before))
1063 return ERR_CAST(b_before);
1064
1065 b_after = store_context(ce, after);
1066 if (IS_ERR(b_after)) {
1067 rq = ERR_CAST(b_after);
1068 goto err_before;
1069 }
1070
1071 rq = intel_context_create_request(ce);
1072 if (IS_ERR(rq))
1073 goto err_after;
1074
1075 err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE);
1076 if (err)
1077 goto err_rq;
1078
1079 err = igt_vma_move_to_active_unlocked(b_before, rq, 0);
1080 if (err)
1081 goto err_rq;
1082
1083 err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE);
1084 if (err)
1085 goto err_rq;
1086
1087 err = igt_vma_move_to_active_unlocked(b_after, rq, 0);
1088 if (err)
1089 goto err_rq;
1090
1091 cs = intel_ring_begin(rq, 14);
1092 if (IS_ERR(cs)) {
1093 err = PTR_ERR(cs);
1094 goto err_rq;
1095 }
1096
1097 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1098 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1099 *cs++ = lower_32_bits(i915_vma_offset(b_before));
1100 *cs++ = upper_32_bits(i915_vma_offset(b_before));
1101
1102 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1103 *cs++ = MI_SEMAPHORE_WAIT |
1104 MI_SEMAPHORE_GLOBAL_GTT |
1105 MI_SEMAPHORE_POLL |
1106 MI_SEMAPHORE_SAD_NEQ_SDD;
1107 *cs++ = 0;
1108 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1109 offset_in_page(sema);
1110 *cs++ = 0;
1111 *cs++ = MI_NOOP;
1112
1113 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1114 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1115 *cs++ = lower_32_bits(i915_vma_offset(b_after));
1116 *cs++ = upper_32_bits(i915_vma_offset(b_after));
1117
1118 intel_ring_advance(rq, cs);
1119
1120 WRITE_ONCE(*sema, 0);
1121 i915_request_get(rq);
1122 i915_request_add(rq);
1123 err_after:
1124 i915_vma_put(b_after);
1125 err_before:
1126 i915_vma_put(b_before);
1127 return rq;
1128
1129 err_rq:
1130 i915_request_add(rq);
1131 rq = ERR_PTR(err);
1132 goto err_after;
1133 }
1134
/*
 * Build a batch buffer in @ce's address space that re-issues every LRI
 * packet of the engine's default context image, replacing each register
 * value with @poison (as filtered by safe_poison()).
 *
 * Returns the pinned batch vma, or an ERR_PTR on failure.
 */
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 *defaults, *hw, *cs;
	u32 dw = 0; /* current dword in the default register state */

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	hw = defaults + LRC_STATE_OFFSET / sizeof(*hw);
	do {
		const u32 cmd = hw[dw];
		u32 len = cmd & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((cmd >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (cmd == 0) {
			dw++;
			continue;
		}

		/* Other MI commands are stepped over using the same length mask */
		if ((cmd & GENMASK(31, 23)) != LRI_HEADER) {
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       ce->engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		/* Step over the header and emit our own LRI for the same registers */
		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		for (; len; len--) {
			const u32 reg = hw[dw];

			*cs++ = reg;
			*cs++ = safe_poison(reg & get_lri_mask(ce->engine,
							       MI_LRI_LRM_CS_MMIO),
					    poison);
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}
1207
/*
 * Submit a request on @ce that runs the load_context() batch for @poison
 * and then writes 1 to the semaphore at @sema. The request is queued with
 * I915_PRIORITY_BARRIER priority when it was built successfully.
 *
 * @sema is a CPU pointer; only its offset within the page is used, applied
 * to the GGTT address of the engine's status page.
 *
 * Returns 0 on success or a negative error code.
 */
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_vma *batch;
	struct i915_request *rq;
	u32 *cs;
	int ret;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto out_batch;
	}

	ret = igt_vma_move_to_active_unlocked(batch, rq, 0);
	if (ret)
		goto out_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		ret = PTR_ERR(cs);
		goto out_rq;
	}

	/* Run the poisoning batch with arbitration disabled */
	cs[0] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	cs[1] = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	cs[2] = lower_32_bits(i915_vma_offset(batch));
	cs[3] = upper_32_bits(i915_vma_offset(batch));

	/* Then write 1 to the semaphore */
	cs[4] = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	cs[5] = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	cs[6] = 0;
	cs[7] = 1;

	intel_ring_advance(rq, cs + 8);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
out_rq:
	/* The request is added on both the success and the error path */
	i915_request_add(rq);
out_batch:
	i915_vma_put(batch);
	return ret;
}
1255
is_moving(u32 a,u32 b)1256 static bool is_moving(u32 a, u32 b)
1257 {
1258 return a != b;
1259 }
1260
/*
 * Compare the reference recordings (ref[]) against the recordings taken
 * during the poisoning run (result[]).
 *
 * The LRI commands in the engine's default context image are walked to find
 * out which register each recorded slot belongs to: every (register, value)
 * pair of an LRI consumes one slot of the recorded buffers.
 *
 * A slot is only inspected when its two reference samples agree (see
 * is_moving()); an inspected slot must then be identical in both result
 * buffers. Registers whose low 12 bits of offset are 0x30 (RING_HEAD) or
 * 0x34 (RING_TAIL) are exempt from the check.
 *
 * @engine: engine under test, used for its name and GT
 * @ref:    the two reference recordings
 * @result: the two recordings to validate
 * @ce:     context whose current state is included in the error report
 * @poison: poison value, only used in the error report
 *
 * Returns 0 when no mismatch was reported, -EINVAL on a mismatch, -ENOMEM if
 * the default state could not be mapped, or the error from mapping one of
 * the other objects.
 */
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 *before[2], *after[2];
	u32 *image, *cmds, *state;
	u32 slot = 0, pos = 0;
	int ret = 0;

	before[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(before[0]))
		return PTR_ERR(before[0]);

	before[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(before[1])) {
		ret = PTR_ERR(before[1]);
		goto unmap_ref0;
	}

	after[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(after[0])) {
		ret = PTR_ERR(after[0]);
		goto unmap_ref1;
	}

	after[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(after[1])) {
		ret = PTR_ERR(after[1]);
		goto unmap_result0;
	}

	state = i915_gem_object_pin_map_unlocked(ce->state->obj,
						 intel_gt_coherent_map_type(engine->gt,
									    ce->state->obj,
									    false));
	if (IS_ERR(state)) {
		ret = PTR_ERR(state);
		goto unmap_result1;
	}
	/* Skip ahead to the register state inside the context object */
	state += LRC_STATE_OFFSET / sizeof(*state);

	image = shmem_pin_map(ce->engine->default_state);
	if (!image) {
		ret = -ENOMEM;
		goto unmap_state;
	}

	/* The commands we parse start at the same offset in the default image */
	cmds = image + LRC_STATE_OFFSET / sizeof(u32);
	do {
		const u32 cmd = cmds[pos];
		u32 len = cmd & LRI_LENGTH_MASK;
		u32 pairs;

		/* For simplicity, stop parsing at the first non-MI command */
		if ((cmd >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (!cmd) {
			/* A zero dword is stepped over one at a time */
			pos++;
		} else if ((cmd & GENMASK(31, 23)) != LRI_HEADER) {
			/* Some other MI command: skip it using its length field */
			pos += len + 2;
		} else if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       engine->name);
			igt_hexdump(image, PAGE_SIZE);
			break;
		} else {
			/* Step over the header, then visit each (reg, value) pair */
			pos++;
			for (pairs = (len + 1) / 2; pairs; pairs--) {
				bool stable = !is_moving(before[0][slot],
							 before[1][slot]);
				bool differs =
					before[0][slot] != after[0][slot] ||
					before[1][slot] != after[1][slot];

				if (stable && differs) {
					const u32 reg = cmds[pos] & 4095;

					/* 0x30: RING_HEAD, 0x34: RING_TAIL */
					if (reg != 0x30 && reg != 0x34) {
						pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
						       engine->name, pos,
						       cmds[pos], cmds[pos + 1],
						       before[0][slot],
						       after[0][slot],
						       after[1][slot],
						       poison, state[pos + 1]);
						ret = -EINVAL;
					}
				}

				pos += 2;
				slot++;
			}
		}
	} while (pos < PAGE_SIZE / sizeof(u32) &&
		 (cmds[pos] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	shmem_unpin_map(ce->engine->default_state, image);
unmap_state:
	i915_gem_object_unpin_map(ce->state->obj);
unmap_result1:
	i915_gem_object_unpin_map(result[1]->obj);
unmap_result0:
	i915_gem_object_unpin_map(result[0]->obj);
unmap_ref1:
	i915_gem_object_unpin_map(ref[1]->obj);
unmap_ref0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return ret;
}
1376
1377 static struct i915_vma *
create_result_vma(struct i915_address_space * vm,unsigned long sz)1378 create_result_vma(struct i915_address_space *vm, unsigned long sz)
1379 {
1380 struct i915_vma *vma;
1381 void *ptr;
1382
1383 vma = create_user_vma(vm, sz);
1384 if (IS_ERR(vma))
1385 return vma;
1386
1387 /* Set the results to a known value distinct from the poison */
1388 ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
1389 if (IS_ERR(ptr)) {
1390 i915_vma_put(vma);
1391 return ERR_CAST(ptr);
1392 }
1393
1394 memset(ptr, POISON_INUSE, vma->size);
1395 i915_gem_object_flush_map(vma->obj);
1396 i915_gem_object_unpin_map(vma->obj);
1397
1398 return vma;
1399 }
1400
/*
 * Run one isolation pass on @engine using @poison.
 *
 * Two contexts are created. The registers of the first are recorded twice:
 * once on its own to obtain the reference buffers, and once more while the
 * second context is handed to poison_registers(). The two sets of recordings
 * are then handed to compare_isolation().
 *
 * Returns 0 on success, -ETIME if a recording request did not complete
 * within HZ / 2, or the error reported by one of the helpers.
 */
static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	struct intel_context *victim, *attacker;
	struct i915_vma *ref[2], *result[2];
	struct i915_request *rq;
	bool timed_out;
	u32 *sema;
	int err;

	/* Begin with the semaphore dword in the status page cleared */
	sema = memset32(engine->status_page.addr + 1000, 0, 1);

	victim = intel_context_create(engine);
	if (IS_ERR(victim))
		return PTR_ERR(victim);

	attacker = intel_context_create(engine);
	if (IS_ERR(attacker)) {
		err = PTR_ERR(attacker);
		goto put_victim;
	}

	ref[0] = create_result_vma(victim->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto put_attacker;
	}

	ref[1] = create_result_vma(victim->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto put_ref0;
	}

	/* First pass: capture the reference values, nothing is poisoned yet */
	rq = record_registers(victim, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto put_ref1;
	}

	/* Signal the semaphore from the CPU and make the write visible */
	WRITE_ONCE(*sema, 1);
	wmb();

	timed_out = i915_request_wait(rq, 0, HZ / 2) < 0;
	i915_request_put(rq);
	if (timed_out) {
		err = -ETIME;
		goto put_ref1;
	}

	result[0] = create_result_vma(victim->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto put_ref1;
	}

	result[1] = create_result_vma(victim->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto put_result0;
	}

	/* Second pass: record again, this time alongside the poisoning */
	rq = record_registers(victim, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto put_result1;
	}

	err = poison_registers(attacker, poison, sema);
	if (!err) {
		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
			pr_err("%s(%s): wait for results timed out\n",
			       __func__, engine->name);
			err = -ETIME;
		}
	}

	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
	WRITE_ONCE(*sema, -1);
	i915_request_put(rq);

	if (!err)
		err = compare_isolation(engine, ref, result, victim, poison);

put_result1:
	i915_vma_put(result[1]);
put_result0:
	i915_vma_put(result[0]);
put_ref1:
	i915_vma_put(ref[1]);
put_ref0:
	i915_vma_put(ref[0]);
put_attacker:
	intel_context_put(attacker);
put_victim:
	intel_context_put(victim);
	return err;
}
1494
skip_isolation(const struct intel_engine_cs * engine)1495 static bool skip_isolation(const struct intel_engine_cs *engine)
1496 {
1497 if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1498 return true;
1499
1500 if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1501 return true;
1502
1503 return false;
1504 }
1505
live_lrc_isolation(void * arg)1506 static int live_lrc_isolation(void *arg)
1507 {
1508 struct intel_gt *gt = arg;
1509 struct intel_engine_cs *engine;
1510 enum intel_engine_id id;
1511 const u32 poison[] = {
1512 STACK_MAGIC,
1513 0x3a3a3a3a,
1514 0x5c5c5c5c,
1515 0xffffffff,
1516 0xffff0000,
1517 };
1518 int err = 0;
1519
1520 /*
1521 * Our goal is try and verify that per-context state cannot be
1522 * tampered with by another non-privileged client.
1523 *
1524 * We take the list of context registers from the LRI in the default
1525 * context image and attempt to modify that list from a remote context.
1526 */
1527
1528 for_each_engine(engine, gt, id) {
1529 int i;
1530
1531 /* Just don't even ask */
1532 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1533 skip_isolation(engine))
1534 continue;
1535
1536 intel_engine_pm_get(engine);
1537 for (i = 0; i < ARRAY_SIZE(poison); i++) {
1538 int result;
1539
1540 result = __lrc_isolation(engine, poison[i]);
1541 if (result && !err)
1542 err = result;
1543
1544 result = __lrc_isolation(engine, ~poison[i]);
1545 if (result && !err)
1546 err = result;
1547 }
1548 intel_engine_pm_put(engine);
1549 if (igt_flush_test(gt->i915)) {
1550 err = -EIO;
1551 break;
1552 }
1553 }
1554
1555 return err;
1556 }
1557
indirect_ctx_submit_req(struct intel_context * ce)1558 static int indirect_ctx_submit_req(struct intel_context *ce)
1559 {
1560 struct i915_request *rq;
1561 int err = 0;
1562
1563 rq = intel_context_create_request(ce);
1564 if (IS_ERR(rq))
1565 return PTR_ERR(rq);
1566
1567 i915_request_get(rq);
1568 i915_request_add(rq);
1569
1570 if (i915_request_wait(rq, 0, HZ / 5) < 0)
1571 err = -ETIME;
1572
1573 i915_request_put(rq);
1574
1575 return err;
1576 }
1577
/*
 * Location of the canary dword used by the indirect ctx bb test.
 * CTX_BB_CANARY_OFFSET is a byte offset that is added on top of
 * context_wa_bb_offset(); CTX_BB_CANARY_INDEX is the same location expressed
 * as an index into an array of u32.
 */
#define CTX_BB_CANARY_OFFSET (3 * 1024)
#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
1580
1581 static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context * ce,u32 * cs)1582 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1583 {
1584 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1585 MI_SRM_LRM_GLOBAL_GTT |
1586 MI_LRI_LRM_CS_MMIO;
1587 *cs++ = i915_mmio_reg_offset(RING_START(0));
1588 *cs++ = i915_ggtt_offset(ce->state) +
1589 context_wa_bb_offset(ce) +
1590 CTX_BB_CANARY_OFFSET;
1591 *cs++ = 0;
1592
1593 return cs;
1594 }
1595
1596 static void
indirect_ctx_bb_setup(struct intel_context * ce)1597 indirect_ctx_bb_setup(struct intel_context *ce)
1598 {
1599 u32 *cs = context_indirect_bb(ce);
1600
1601 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1602
1603 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1604 }
1605
check_ring_start(struct intel_context * ce)1606 static bool check_ring_start(struct intel_context *ce)
1607 {
1608 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1609 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
1610
1611 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1612 return true;
1613
1614 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1615 ctx_bb[CTX_BB_CANARY_INDEX],
1616 ce->lrc_reg_state[CTX_RING_START]);
1617
1618 return false;
1619 }
1620
indirect_ctx_bb_check(struct intel_context * ce)1621 static int indirect_ctx_bb_check(struct intel_context *ce)
1622 {
1623 int err;
1624
1625 err = indirect_ctx_submit_req(ce);
1626 if (err)
1627 return err;
1628
1629 if (!check_ring_start(ce))
1630 return -EINVAL;
1631
1632 return 0;
1633 }
1634
__live_lrc_indirect_ctx_bb(struct intel_engine_cs * engine)1635 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
1636 {
1637 struct intel_context *a, *b;
1638 int err;
1639
1640 a = intel_context_create(engine);
1641 if (IS_ERR(a))
1642 return PTR_ERR(a);
1643 err = intel_context_pin(a);
1644 if (err)
1645 goto put_a;
1646
1647 b = intel_context_create(engine);
1648 if (IS_ERR(b)) {
1649 err = PTR_ERR(b);
1650 goto unpin_a;
1651 }
1652 err = intel_context_pin(b);
1653 if (err)
1654 goto put_b;
1655
1656 /* We use the already reserved extra page in context state */
1657 if (!a->wa_bb_page) {
1658 GEM_BUG_ON(b->wa_bb_page);
1659 GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1660 goto unpin_b;
1661 }
1662
1663 /*
1664 * In order to test that our per context bb is truly per context,
1665 * and executes at the intended spot on context restoring process,
1666 * make the batch store the ring start value to memory.
1667 * As ring start is restored apriori of starting the indirect ctx bb and
1668 * as it will be different for each context, it fits to this purpose.
1669 */
1670 indirect_ctx_bb_setup(a);
1671 indirect_ctx_bb_setup(b);
1672
1673 err = indirect_ctx_bb_check(a);
1674 if (err)
1675 goto unpin_b;
1676
1677 err = indirect_ctx_bb_check(b);
1678
1679 unpin_b:
1680 intel_context_unpin(b);
1681 put_b:
1682 intel_context_put(b);
1683 unpin_a:
1684 intel_context_unpin(a);
1685 put_a:
1686 intel_context_put(a);
1687
1688 return err;
1689 }
1690
live_lrc_indirect_ctx_bb(void * arg)1691 static int live_lrc_indirect_ctx_bb(void *arg)
1692 {
1693 struct intel_gt *gt = arg;
1694 struct intel_engine_cs *engine;
1695 enum intel_engine_id id;
1696 int err = 0;
1697
1698 for_each_engine(engine, gt, id) {
1699 intel_engine_pm_get(engine);
1700 err = __live_lrc_indirect_ctx_bb(engine);
1701 intel_engine_pm_put(engine);
1702
1703 if (igt_flush_test(gt->i915))
1704 err = -EIO;
1705
1706 if (err)
1707 break;
1708 }
1709
1710 return err;
1711 }
1712
garbage_reset(struct intel_engine_cs * engine,struct i915_request * rq)1713 static void garbage_reset(struct intel_engine_cs *engine,
1714 struct i915_request *rq)
1715 {
1716 const unsigned int bit = I915_RESET_ENGINE + engine->id;
1717 unsigned long *lock = &engine->gt->reset.flags;
1718
1719 local_bh_disable();
1720 if (!test_and_set_bit(bit, lock)) {
1721 tasklet_disable(&engine->sched_engine->tasklet);
1722
1723 if (!rq->fence.error)
1724 __intel_engine_reset_bh(engine, NULL);
1725
1726 tasklet_enable(&engine->sched_engine->tasklet);
1727 clear_and_wake_up_bit(bit, lock);
1728 }
1729 local_bh_enable();
1730 }
1731
garbage(struct intel_context * ce,struct rnd_state * prng)1732 static struct i915_request *garbage(struct intel_context *ce,
1733 struct rnd_state *prng)
1734 {
1735 struct i915_request *rq;
1736 int err;
1737
1738 err = intel_context_pin(ce);
1739 if (err)
1740 return ERR_PTR(err);
1741
1742 prandom_bytes_state(prng,
1743 ce->lrc_reg_state,
1744 ce->engine->context_size -
1745 LRC_STATE_OFFSET);
1746
1747 rq = intel_context_create_request(ce);
1748 if (IS_ERR(rq)) {
1749 err = PTR_ERR(rq);
1750 goto err_unpin;
1751 }
1752
1753 i915_request_get(rq);
1754 i915_request_add(rq);
1755 return rq;
1756
1757 err_unpin:
1758 intel_context_unpin(ce);
1759 return ERR_PTR(err);
1760 }
1761
__lrc_garbage(struct intel_engine_cs * engine,struct rnd_state * prng)1762 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1763 {
1764 struct intel_context *ce;
1765 struct i915_request *hang;
1766 int err = 0;
1767
1768 ce = intel_context_create(engine);
1769 if (IS_ERR(ce))
1770 return PTR_ERR(ce);
1771
1772 hang = garbage(ce, prng);
1773 if (IS_ERR(hang)) {
1774 err = PTR_ERR(hang);
1775 goto err_ce;
1776 }
1777
1778 if (wait_for_submit(engine, hang, HZ / 2)) {
1779 i915_request_put(hang);
1780 err = -ETIME;
1781 goto err_ce;
1782 }
1783
1784 intel_context_set_banned(ce);
1785 garbage_reset(engine, hang);
1786
1787 intel_engine_flush_submission(engine);
1788 if (!hang->fence.error) {
1789 i915_request_put(hang);
1790 pr_err("%s: corrupted context was not reset\n",
1791 engine->name);
1792 err = -EINVAL;
1793 goto err_ce;
1794 }
1795
1796 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1797 pr_err("%s: corrupted context did not recover\n",
1798 engine->name);
1799 i915_request_put(hang);
1800 err = -EIO;
1801 goto err_ce;
1802 }
1803 i915_request_put(hang);
1804
1805 err_ce:
1806 intel_context_put(ce);
1807 return err;
1808 }
1809
live_lrc_garbage(void * arg)1810 static int live_lrc_garbage(void *arg)
1811 {
1812 struct intel_gt *gt = arg;
1813 struct intel_engine_cs *engine;
1814 enum intel_engine_id id;
1815
1816 /*
1817 * Verify that we can recover if one context state is completely
1818 * corrupted.
1819 */
1820
1821 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1822 return 0;
1823
1824 for_each_engine(engine, gt, id) {
1825 I915_RND_STATE(prng);
1826 int err = 0, i;
1827
1828 if (!intel_has_reset_engine(engine->gt))
1829 continue;
1830
1831 intel_engine_pm_get(engine);
1832 for (i = 0; i < 3; i++) {
1833 err = __lrc_garbage(engine, &prng);
1834 if (err)
1835 break;
1836 }
1837 intel_engine_pm_put(engine);
1838
1839 if (igt_flush_test(gt->i915))
1840 err = -EIO;
1841 if (err)
1842 return err;
1843 }
1844
1845 return 0;
1846 }
1847
__live_pphwsp_runtime(struct intel_engine_cs * engine)1848 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1849 {
1850 struct intel_context *ce;
1851 struct i915_request *rq;
1852 IGT_TIMEOUT(end_time);
1853 int err;
1854
1855 ce = intel_context_create(engine);
1856 if (IS_ERR(ce))
1857 return PTR_ERR(ce);
1858
1859 ce->stats.runtime.num_underflow = 0;
1860 ce->stats.runtime.max_underflow = 0;
1861
1862 do {
1863 unsigned int loop = 1024;
1864
1865 while (loop) {
1866 rq = intel_context_create_request(ce);
1867 if (IS_ERR(rq)) {
1868 err = PTR_ERR(rq);
1869 goto err_rq;
1870 }
1871
1872 if (--loop == 0)
1873 i915_request_get(rq);
1874
1875 i915_request_add(rq);
1876 }
1877
1878 if (__igt_timeout(end_time, NULL))
1879 break;
1880
1881 i915_request_put(rq);
1882 } while (1);
1883
1884 err = i915_request_wait(rq, 0, HZ / 5);
1885 if (err < 0) {
1886 pr_err("%s: request not completed!\n", engine->name);
1887 goto err_wait;
1888 }
1889
1890 igt_flush_test(engine->i915);
1891
1892 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1893 engine->name,
1894 intel_context_get_total_runtime_ns(ce),
1895 intel_context_get_avg_runtime_ns(ce));
1896
1897 err = 0;
1898 if (ce->stats.runtime.num_underflow) {
1899 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1900 engine->name,
1901 ce->stats.runtime.num_underflow,
1902 ce->stats.runtime.max_underflow);
1903 GEM_TRACE_DUMP();
1904 err = -EOVERFLOW;
1905 }
1906
1907 err_wait:
1908 i915_request_put(rq);
1909 err_rq:
1910 intel_context_put(ce);
1911 return err;
1912 }
1913
live_pphwsp_runtime(void * arg)1914 static int live_pphwsp_runtime(void *arg)
1915 {
1916 struct intel_gt *gt = arg;
1917 struct intel_engine_cs *engine;
1918 enum intel_engine_id id;
1919 int err = 0;
1920
1921 /*
1922 * Check that cumulative context runtime as stored in the pphwsp[16]
1923 * is monotonic.
1924 */
1925
1926 for_each_engine(engine, gt, id) {
1927 err = __live_pphwsp_runtime(engine);
1928 if (err)
1929 break;
1930 }
1931
1932 if (igt_flush_test(gt->i915))
1933 err = -EIO;
1934
1935 return err;
1936 }
1937
intel_lrc_live_selftests(struct drm_i915_private * i915)1938 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1939 {
1940 static const struct i915_subtest tests[] = {
1941 SUBTEST(live_lrc_layout),
1942 SUBTEST(live_lrc_fixed),
1943 SUBTEST(live_lrc_state),
1944 SUBTEST(live_lrc_gpr),
1945 SUBTEST(live_lrc_isolation),
1946 SUBTEST(live_lrc_timestamp),
1947 SUBTEST(live_lrc_garbage),
1948 SUBTEST(live_pphwsp_runtime),
1949 SUBTEST(live_lrc_indirect_ctx_bb),
1950 };
1951
1952 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1953 return 0;
1954
1955 return intel_gt_live_subtests(tests, to_gt(i915));
1956 }
1957