1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2018 Intel Corporation 4 */ 5 6 #include <linux/prime_numbers.h> 7 8 #include "gem/i915_gem_internal.h" 9 10 #include "i915_selftest.h" 11 #include "intel_engine_heartbeat.h" 12 #include "intel_engine_pm.h" 13 #include "intel_reset.h" 14 #include "intel_ring.h" 15 #include "selftest_engine_heartbeat.h" 16 #include "selftests/i915_random.h" 17 #include "selftests/igt_flush_test.h" 18 #include "selftests/igt_live_test.h" 19 #include "selftests/igt_spinner.h" 20 #include "selftests/lib_sw_fence.h" 21 #include "shmem_utils.h" 22 23 #include "gem/selftests/igt_gem_utils.h" 24 #include "gem/selftests/mock_context.h" 25 26 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) 27 #define NUM_GPR 16 28 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ 29 30 #define LRI_HEADER MI_INSTR(0x22, 0) 31 #define LRI_LENGTH_MASK GENMASK(7, 0) 32 33 static struct i915_vma *create_scratch(struct intel_gt *gt) 34 { 35 return __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE); 36 } 37 38 static bool is_active(struct i915_request *rq) 39 { 40 if (i915_request_is_active(rq)) 41 return true; 42 43 if (i915_request_on_hold(rq)) 44 return true; 45 46 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) 47 return true; 48 49 return false; 50 } 51 52 static int wait_for_submit(struct intel_engine_cs *engine, 53 struct i915_request *rq, 54 unsigned long timeout) 55 { 56 /* Ignore our own attempts to suppress excess tasklets */ 57 tasklet_hi_schedule(&engine->sched_engine->tasklet); 58 59 timeout += jiffies; 60 do { 61 bool done = time_after(jiffies, timeout); 62 63 if (i915_request_completed(rq)) /* that was quick! 
*/ 64 return 0; 65 66 /* Wait until the HW has acknowleged the submission (or err) */ 67 intel_engine_flush_submission(engine); 68 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) 69 return 0; 70 71 if (done) 72 return -ETIME; 73 74 cond_resched(); 75 } while (1); 76 } 77 78 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 79 { 80 const u32 offset = 81 i915_ggtt_offset(ce->engine->status_page.vma) + 82 offset_in_page(slot); 83 struct i915_request *rq; 84 u32 *cs; 85 86 rq = intel_context_create_request(ce); 87 if (IS_ERR(rq)) 88 return PTR_ERR(rq); 89 90 cs = intel_ring_begin(rq, 4); 91 if (IS_ERR(cs)) { 92 i915_request_add(rq); 93 return PTR_ERR(cs); 94 } 95 96 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 97 *cs++ = offset; 98 *cs++ = 0; 99 *cs++ = 1; 100 101 intel_ring_advance(rq, cs); 102 103 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 104 i915_request_add(rq); 105 return 0; 106 } 107 108 static int context_flush(struct intel_context *ce, long timeout) 109 { 110 struct i915_request *rq; 111 struct dma_fence *fence; 112 int err = 0; 113 114 rq = intel_engine_create_kernel_request(ce->engine); 115 if (IS_ERR(rq)) 116 return PTR_ERR(rq); 117 118 fence = i915_active_fence_get(&ce->timeline->last_request); 119 if (fence) { 120 i915_request_await_dma_fence(rq, fence); 121 dma_fence_put(fence); 122 } 123 124 rq = i915_request_get(rq); 125 i915_request_add(rq); 126 if (i915_request_wait(rq, 0, timeout) < 0) 127 err = -ETIME; 128 i915_request_put(rq); 129 130 rmb(); /* We know the request is written, make sure all state is too! 
*/ 131 return err; 132 } 133 134 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri) 135 { 136 if ((lri & MI_LRI_LRM_CS_MMIO) == 0) 137 return ~0u; 138 139 if (GRAPHICS_VER(engine->i915) < 12) 140 return 0xfff; 141 142 switch (engine->class) { 143 default: 144 case RENDER_CLASS: 145 case COMPUTE_CLASS: 146 return 0x07ff; 147 case COPY_ENGINE_CLASS: 148 return 0x0fff; 149 case VIDEO_DECODE_CLASS: 150 case VIDEO_ENHANCEMENT_CLASS: 151 return 0x3fff; 152 } 153 } 154 155 static int live_lrc_layout(void *arg) 156 { 157 struct intel_gt *gt = arg; 158 struct intel_engine_cs *engine; 159 enum intel_engine_id id; 160 u32 *lrc; 161 int err; 162 163 /* 164 * Check the registers offsets we use to create the initial reg state 165 * match the layout saved by HW. 166 */ 167 168 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */ 169 if (!lrc) 170 return -ENOMEM; 171 GEM_BUG_ON(offset_in_page(lrc)); 172 173 err = 0; 174 for_each_engine(engine, gt, id) { 175 u32 *hw; 176 int dw; 177 178 if (!engine->default_state) 179 continue; 180 181 hw = shmem_pin_map(engine->default_state); 182 if (!hw) { 183 err = -ENOMEM; 184 break; 185 } 186 hw += LRC_STATE_OFFSET / sizeof(*hw); 187 188 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE), 189 engine->kernel_context, engine, true); 190 191 dw = 0; 192 do { 193 u32 lri = READ_ONCE(hw[dw]); 194 u32 lri_mask; 195 196 if (lri == 0) { 197 dw++; 198 continue; 199 } 200 201 if (lrc[dw] == 0) { 202 pr_debug("%s: skipped instruction %x at dword %d\n", 203 engine->name, lri, dw); 204 dw++; 205 continue; 206 } 207 208 if ((lri & GENMASK(31, 23)) != LRI_HEADER) { 209 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 210 engine->name, dw, lri); 211 err = -EINVAL; 212 break; 213 } 214 215 if (lrc[dw] != lri) { 216 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 217 engine->name, dw, lri, lrc[dw]); 218 err = -EINVAL; 219 break; 220 } 221 222 /* 223 * When bit 19 of MI_LOAD_REGISTER_IMM 
instruction 224 * opcode is set on Gen12+ devices, HW does not 225 * care about certain register address offsets, and 226 * instead check the following for valid address 227 * ranges on specific engines: 228 * RCS && CCS: BITS(0 - 10) 229 * BCS: BITS(0 - 11) 230 * VECS && VCS: BITS(0 - 13) 231 */ 232 lri_mask = get_lri_mask(engine, lri); 233 234 lri &= 0x7f; 235 lri++; 236 dw++; 237 238 while (lri) { 239 u32 offset = READ_ONCE(hw[dw]); 240 241 if ((offset ^ lrc[dw]) & lri_mask) { 242 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 243 engine->name, dw, offset, lrc[dw]); 244 err = -EINVAL; 245 break; 246 } 247 248 /* 249 * Skip over the actual register value as we 250 * expect that to differ. 251 */ 252 dw += 2; 253 lri -= 2; 254 } 255 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 256 257 if (err) { 258 pr_info("%s: HW register image:\n", engine->name); 259 igt_hexdump(hw, PAGE_SIZE); 260 261 pr_info("%s: SW register image:\n", engine->name); 262 igt_hexdump(lrc, PAGE_SIZE); 263 } 264 265 shmem_unpin_map(engine->default_state, hw); 266 if (err) 267 break; 268 } 269 270 free_page((unsigned long)lrc); 271 return err; 272 } 273 274 static int find_offset(const u32 *lri, u32 offset) 275 { 276 int i; 277 278 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 279 if (lri[i] == offset) 280 return i; 281 282 return -1; 283 } 284 285 static int live_lrc_fixed(void *arg) 286 { 287 struct intel_gt *gt = arg; 288 struct intel_engine_cs *engine; 289 enum intel_engine_id id; 290 int err = 0; 291 292 /* 293 * Check the assumed register offsets match the actual locations in 294 * the context image. 
295 */ 296 297 for_each_engine(engine, gt, id) { 298 const struct { 299 u32 reg; 300 u32 offset; 301 const char *name; 302 } tbl[] = { 303 { 304 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 305 CTX_RING_START - 1, 306 "RING_START" 307 }, 308 { 309 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 310 CTX_RING_CTL - 1, 311 "RING_CTL" 312 }, 313 { 314 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 315 CTX_RING_HEAD - 1, 316 "RING_HEAD" 317 }, 318 { 319 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 320 CTX_RING_TAIL - 1, 321 "RING_TAIL" 322 }, 323 { 324 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 325 lrc_ring_mi_mode(engine), 326 "RING_MI_MODE" 327 }, 328 { 329 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 330 CTX_BB_STATE - 1, 331 "BB_STATE" 332 }, 333 { 334 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), 335 lrc_ring_wa_bb_per_ctx(engine), 336 "RING_BB_PER_CTX_PTR" 337 }, 338 { 339 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), 340 lrc_ring_indirect_ptr(engine), 341 "RING_INDIRECT_CTX_PTR" 342 }, 343 { 344 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), 345 lrc_ring_indirect_offset(engine), 346 "RING_INDIRECT_CTX_OFFSET" 347 }, 348 { 349 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 350 CTX_TIMESTAMP - 1, 351 "RING_CTX_TIMESTAMP" 352 }, 353 { 354 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), 355 lrc_ring_gpr0(engine), 356 "RING_CS_GPR0" 357 }, 358 { 359 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), 360 lrc_ring_cmd_buf_cctl(engine), 361 "RING_CMD_BUF_CCTL" 362 }, 363 { 364 i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)), 365 lrc_ring_bb_offset(engine), 366 "RING_BB_OFFSET" 367 }, 368 { }, 369 }, *t; 370 u32 *hw; 371 372 if (!engine->default_state) 373 continue; 374 375 hw = shmem_pin_map(engine->default_state); 376 if (!hw) { 377 err = -ENOMEM; 378 break; 379 } 380 hw += LRC_STATE_OFFSET / sizeof(*hw); 381 382 for (t = tbl; 
t->name; t++) { 383 int dw = find_offset(hw, t->reg); 384 385 if (dw != t->offset) { 386 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 387 engine->name, 388 t->name, 389 t->reg, 390 dw, 391 t->offset); 392 err = -EINVAL; 393 } 394 } 395 396 shmem_unpin_map(engine->default_state, hw); 397 } 398 399 return err; 400 } 401 402 static int __live_lrc_state(struct intel_engine_cs *engine, 403 struct i915_vma *scratch) 404 { 405 struct intel_context *ce; 406 struct i915_request *rq; 407 struct i915_gem_ww_ctx ww; 408 enum { 409 RING_START_IDX = 0, 410 RING_TAIL_IDX, 411 MAX_IDX 412 }; 413 u32 expected[MAX_IDX]; 414 u32 *cs; 415 int err; 416 int n; 417 418 ce = intel_context_create(engine); 419 if (IS_ERR(ce)) 420 return PTR_ERR(ce); 421 422 i915_gem_ww_ctx_init(&ww, false); 423 retry: 424 err = i915_gem_object_lock(scratch->obj, &ww); 425 if (!err) 426 err = intel_context_pin_ww(ce, &ww); 427 if (err) 428 goto err_put; 429 430 rq = i915_request_create(ce); 431 if (IS_ERR(rq)) { 432 err = PTR_ERR(rq); 433 goto err_unpin; 434 } 435 436 cs = intel_ring_begin(rq, 4 * MAX_IDX); 437 if (IS_ERR(cs)) { 438 err = PTR_ERR(cs); 439 i915_request_add(rq); 440 goto err_unpin; 441 } 442 443 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 444 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 445 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 446 *cs++ = 0; 447 448 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 449 450 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 451 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 452 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 453 *cs++ = 0; 454 455 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 456 457 i915_request_get(rq); 458 i915_request_add(rq); 459 if (err) 460 goto err_rq; 461 462 intel_engine_flush_submission(engine); 463 expected[RING_TAIL_IDX] = ce->ring->tail; 464 465 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 466 err = 
-ETIME; 467 goto err_rq; 468 } 469 470 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 471 if (IS_ERR(cs)) { 472 err = PTR_ERR(cs); 473 goto err_rq; 474 } 475 476 for (n = 0; n < MAX_IDX; n++) { 477 if (cs[n] != expected[n]) { 478 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 479 engine->name, n, cs[n], expected[n]); 480 err = -EINVAL; 481 break; 482 } 483 } 484 485 i915_gem_object_unpin_map(scratch->obj); 486 487 err_rq: 488 i915_request_put(rq); 489 err_unpin: 490 intel_context_unpin(ce); 491 err_put: 492 if (err == -EDEADLK) { 493 err = i915_gem_ww_ctx_backoff(&ww); 494 if (!err) 495 goto retry; 496 } 497 i915_gem_ww_ctx_fini(&ww); 498 intel_context_put(ce); 499 return err; 500 } 501 502 static int live_lrc_state(void *arg) 503 { 504 struct intel_gt *gt = arg; 505 struct intel_engine_cs *engine; 506 struct i915_vma *scratch; 507 enum intel_engine_id id; 508 int err = 0; 509 510 /* 511 * Check the live register state matches what we expect for this 512 * intel_context. 
513 */ 514 515 scratch = create_scratch(gt); 516 if (IS_ERR(scratch)) 517 return PTR_ERR(scratch); 518 519 for_each_engine(engine, gt, id) { 520 err = __live_lrc_state(engine, scratch); 521 if (err) 522 break; 523 } 524 525 if (igt_flush_test(gt->i915)) 526 err = -EIO; 527 528 i915_vma_unpin_and_release(&scratch, 0); 529 return err; 530 } 531 532 static int gpr_make_dirty(struct intel_context *ce) 533 { 534 struct i915_request *rq; 535 u32 *cs; 536 int n; 537 538 rq = intel_context_create_request(ce); 539 if (IS_ERR(rq)) 540 return PTR_ERR(rq); 541 542 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 543 if (IS_ERR(cs)) { 544 i915_request_add(rq); 545 return PTR_ERR(cs); 546 } 547 548 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 549 for (n = 0; n < NUM_GPR_DW; n++) { 550 *cs++ = CS_GPR(ce->engine, n); 551 *cs++ = STACK_MAGIC; 552 } 553 *cs++ = MI_NOOP; 554 555 intel_ring_advance(rq, cs); 556 557 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 558 i915_request_add(rq); 559 560 return 0; 561 } 562 563 static struct i915_request * 564 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 565 { 566 const u32 offset = 567 i915_ggtt_offset(ce->engine->status_page.vma) + 568 offset_in_page(slot); 569 struct i915_request *rq; 570 u32 *cs; 571 int err; 572 int n; 573 574 rq = intel_context_create_request(ce); 575 if (IS_ERR(rq)) 576 return rq; 577 578 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 579 if (IS_ERR(cs)) { 580 i915_request_add(rq); 581 return ERR_CAST(cs); 582 } 583 584 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 585 *cs++ = MI_NOOP; 586 587 *cs++ = MI_SEMAPHORE_WAIT | 588 MI_SEMAPHORE_GLOBAL_GTT | 589 MI_SEMAPHORE_POLL | 590 MI_SEMAPHORE_SAD_NEQ_SDD; 591 *cs++ = 0; 592 *cs++ = offset; 593 *cs++ = 0; 594 595 for (n = 0; n < NUM_GPR_DW; n++) { 596 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 597 *cs++ = CS_GPR(ce->engine, n); 598 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 599 *cs++ = 0; 600 } 601 602 i915_vma_lock(scratch); 603 err = 
i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 604 i915_vma_unlock(scratch); 605 606 i915_request_get(rq); 607 i915_request_add(rq); 608 if (err) { 609 i915_request_put(rq); 610 rq = ERR_PTR(err); 611 } 612 613 return rq; 614 } 615 616 static int __live_lrc_gpr(struct intel_engine_cs *engine, 617 struct i915_vma *scratch, 618 bool preempt) 619 { 620 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 621 struct intel_context *ce; 622 struct i915_request *rq; 623 u32 *cs; 624 int err; 625 int n; 626 627 if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS) 628 return 0; /* GPR only on rcs0 for gen8 */ 629 630 err = gpr_make_dirty(engine->kernel_context); 631 if (err) 632 return err; 633 634 ce = intel_context_create(engine); 635 if (IS_ERR(ce)) 636 return PTR_ERR(ce); 637 638 rq = __gpr_read(ce, scratch, slot); 639 if (IS_ERR(rq)) { 640 err = PTR_ERR(rq); 641 goto err_put; 642 } 643 644 err = wait_for_submit(engine, rq, HZ / 2); 645 if (err) 646 goto err_rq; 647 648 if (preempt) { 649 err = gpr_make_dirty(engine->kernel_context); 650 if (err) 651 goto err_rq; 652 653 err = emit_semaphore_signal(engine->kernel_context, slot); 654 if (err) 655 goto err_rq; 656 657 err = wait_for_submit(engine, rq, HZ / 2); 658 if (err) 659 goto err_rq; 660 } else { 661 slot[0] = 1; 662 wmb(); 663 } 664 665 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 666 err = -ETIME; 667 goto err_rq; 668 } 669 670 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); 671 if (IS_ERR(cs)) { 672 err = PTR_ERR(cs); 673 goto err_rq; 674 } 675 676 for (n = 0; n < NUM_GPR_DW; n++) { 677 if (cs[n]) { 678 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 679 engine->name, 680 n / 2, n & 1 ? 
"udw" : "ldw", 681 cs[n]); 682 err = -EINVAL; 683 break; 684 } 685 } 686 687 i915_gem_object_unpin_map(scratch->obj); 688 689 err_rq: 690 memset32(&slot[0], -1, 4); 691 wmb(); 692 i915_request_put(rq); 693 err_put: 694 intel_context_put(ce); 695 return err; 696 } 697 698 static int live_lrc_gpr(void *arg) 699 { 700 struct intel_gt *gt = arg; 701 struct intel_engine_cs *engine; 702 struct i915_vma *scratch; 703 enum intel_engine_id id; 704 int err = 0; 705 706 /* 707 * Check that GPR registers are cleared in new contexts as we need 708 * to avoid leaking any information from previous contexts. 709 */ 710 711 scratch = create_scratch(gt); 712 if (IS_ERR(scratch)) 713 return PTR_ERR(scratch); 714 715 for_each_engine(engine, gt, id) { 716 st_engine_heartbeat_disable(engine); 717 718 err = __live_lrc_gpr(engine, scratch, false); 719 if (err) 720 goto err; 721 722 err = __live_lrc_gpr(engine, scratch, true); 723 if (err) 724 goto err; 725 726 err: 727 st_engine_heartbeat_enable(engine); 728 if (igt_flush_test(gt->i915)) 729 err = -EIO; 730 if (err) 731 break; 732 } 733 734 i915_vma_unpin_and_release(&scratch, 0); 735 return err; 736 } 737 738 static struct i915_request * 739 create_timestamp(struct intel_context *ce, void *slot, int idx) 740 { 741 const u32 offset = 742 i915_ggtt_offset(ce->engine->status_page.vma) + 743 offset_in_page(slot); 744 struct i915_request *rq; 745 u32 *cs; 746 int err; 747 748 rq = intel_context_create_request(ce); 749 if (IS_ERR(rq)) 750 return rq; 751 752 cs = intel_ring_begin(rq, 10); 753 if (IS_ERR(cs)) { 754 err = PTR_ERR(cs); 755 goto err; 756 } 757 758 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 759 *cs++ = MI_NOOP; 760 761 *cs++ = MI_SEMAPHORE_WAIT | 762 MI_SEMAPHORE_GLOBAL_GTT | 763 MI_SEMAPHORE_POLL | 764 MI_SEMAPHORE_SAD_NEQ_SDD; 765 *cs++ = 0; 766 *cs++ = offset; 767 *cs++ = 0; 768 769 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 770 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 771 *cs++ = offset + idx * 
sizeof(u32); 772 *cs++ = 0; 773 774 intel_ring_advance(rq, cs); 775 776 err = 0; 777 err: 778 i915_request_get(rq); 779 i915_request_add(rq); 780 if (err) { 781 i915_request_put(rq); 782 return ERR_PTR(err); 783 } 784 785 return rq; 786 } 787 788 struct lrc_timestamp { 789 struct intel_engine_cs *engine; 790 struct intel_context *ce[2]; 791 u32 poison; 792 }; 793 794 static bool timestamp_advanced(u32 start, u32 end) 795 { 796 return (s32)(end - start) > 0; 797 } 798 799 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 800 { 801 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 802 struct i915_request *rq; 803 u32 timestamp; 804 int err = 0; 805 806 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 807 rq = create_timestamp(arg->ce[0], slot, 1); 808 if (IS_ERR(rq)) 809 return PTR_ERR(rq); 810 811 err = wait_for_submit(rq->engine, rq, HZ / 2); 812 if (err) 813 goto err; 814 815 if (preempt) { 816 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 817 err = emit_semaphore_signal(arg->ce[1], slot); 818 if (err) 819 goto err; 820 } else { 821 slot[0] = 1; 822 wmb(); 823 } 824 825 /* And wait for switch to kernel (to save our context to memory) */ 826 err = context_flush(arg->ce[0], HZ / 2); 827 if (err) 828 goto err; 829 830 if (!timestamp_advanced(arg->poison, slot[1])) { 831 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 832 arg->engine->name, preempt ? "preempt" : "simple", 833 arg->poison, slot[1]); 834 err = -EINVAL; 835 } 836 837 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 838 if (!timestamp_advanced(slot[1], timestamp)) { 839 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 840 arg->engine->name, preempt ? 
"preempt" : "simple", 841 slot[1], timestamp); 842 err = -EINVAL; 843 } 844 845 err: 846 memset32(slot, -1, 4); 847 i915_request_put(rq); 848 return err; 849 } 850 851 static int live_lrc_timestamp(void *arg) 852 { 853 struct lrc_timestamp data = {}; 854 struct intel_gt *gt = arg; 855 enum intel_engine_id id; 856 const u32 poison[] = { 857 0, 858 S32_MAX, 859 (u32)S32_MAX + 1, 860 U32_MAX, 861 }; 862 863 /* 864 * We want to verify that the timestamp is saved and restore across 865 * context switches and is monotonic. 866 * 867 * So we do this with a little bit of LRC poisoning to check various 868 * boundary conditions, and see what happens if we preempt the context 869 * with a second request (carrying more poison into the timestamp). 870 */ 871 872 for_each_engine(data.engine, gt, id) { 873 int i, err = 0; 874 875 st_engine_heartbeat_disable(data.engine); 876 877 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 878 struct intel_context *tmp; 879 880 tmp = intel_context_create(data.engine); 881 if (IS_ERR(tmp)) { 882 err = PTR_ERR(tmp); 883 goto err; 884 } 885 886 err = intel_context_pin(tmp); 887 if (err) { 888 intel_context_put(tmp); 889 goto err; 890 } 891 892 data.ce[i] = tmp; 893 } 894 895 for (i = 0; i < ARRAY_SIZE(poison); i++) { 896 data.poison = poison[i]; 897 898 err = __lrc_timestamp(&data, false); 899 if (err) 900 break; 901 902 err = __lrc_timestamp(&data, true); 903 if (err) 904 break; 905 } 906 907 err: 908 st_engine_heartbeat_enable(data.engine); 909 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 910 if (!data.ce[i]) 911 break; 912 913 intel_context_unpin(data.ce[i]); 914 intel_context_put(data.ce[i]); 915 } 916 917 if (igt_flush_test(gt->i915)) 918 err = -EIO; 919 if (err) 920 return err; 921 } 922 923 return 0; 924 } 925 926 static struct i915_vma * 927 create_user_vma(struct i915_address_space *vm, unsigned long size) 928 { 929 struct drm_i915_gem_object *obj; 930 struct i915_vma *vma; 931 int err; 932 933 obj = i915_gem_object_create_internal(vm->i915, 
size); 934 if (IS_ERR(obj)) 935 return ERR_CAST(obj); 936 937 vma = i915_vma_instance(obj, vm, NULL); 938 if (IS_ERR(vma)) { 939 i915_gem_object_put(obj); 940 return vma; 941 } 942 943 err = i915_vma_pin(vma, 0, 0, PIN_USER); 944 if (err) { 945 i915_gem_object_put(obj); 946 return ERR_PTR(err); 947 } 948 949 return vma; 950 } 951 952 static u32 safe_poison(u32 offset, u32 poison) 953 { 954 /* 955 * Do not enable predication as it will nop all subsequent commands, 956 * not only disabling the tests (by preventing all the other SRM) but 957 * also preventing the arbitration events at the end of the request. 958 */ 959 if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0))) 960 poison &= ~REG_BIT(0); 961 962 return poison; 963 } 964 965 static struct i915_vma * 966 store_context(struct intel_context *ce, struct i915_vma *scratch) 967 { 968 struct i915_vma *batch; 969 u32 dw, x, *cs, *hw; 970 u32 *defaults; 971 972 batch = create_user_vma(ce->vm, SZ_64K); 973 if (IS_ERR(batch)) 974 return batch; 975 976 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); 977 if (IS_ERR(cs)) { 978 i915_vma_put(batch); 979 return ERR_CAST(cs); 980 } 981 982 defaults = shmem_pin_map(ce->engine->default_state); 983 if (!defaults) { 984 i915_gem_object_unpin_map(batch->obj); 985 i915_vma_put(batch); 986 return ERR_PTR(-ENOMEM); 987 } 988 989 x = 0; 990 dw = 0; 991 hw = defaults; 992 hw += LRC_STATE_OFFSET / sizeof(*hw); 993 do { 994 u32 len = hw[dw] & LRI_LENGTH_MASK; 995 996 /* 997 * Keep it simple, skip parsing complex commands 998 * 999 * At present, there are no more MI_LOAD_REGISTER_IMM 1000 * commands after the first 3D state command. Rather 1001 * than include a table (see i915_cmd_parser.c) of all 1002 * the possible commands and their instruction lengths 1003 * (or mask for variable length instructions), assume 1004 * we have gathered the complete list of registers and 1005 * bail out. 
1006 */ 1007 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) 1008 break; 1009 1010 if (hw[dw] == 0) { 1011 dw++; 1012 continue; 1013 } 1014 1015 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { 1016 /* Assume all other MI commands match LRI length mask */ 1017 dw += len + 2; 1018 continue; 1019 } 1020 1021 if (!len) { 1022 pr_err("%s: invalid LRI found in context image\n", 1023 ce->engine->name); 1024 igt_hexdump(defaults, PAGE_SIZE); 1025 break; 1026 } 1027 1028 dw++; 1029 len = (len + 1) / 2; 1030 while (len--) { 1031 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 1032 *cs++ = hw[dw]; 1033 *cs++ = lower_32_bits(scratch->node.start + x); 1034 *cs++ = upper_32_bits(scratch->node.start + x); 1035 1036 dw += 2; 1037 x += 4; 1038 } 1039 } while (dw < PAGE_SIZE / sizeof(u32) && 1040 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 1041 1042 *cs++ = MI_BATCH_BUFFER_END; 1043 1044 shmem_unpin_map(ce->engine->default_state, defaults); 1045 1046 i915_gem_object_flush_map(batch->obj); 1047 i915_gem_object_unpin_map(batch->obj); 1048 1049 return batch; 1050 } 1051 1052 static struct i915_request * 1053 record_registers(struct intel_context *ce, 1054 struct i915_vma *before, 1055 struct i915_vma *after, 1056 u32 *sema) 1057 { 1058 struct i915_vma *b_before, *b_after; 1059 struct i915_request *rq; 1060 u32 *cs; 1061 int err; 1062 1063 b_before = store_context(ce, before); 1064 if (IS_ERR(b_before)) 1065 return ERR_CAST(b_before); 1066 1067 b_after = store_context(ce, after); 1068 if (IS_ERR(b_after)) { 1069 rq = ERR_CAST(b_after); 1070 goto err_before; 1071 } 1072 1073 rq = intel_context_create_request(ce); 1074 if (IS_ERR(rq)) 1075 goto err_after; 1076 1077 err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE); 1078 if (err) 1079 goto err_rq; 1080 1081 err = igt_vma_move_to_active_unlocked(b_before, rq, 0); 1082 if (err) 1083 goto err_rq; 1084 1085 err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE); 1086 if (err) 1087 goto err_rq; 1088 1089 err = 
igt_vma_move_to_active_unlocked(b_after, rq, 0); 1090 if (err) 1091 goto err_rq; 1092 1093 cs = intel_ring_begin(rq, 14); 1094 if (IS_ERR(cs)) { 1095 err = PTR_ERR(cs); 1096 goto err_rq; 1097 } 1098 1099 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1100 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 1101 *cs++ = lower_32_bits(b_before->node.start); 1102 *cs++ = upper_32_bits(b_before->node.start); 1103 1104 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1105 *cs++ = MI_SEMAPHORE_WAIT | 1106 MI_SEMAPHORE_GLOBAL_GTT | 1107 MI_SEMAPHORE_POLL | 1108 MI_SEMAPHORE_SAD_NEQ_SDD; 1109 *cs++ = 0; 1110 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 1111 offset_in_page(sema); 1112 *cs++ = 0; 1113 *cs++ = MI_NOOP; 1114 1115 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1116 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 1117 *cs++ = lower_32_bits(b_after->node.start); 1118 *cs++ = upper_32_bits(b_after->node.start); 1119 1120 intel_ring_advance(rq, cs); 1121 1122 WRITE_ONCE(*sema, 0); 1123 i915_request_get(rq); 1124 i915_request_add(rq); 1125 err_after: 1126 i915_vma_put(b_after); 1127 err_before: 1128 i915_vma_put(b_before); 1129 return rq; 1130 1131 err_rq: 1132 i915_request_add(rq); 1133 rq = ERR_PTR(err); 1134 goto err_after; 1135 } 1136 1137 static struct i915_vma *load_context(struct intel_context *ce, u32 poison) 1138 { 1139 struct i915_vma *batch; 1140 u32 dw, *cs, *hw; 1141 u32 *defaults; 1142 1143 batch = create_user_vma(ce->vm, SZ_64K); 1144 if (IS_ERR(batch)) 1145 return batch; 1146 1147 cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); 1148 if (IS_ERR(cs)) { 1149 i915_vma_put(batch); 1150 return ERR_CAST(cs); 1151 } 1152 1153 defaults = shmem_pin_map(ce->engine->default_state); 1154 if (!defaults) { 1155 i915_gem_object_unpin_map(batch->obj); 1156 i915_vma_put(batch); 1157 return ERR_PTR(-ENOMEM); 1158 } 1159 1160 dw = 0; 1161 hw = defaults; 1162 hw += LRC_STATE_OFFSET / sizeof(*hw); 1163 do { 1164 u32 len = hw[dw] & LRI_LENGTH_MASK; 1165 1166 /* For simplicity, 
break parsing at the first complex command */ 1167 if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) 1168 break; 1169 1170 if (hw[dw] == 0) { 1171 dw++; 1172 continue; 1173 } 1174 1175 if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { 1176 dw += len + 2; 1177 continue; 1178 } 1179 1180 if (!len) { 1181 pr_err("%s: invalid LRI found in context image\n", 1182 ce->engine->name); 1183 igt_hexdump(defaults, PAGE_SIZE); 1184 break; 1185 } 1186 1187 dw++; 1188 len = (len + 1) / 2; 1189 *cs++ = MI_LOAD_REGISTER_IMM(len); 1190 while (len--) { 1191 *cs++ = hw[dw]; 1192 *cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine, 1193 MI_LRI_LRM_CS_MMIO), 1194 poison); 1195 dw += 2; 1196 } 1197 } while (dw < PAGE_SIZE / sizeof(u32) && 1198 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 1199 1200 *cs++ = MI_BATCH_BUFFER_END; 1201 1202 shmem_unpin_map(ce->engine->default_state, defaults); 1203 1204 i915_gem_object_flush_map(batch->obj); 1205 i915_gem_object_unpin_map(batch->obj); 1206 1207 return batch; 1208 } 1209 1210 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) 1211 { 1212 struct i915_request *rq; 1213 struct i915_vma *batch; 1214 u32 *cs; 1215 int err; 1216 1217 batch = load_context(ce, poison); 1218 if (IS_ERR(batch)) 1219 return PTR_ERR(batch); 1220 1221 rq = intel_context_create_request(ce); 1222 if (IS_ERR(rq)) { 1223 err = PTR_ERR(rq); 1224 goto err_batch; 1225 } 1226 1227 err = igt_vma_move_to_active_unlocked(batch, rq, 0); 1228 if (err) 1229 goto err_rq; 1230 1231 cs = intel_ring_begin(rq, 8); 1232 if (IS_ERR(cs)) { 1233 err = PTR_ERR(cs); 1234 goto err_rq; 1235 } 1236 1237 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1238 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 1239 *cs++ = lower_32_bits(batch->node.start); 1240 *cs++ = upper_32_bits(batch->node.start); 1241 1242 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1243 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 1244 offset_in_page(sema); 1245 *cs++ = 0; 1246 *cs++ = 1; 1247 1248 
intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}

/* Report whether two samples of the same value differ. */
static bool is_moving(u32 a, u32 b)
{
	return a != b;
}

/*
 * Compare the reference captures (@ref) against the result captures
 * (@result) for every register named by an LRI command found in the
 * engine's default context image.
 *
 * A register is only checked if its two reference samples agree (see
 * is_moving()); it is reported as a mismatch if either result sample differs
 * from the corresponding reference sample. RING_HEAD and RING_TAIL (register
 * offsets 0x30 and 0x34 in the low 12 bits) are exempt. Every mismatch is
 * logged; parsing continues after a mismatch.
 *
 * Returns 0 if no mismatch was reported, -EINVAL if at least one was,
 * -ENOMEM if shmem_pin_map() fails, or the error from a failed object
 * mapping.
 */
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	u32 *defaults;
	int err = 0;

	/* Map the two reference buffers (A) and the two result buffers (B) */
	A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	/* The live context image; only read for the mismatch report below */
	lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
					       i915_coherent_map_type(engine->i915,
								      ce->state->obj,
								      false));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	/* Same dword offset as applied to hw below, so lrc[dw] pairs with hw[dw] */
	lrc += LRC_STATE_OFFSET / sizeof(*hw);

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		err = -ENOMEM;
		goto err_lrc;
	}

	/*
	 * x indexes the captured samples (one per register/value pair),
	 * dw indexes dwords of the default image starting at LRC_STATE_OFFSET.
	 */
	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		/* A zero dword is stepped over one dword at a time */
		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		/* Any other non-LRI command is skipped using its length field */
		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			dw += len + 2;
			continue;
		}

		/*
		 * NOTE(review): this path logs and stops parsing but leaves
		 * err untouched, so an invalid LRI alone does not fail the
		 * comparison - confirm that is intended.
		 */
		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		/* Step past the LRI header; len becomes the number of pairs */
		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				/* Low 12 bits of the register offset */
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
				}
			}
			/* Each entry is a (register, value) dword pair */
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	/* Unwind in the reverse order of acquisition */
	shmem_unpin_map(ce->engine->default_state, defaults);
err_lrc:
	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

/*
 * Create a user vma of @sz bytes in @vm (via create_user_vma()) and fill its
 * backing object with POISON_INUSE.
 *
 * Returns the vma, or an ERR_PTR if either the vma creation or the mapping
 * of its object fails (the vma is released in the latter case).
 */
static struct i915_vma *
create_result_vma(struct i915_address_space *vm, unsigned long sz)
{
	struct i915_vma *vma;
	void *ptr;

	vma = create_user_vma(vm, sz);
	if (IS_ERR(vma))
		return vma;

	/* Set the results to a known value distinct from the poison */
	ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
	if (IS_ERR(ptr)) {
		i915_vma_put(vma);
		return ERR_CAST(ptr);
	}

	memset(ptr, POISON_INUSE, vma->size);
	i915_gem_object_flush_map(vma->obj);
	i915_gem_object_unpin_map(vma->obj);

	return vma;
}

/*
 * Run one isolation pass on @engine with the given @poison value.
 *
 * Two contexts, A and B, are created. record_registers() is run on A into
 * the ref[] buffers, then again on A into the result[] buffers while
 * poison_registers() is run on B, and finally compare_isolation() checks the
 * result[] captures against the ref[] captures. (record_registers() and
 * poison_registers() are defined earlier in this file.)
 *
 * Returns 0 on success, -ETIME if either recording request does not complete
 * within HZ / 2, or the first error from any of the steps above.
 */
static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	/* Semaphore dword in the engine status page, cleared to 0 up front */
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	/* Reference capture: nothing else is asked to touch the registers */
	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	/* Set the semaphore ourselves for the reference pass */
	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_result_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	/* Result capture: this time context B is given the semaphore */
	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	err = poison_registers(B, poison, sema);
	if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
		pr_err("%s(%s): wait for results timed out\n",
		       __func__, engine->name);
		err = -ETIME;
	}

	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
	WRITE_ONCE(*sema, -1);
	i915_request_put(rq);
	if (err)
		goto err_result1;

	err = compare_isolation(engine, ref, result, A, poison);

err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}

/*
 * Engines for which live_lrc_isolation() skips the test by default: the copy
 * engine class on graphics version 9 and the render class on graphics
 * version 11.
 */
static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
		return true;

	return false;
}

/*
 * Selftest entry point; @arg is the struct intel_gt under test.
 *
 * Returns the first error reported by __lrc_isolation(), -EIO if
 * igt_flush_test() fails, or 0.
 */
static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};
	int err = 0;

	/*
	 * Our goal is to try and verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			int result;

			/*
			 * Try each pattern and its bitwise complement; keep
			 * going after a failure but remember the first error.
			 */
			result = __lrc_isolation(engine, poison[i]);
			if (result && !err)
				err = result;

			result = __lrc_isolation(engine, ~poison[i]);
			if (result && !err)
				err = result;
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

/*
 * Create a request on @ce, submit it and wait up to HZ / 5 for it.
 *
 * Returns 0 on completion, -ETIME if the wait fails, or the error from
 * request creation.
 */
static int indirect_ctx_submit_req(struct intel_context *ce)
{
	struct i915_request *rq;
	int err = 0;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Hold our own reference so rq can still be waited on after the add */
	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;

	i915_request_put(rq);

	return err;
}

/* Location of the canary within the indirect ctx bb: byte offset and dword index */
#define CTX_BB_CANARY_OFFSET (3 * 1024)
#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))

/*
 * Emit four dwords at @cs: an MI_STORE_REGISTER_MEM_GEN8 that stores
 * RING_START(0) (with MI_LRI_LRM_CS_MMIO set) to the GGTT address of the
 * canary slot, i.e. ce->state + context_wa_bb_offset(ce) +
 * CTX_BB_CANARY_OFFSET.
 *
 * Returns the advanced @cs.
 */
static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_START(0));
	*cs++ = i915_ggtt_offset(ce->state) +
		context_wa_bb_offset(ce) +
		CTX_BB_CANARY_OFFSET;
	*cs++ = 0;

	return cs;
}

/*
 * Preset the canary slot of @ce's indirect ctx bb to 0xdeadf00d and install
 * emit_indirect_ctx_bb_canary() as its contents via setup_indirect_ctx_bb().
 */
static void
indirect_ctx_bb_setup(struct intel_context *ce)
{
	u32 *cs = context_indirect_bb(ce);

	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;

	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}

/*
 * Check that the canary dword equals ce->lrc_reg_state[CTX_RING_START].
 *
 * Returns true on a match; otherwise logs both values and returns false.
 */
static bool check_ring_start(struct intel_context *ce)
{
	/* Step back from the register state to the image start, then to the bb */
	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
		LRC_STATE_OFFSET + context_wa_bb_offset(ce);

	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
		return true;

	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
	       ctx_bb[CTX_BB_CANARY_INDEX],
	       ce->lrc_reg_state[CTX_RING_START]);

	return false;
}

/*
 * Submit a request on @ce, wait for it, then verify the canary.
 *
 * Returns 0 on success, the error from indirect_ctx_submit_req(), or
 * -EINVAL if check_ring_start() fails.
 */
static int indirect_ctx_bb_check(struct intel_context *ce)
{
	int err;

	err = indirect_ctx_submit_req(ce);
	if (err)
		return err;

	if (!check_ring_start(ce))
		return -EINVAL;

	return 0;
}

/*
 * Run the indirect ctx bb canary check with two pinned contexts on @engine.
 *
 * Returns 0 on success or when the contexts have no wa_bb_page (nothing is
 * tested in that case), otherwise the first error encountered.
 */
static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
{
	struct intel_context *a, *b;
	int err;

	a = intel_context_create(engine);
	if (IS_ERR(a))
		return PTR_ERR(a);
	err = intel_context_pin(a);
	if (err)
		goto put_a;

	b = intel_context_create(engine);
	if (IS_ERR(b)) {
		err = PTR_ERR(b);
		goto unpin_a;
	}
	err = intel_context_pin(b);
	if (err)
		goto put_b;

	/* We use the already reserved extra page in context state */
	if (!a->wa_bb_page) {
		GEM_BUG_ON(b->wa_bb_page);
		GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
		/* err is 0 here: treated as a skip, not a failure */
		goto unpin_b;
	}

	/*
	 * In order to test that our per context bb is truly per context,
	 * and executes at the intended spot on context restoring process,
	 * make the batch store the ring start value to memory.
	 * As ring start is restored prior to starting the indirect ctx bb and
	 * as it will be different for each context, it fits this purpose.
	 */
	indirect_ctx_bb_setup(a);
	indirect_ctx_bb_setup(b);

	err = indirect_ctx_bb_check(a);
	if (err)
		goto unpin_b;

	err = indirect_ctx_bb_check(b);

unpin_b:
	intel_context_unpin(b);
put_b:
	intel_context_put(b);
unpin_a:
	intel_context_unpin(a);
put_a:
	intel_context_put(a);

	return err;
}

/*
 * Selftest entry point; @arg is the struct intel_gt under test.
 *
 * Runs __live_lrc_indirect_ctx_bb() on each engine, stopping at the first
 * failure. A failing igt_flush_test() is reported as -EIO.
 */
static int live_lrc_indirect_ctx_bb(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_lrc_indirect_ctx_bb(engine);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;

		if (err)
			break;
	}

	return err;
}

/*
 * Reset @engine on behalf of @rq.
 *
 * With bottom halves disabled, try to take this engine's reset bit
 * (I915_RESET_ENGINE + engine->id) in gt->reset.flags. If the bit was already
 * set, nothing is done. Otherwise the submission tasklet is disabled around a
 * call to __intel_engine_reset_bh(), which is itself skipped if @rq already
 * carries a fence error, and the bit is then cleared and its waiters woken.
 */
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	local_bh_disable();
	if (!test_and_set_bit(bit, lock)) {
		tasklet_disable(&engine->sched_engine->tasklet);

		if (!rq->fence.error)
			__intel_engine_reset_bh(engine, NULL);

		tasklet_enable(&engine->sched_engine->tasklet);
		clear_and_wake_up_bit(bit, lock);
	}
	local_bh_enable();
}

/*
 * Pin @ce, overwrite its register state (ce->lrc_reg_state, for
 * context_size - LRC_STATE_OFFSET bytes) with pseudo-random bytes drawn from
 * @prng, then create and submit a request on it.
 *
 * Returns the submitted request with an extra reference held for the caller,
 * or an ERR_PTR if pinning or request creation fails.
 *
 * NOTE(review): on the success path the pin taken here is not released by
 * this function, and __lrc_garbage() does not call intel_context_unpin()
 * either - confirm whether that is intentional.
 */
static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_OFFSET);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}

/*
 * Submit a request on a context with scrambled register state (see
 * garbage()), ban the context, reset the engine and check the outcome.
 *
 * Returns 0 on success, -ETIME if the request is not seen as submitted
 * within HZ / 2, -EINVAL if the request carries no fence error after the
 * reset, -EIO if the request does not complete within HZ / 2 afterwards, or
 * the error from context/request creation.
 */
static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	/* The reset is expected to have flagged the request with an error */
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

/*
 * Selftest entry point; @arg is the struct intel_gt under test.
 *
 * Does nothing (returns 0) unless CONFIG_DRM_I915_SELFTEST_BROKEN is
 * enabled. Otherwise runs __lrc_garbage() up to three times on every engine
 * whose gt reports intel_has_reset_engine(), returning the first error, or
 * -EIO if igt_flush_test() fails.
 */
static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

/*
 * Submit batches of 1024 requests on a fresh context until
 * __igt_timeout() reports that end_time has passed, wait for the last
 * request, then check the context's runtime underflow counters.
 *
 * Returns 0 on success, -EOVERFLOW if ce->stats.runtime.num_underflow is
 * non-zero, or a negative error from request creation or the final wait.
 */
static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	/* Start from clean counters so only this run is judged */
	ce->stats.runtime.num_underflow = 0;
	ce->stats.runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			/* Only the last request of each batch is referenced */
			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		/* On timeout, leave the loop still holding the reference */
		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	/* err still holds the non-negative wait result; clear it */
	err = 0;
	if (ce->stats.runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->stats.runtime.num_underflow,
		       ce->stats.runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

/*
 * Selftest entry point; @arg is the struct intel_gt under test.
 *
 * Runs __live_pphwsp_runtime() on each engine, stopping at the first error.
 * A failing igt_flush_test() afterwards overrides the result with -EIO.
 */
static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that cumulative context runtime as stored in the pphwsp[16]
	 * is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

/*
 * Run the live LRC subtests listed below against to_gt(@i915).
 *
 * Returns 0 without running anything if the device does not have logical
 * ring contexts, otherwise the result of intel_gt_live_subtests().
 */
int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
		SUBTEST(live_lrc_indirect_ctx_bb),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}