// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_internal.h"

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"
#include "shmem_utils.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

/* MMIO offset of general-purpose CS register @n for @engine */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

/* Opcode bits of an MI_LOAD_REGISTER_IMM header, and its dword-count field */
#define LRI_HEADER MI_INSTR(0x22, 0)
#define LRI_LENGTH_MASK GENMASK(7, 0)

/* Allocate a pinned GGTT scratch page the CPU can read results back from */
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
}

/*
 * Has @rq reached the HW? True if the request is in the execution lists,
 * parked on hold, or has written its initial breadcrumb (i.e. started).
 */
static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}

/*
 * Poll (with cond_resched) until @rq has been accepted by the HW, or until
 * @timeout jiffies elapse. Returns 0 once submitted/completed, -ETIME on
 * timeout.
 */
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	/* Ignore our own attempts to suppress excess tasklets */
	tasklet_hi_schedule(&engine->sched_engine->tasklet);

	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

/*
 * Submit a barrier-priority request on @ce that stores 1 into @slot (a
 * dword inside the engine's status page), releasing any MI_SEMAPHORE_WAIT
 * pollers spinning on that slot.
 */
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	/* Jump the queue so the signal cannot itself be blocked */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);
	return 0;
}

/*
 * Flush @ce: submit a kernel request that awaits the last request on the
 * context's timeline, then wait (up to @timeout) for it to complete,
 * ensuring the context has been switched out and its state saved.
 * Returns 0 on success, -ETIME if the flush did not finish in time.
 */
static int context_flush(struct intel_context *ce, long timeout)
{
	struct i915_request *rq;
	struct dma_fence *fence;
	int err = 0;

	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	fence = i915_active_fence_get(&ce->timeline->last_request);
	if (fence) {
		i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	rq = i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, timeout) < 0)
		err = -ETIME;
	i915_request_put(rq);

	rmb(); /* We know the request is written, make sure all state is too!
*/ 131 return err; 132 } 133 134 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri) 135 { 136 if ((lri & MI_LRI_LRM_CS_MMIO) == 0) 137 return ~0u; 138 139 if (GRAPHICS_VER(engine->i915) < 12) 140 return 0xfff; 141 142 switch (engine->class) { 143 default: 144 case RENDER_CLASS: 145 case COMPUTE_CLASS: 146 return 0x07ff; 147 case COPY_ENGINE_CLASS: 148 return 0x0fff; 149 case VIDEO_DECODE_CLASS: 150 case VIDEO_ENHANCEMENT_CLASS: 151 return 0x3fff; 152 } 153 } 154 155 static int live_lrc_layout(void *arg) 156 { 157 struct intel_gt *gt = arg; 158 struct intel_engine_cs *engine; 159 enum intel_engine_id id; 160 u32 *lrc; 161 int err; 162 163 /* 164 * Check the registers offsets we use to create the initial reg state 165 * match the layout saved by HW. 166 */ 167 168 lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */ 169 if (!lrc) 170 return -ENOMEM; 171 GEM_BUG_ON(offset_in_page(lrc)); 172 173 err = 0; 174 for_each_engine(engine, gt, id) { 175 u32 *hw; 176 int dw; 177 178 if (!engine->default_state) 179 continue; 180 181 hw = shmem_pin_map(engine->default_state); 182 if (!hw) { 183 err = -ENOMEM; 184 break; 185 } 186 hw += LRC_STATE_OFFSET / sizeof(*hw); 187 188 __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE), 189 engine->kernel_context, engine, true); 190 191 dw = 0; 192 do { 193 u32 lri = READ_ONCE(hw[dw]); 194 u32 lri_mask; 195 196 if (lri == 0) { 197 dw++; 198 continue; 199 } 200 201 if (lrc[dw] == 0) { 202 pr_debug("%s: skipped instruction %x at dword %d\n", 203 engine->name, lri, dw); 204 dw++; 205 continue; 206 } 207 208 if ((lri & GENMASK(31, 23)) != LRI_HEADER) { 209 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 210 engine->name, dw, lri); 211 err = -EINVAL; 212 break; 213 } 214 215 if (lrc[dw] != lri) { 216 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 217 engine->name, dw, lri, lrc[dw]); 218 err = -EINVAL; 219 break; 220 } 221 222 /* 223 * When bit 19 of MI_LOAD_REGISTER_IMM 
instruction 224 * opcode is set on Gen12+ devices, HW does not 225 * care about certain register address offsets, and 226 * instead check the following for valid address 227 * ranges on specific engines: 228 * RCS && CCS: BITS(0 - 10) 229 * BCS: BITS(0 - 11) 230 * VECS && VCS: BITS(0 - 13) 231 */ 232 lri_mask = get_lri_mask(engine, lri); 233 234 lri &= 0x7f; 235 lri++; 236 dw++; 237 238 while (lri) { 239 u32 offset = READ_ONCE(hw[dw]); 240 241 if ((offset ^ lrc[dw]) & lri_mask) { 242 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 243 engine->name, dw, offset, lrc[dw]); 244 err = -EINVAL; 245 break; 246 } 247 248 /* 249 * Skip over the actual register value as we 250 * expect that to differ. 251 */ 252 dw += 2; 253 lri -= 2; 254 } 255 } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 256 257 if (err) { 258 pr_info("%s: HW register image:\n", engine->name); 259 igt_hexdump(hw, PAGE_SIZE); 260 261 pr_info("%s: SW register image:\n", engine->name); 262 igt_hexdump(lrc, PAGE_SIZE); 263 } 264 265 shmem_unpin_map(engine->default_state, hw); 266 if (err) 267 break; 268 } 269 270 free_page((unsigned long)lrc); 271 return err; 272 } 273 274 static int find_offset(const u32 *lri, u32 offset) 275 { 276 int i; 277 278 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 279 if (lri[i] == offset) 280 return i; 281 282 return -1; 283 } 284 285 static int live_lrc_fixed(void *arg) 286 { 287 struct intel_gt *gt = arg; 288 struct intel_engine_cs *engine; 289 enum intel_engine_id id; 290 int err = 0; 291 292 /* 293 * Check the assumed register offsets match the actual locations in 294 * the context image. 
	 */

	for_each_engine(engine, gt, id) {
		/* Table of register mmio offset vs expected image position */
		const struct {
			u32 reg;
			u32 offset;
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{
				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
				lrc_ring_wa_bb_per_ctx(engine),
				"RING_BB_PER_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
				lrc_ring_indirect_ptr(engine),
				"RING_INDIRECT_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
				lrc_ring_indirect_offset(engine),
				"RING_INDIRECT_CTX_OFFSET"
			},
			{
				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
				CTX_TIMESTAMP - 1,
				"RING_CTX_TIMESTAMP"
			},
			{
				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
				lrc_ring_gpr0(engine),
				"RING_CS_GPR0"
			},
			{
				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
				lrc_ring_cmd_buf_cctl(engine),
				"RING_CMD_BUF_CCTL"
			},
			{
				i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
				lrc_ring_bb_offset(engine),
				"RING_BB_OFFSET"
			},
			{ },
		}, *t;
		u32 *hw;

		if (!engine->default_state)
			continue;

		hw = shmem_pin_map(engine->default_state);
		if (!hw) {
			err = -ENOMEM;
			break;
		}
		hw += LRC_STATE_OFFSET / sizeof(*hw);

		for (t = tbl; t->name; t++) {
			int dw = find_offset(hw, t->reg);

			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		shmem_unpin_map(engine->default_state, hw);
	}

	return err;
}

/*
 * Submit a request on a fresh context that SRMs RING_START and RING_TAIL
 * into @scratch, then compare against the values we expect the context to
 * be using. Uses the ww locking dance so -EDEADLK restarts cleanly.
 */
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(scratch->obj, &ww);
	if (!err)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	/* Sample the tail only after the request has reached the HW */
	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_context_put(ce);
	return err;
}

/* Run __live_lrc_state() on every engine, sharing one scratch page */
static int live_lrc_state(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the live register state matches what we expect for this
	 * intel_context.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		err = __live_lrc_state(engine, scratch);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

/*
 * Fill every CS_GPR of @ce's engine with STACK_MAGIC via a
 * barrier-priority LRI request, so a later context can check the GPRs
 * are scrubbed back to zero on restore.
 */
static int gpr_make_dirty(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	return 0;
}

/*
 * Build and submit a request on @ce that waits on the semaphore @slot
 * and then SRMs all CS_GPR registers into @scratch for CPU inspection.
 * Returns the request (with a reference held) or an ERR_PTR.
 */
static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return ERR_CAST(cs);
	}

	/* Allow preemption while we poll, so the preempt variant can run */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

	return rq;
}

/*
 * Dirty the GPRs from the kernel context, then read them back from a new
 * context and verify they were zeroed. With @preempt, dirty them again
 * and force a preemption while the reader is parked on its semaphore, to
 * check the scrub also holds across context restore after preemption.
 */
static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (preempt) {
		err = gpr_make_dirty(engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(engine->kernel_context, slot);
		if (err)
			goto err_rq;

		err = wait_for_submit(engine, rq, HZ / 2);
		if (err)
			goto err_rq;
	} else {
		/* Release the semaphore directly from the CPU */
		slot[0] = 1;
		wmb();
	}

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	/* Cancel the semaphore in case the GPU is still polling on it */
	memset32(&slot[0], -1, 4);
	wmb();
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}

/*
 * Check that GPR registers are cleared in new contexts (both with and
 * without an intervening preemption), on every engine.
 */
static int live_lrc_gpr(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that GPR registers are cleared in new contexts as we need
	 * to avoid leaking any information from previous contexts.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		st_engine_heartbeat_disable(engine);

		err = __live_lrc_gpr(engine, scratch, false);
		if (err)
			goto err;

		err = __live_lrc_gpr(engine, scratch, true);
		if (err)
			goto err;

err:
		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

/*
 * Build a request on @ce that parks on the semaphore at @slot and, once
 * released, SRMs RING_CTX_TIMESTAMP into status-page dword @idx of the
 * slot. Returns the request with a reference held, or an ERR_PTR.
 */
static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx *
		sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	err = 0;
err:
	/* Submit even on error so the ring space we reserved is consumed */
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

/* Per-engine state for the CTX_TIMESTAMP save/restore checks */
struct lrc_timestamp {
	struct intel_engine_cs *engine;
	struct intel_context *ce[2];
	u32 poison;	/* value planted in CTX_TIMESTAMP before restore */
};

/* Monotonic comparison modulo u32 wraparound */
static bool timestamp_advanced(u32 start, u32 end)
{
	return (s32)(end - start) > 0;
}

/*
 * Poison CTX_TIMESTAMP in ce[0]'s saved image, run a request that samples
 * the live timestamp after restore (optionally via a preemption by ce[1]),
 * then flush the context and check the timestamp advanced monotonically
 * across both the restore and the subsequent save.
 */
static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
	struct i915_request *rq;
	u32 timestamp;
	int err = 0;

	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(arg->ce[0], slot, 1);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	err = wait_for_submit(rq->engine, rq, HZ / 2);
	if (err)
		goto err;

	if (preempt) {
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(arg->ce[1], slot);
		if (err)
			goto err;
	} else {
		slot[0] = 1;
		wmb();
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(arg->ce[0], HZ / 2);
	if (err)
		goto err;

	if (!timestamp_advanced(arg->poison, slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(slot[1], timestamp)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       slot[1], timestamp);
		err = -EINVAL;
	}

err:
	/* Always cancel the semaphore before letting go of the request */
	memset32(slot, -1, 4);
	i915_request_put(rq);
	return err;
}

/*
 * Verify CTX_TIMESTAMP is saved/restored across context switches and is
 * monotonic, poisoning it with various boundary values, with and without
 * preemption.
 */
static int live_lrc_timestamp(void *arg)
{
	struct lrc_timestamp data = {};
	struct intel_gt *gt = arg;
	enum intel_engine_id id;
	const u32 poison[] = {
		0,
		S32_MAX,
		(u32)S32_MAX + 1,
		U32_MAX,
	};

	/*
	 * We want to verify that the timestamp is saved and restore across
	 * context switches and is monotonic.
	 *
	 * So we do this with a little bit of LRC poisoning to check various
	 * boundary conditions, and see what happens if we preempt the context
	 * with a second request (carrying more poison into the timestamp).
	 */

	for_each_engine(data.engine, gt, id) {
		int i, err = 0;

		st_engine_heartbeat_disable(data.engine);

		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			struct intel_context *tmp;

			tmp = intel_context_create(data.engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err;
			}

			data.ce[i] = tmp;
		}

		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			data.poison = poison[i];

			err = __lrc_timestamp(&data, false);
			if (err)
				break;

			err = __lrc_timestamp(&data, true);
			if (err)
				break;
		}

err:
		st_engine_heartbeat_enable(data.engine);
		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			if (!data.ce[i])
				break;

			intel_context_unpin(data.ce[i]);
			intel_context_put(data.ce[i]);
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

/*
 * Create and pin (PIN_USER) an internal object of @size in @vm, as a
 * stand-in for an unprivileged userspace buffer.
 */
static struct i915_vma *
create_user_vma(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(vm->i915,
					      size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

/*
 * Adjust a poison value when it would be written to a register whose
 * side effects would break the test itself.
 */
static u32 safe_poison(u32 offset, u32 poison)
{
	/*
	 * Do not enable predication as it will nop all subsequent commands,
	 * not only disabling the tests (by preventing all the other SRM) but
	 * also preventing the arbitration events at the end of the request.
	 */
	if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
		poison &= ~REG_BIT(0);

	return poison;
}

/*
 * Build a user batch that SRMs every register named by the LRI commands
 * in the engine's default context image into @scratch, so the live
 * register values of @ce can be captured for comparison.
 */
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/*
		 * Keep it simple, skip parsing complex commands
		 *
		 * At present, there are no more MI_LOAD_REGISTER_IMM
		 * commands after the first 3D state command. Rather
		 * than include a table (see i915_cmd_parser.c) of all
		 * the possible commands and their instruction lengths
		 * (or mask for variable length instructions), assume
		 * we have gathered the complete list of registers and
		 * bail out.
		 */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			/* Assume all other MI commands match LRI length mask */
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       ce->engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		dw++;
		len = (len + 1) / 2;	/* dword count -> register count */
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(scratch->node.start + x);
			*cs++ = upper_32_bits(scratch->node.start + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

/* Track @vma for use by @rq, under the vma lock */
static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, flags);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

/*
 * Submit a request on @ce that records its context registers into
 * @before, parks on the semaphore @sema, then records them again into
 * @after — bracketing whatever happens while the semaphore is held.
 * Returns the request with a reference held, or an ERR_PTR.
 */
static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto
			err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	/* Run the 'before' capture atomically, then wait on the semaphore */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	/* And the 'after' capture atomically once released */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}

/*
 * Build a user batch that reloads every register named by the LRI
 * commands in the engine's default context image with @poison (filtered
 * through safe_poison()), emulating a hostile client scribbling over its
 * own context registers.
 */
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       ce->engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
								  MI_LRI_LRM_CS_MMIO),
					    poison);
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

/*
 * Submit a barrier-priority request on @ce that runs the load_context()
 * poison batch and then signals @sema, releasing the observer parked in
 * record_registers().
 */
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ =
		lower_32_bits(batch->node.start);
	*cs++ = upper_32_bits(batch->node.start);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}

/* Did the register change between the two reference samples? */
static bool is_moving(u32 a, u32 b)
{
	return a != b;
}

/*
 * Compare the register captures taken before/after the poison attack
 * (@result) against the undisturbed baseline (@ref). Any register that
 * was stable in the baseline but changed in the result was leaked from
 * the foreign context — flag it, excepting RING_HEAD/RING_TAIL which
 * legitimately move.
 */
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	u32 *defaults;
	int err = 0;

	A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	/* Also map the saved context image for the diagnostic print below */
	lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
					       i915_coherent_map_type(engine->i915,
								      ce->state->obj,
								      false));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_OFFSET / sizeof(*hw);

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		err = -ENOMEM;
		goto err_lrc;
	}

	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       engine->name);
			igt_hexdump(defaults, PAGE_SIZE);
			break;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	shmem_unpin_map(ce->engine->default_state, defaults);
err_lrc:
	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

/*
 * Like create_user_vma(), but pre-fill the buffer with POISON_INUSE so
 * unwritten slots are distinguishable from both zero and the test poison.
 */
static struct i915_vma *
create_result_vma(struct i915_address_space *vm, unsigned long sz)
{
	struct i915_vma *vma;
	void *ptr;

	vma = create_user_vma(vm, sz);
	if (IS_ERR(vma))
		return vma;

	/* Set the results to a known value distinct from the poison */
	ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
	if (IS_ERR(ptr)) {
		i915_vma_put(vma);
		return ERR_CAST(ptr);
	}

	memset(ptr, POISON_INUSE, vma->size);
i915_gem_object_flush_map(vma->obj); 1417 i915_gem_object_unpin_map(vma->obj); 1418 1419 return vma; 1420 } 1421 1422 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) 1423 { 1424 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); 1425 struct i915_vma *ref[2], *result[2]; 1426 struct intel_context *A, *B; 1427 struct i915_request *rq; 1428 int err; 1429 1430 A = intel_context_create(engine); 1431 if (IS_ERR(A)) 1432 return PTR_ERR(A); 1433 1434 B = intel_context_create(engine); 1435 if (IS_ERR(B)) { 1436 err = PTR_ERR(B); 1437 goto err_A; 1438 } 1439 1440 ref[0] = create_result_vma(A->vm, SZ_64K); 1441 if (IS_ERR(ref[0])) { 1442 err = PTR_ERR(ref[0]); 1443 goto err_B; 1444 } 1445 1446 ref[1] = create_result_vma(A->vm, SZ_64K); 1447 if (IS_ERR(ref[1])) { 1448 err = PTR_ERR(ref[1]); 1449 goto err_ref0; 1450 } 1451 1452 rq = record_registers(A, ref[0], ref[1], sema); 1453 if (IS_ERR(rq)) { 1454 err = PTR_ERR(rq); 1455 goto err_ref1; 1456 } 1457 1458 WRITE_ONCE(*sema, 1); 1459 wmb(); 1460 1461 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 1462 i915_request_put(rq); 1463 err = -ETIME; 1464 goto err_ref1; 1465 } 1466 i915_request_put(rq); 1467 1468 result[0] = create_result_vma(A->vm, SZ_64K); 1469 if (IS_ERR(result[0])) { 1470 err = PTR_ERR(result[0]); 1471 goto err_ref1; 1472 } 1473 1474 result[1] = create_result_vma(A->vm, SZ_64K); 1475 if (IS_ERR(result[1])) { 1476 err = PTR_ERR(result[1]); 1477 goto err_result0; 1478 } 1479 1480 rq = record_registers(A, result[0], result[1], sema); 1481 if (IS_ERR(rq)) { 1482 err = PTR_ERR(rq); 1483 goto err_result1; 1484 } 1485 1486 err = poison_registers(B, poison, sema); 1487 if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) { 1488 pr_err("%s(%s): wait for results timed out\n", 1489 __func__, engine->name); 1490 err = -ETIME; 1491 } 1492 1493 /* Always cancel the semaphore wait, just in case the GPU gets stuck */ 1494 WRITE_ONCE(*sema, -1); 1495 i915_request_put(rq); 1496 if (err) 1497 goto 
err_result1; 1498 1499 err = compare_isolation(engine, ref, result, A, poison); 1500 1501 err_result1: 1502 i915_vma_put(result[1]); 1503 err_result0: 1504 i915_vma_put(result[0]); 1505 err_ref1: 1506 i915_vma_put(ref[1]); 1507 err_ref0: 1508 i915_vma_put(ref[0]); 1509 err_B: 1510 intel_context_put(B); 1511 err_A: 1512 intel_context_put(A); 1513 return err; 1514 } 1515 1516 static bool skip_isolation(const struct intel_engine_cs *engine) 1517 { 1518 if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9) 1519 return true; 1520 1521 if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11) 1522 return true; 1523 1524 return false; 1525 } 1526 1527 static int live_lrc_isolation(void *arg) 1528 { 1529 struct intel_gt *gt = arg; 1530 struct intel_engine_cs *engine; 1531 enum intel_engine_id id; 1532 const u32 poison[] = { 1533 STACK_MAGIC, 1534 0x3a3a3a3a, 1535 0x5c5c5c5c, 1536 0xffffffff, 1537 0xffff0000, 1538 }; 1539 int err = 0; 1540 1541 /* 1542 * Our goal is try and verify that per-context state cannot be 1543 * tampered with by another non-privileged client. 1544 * 1545 * We take the list of context registers from the LRI in the default 1546 * context image and attempt to modify that list from a remote context. 
1547 */ 1548 1549 for_each_engine(engine, gt, id) { 1550 int i; 1551 1552 /* Just don't even ask */ 1553 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && 1554 skip_isolation(engine)) 1555 continue; 1556 1557 intel_engine_pm_get(engine); 1558 for (i = 0; i < ARRAY_SIZE(poison); i++) { 1559 int result; 1560 1561 result = __lrc_isolation(engine, poison[i]); 1562 if (result && !err) 1563 err = result; 1564 1565 result = __lrc_isolation(engine, ~poison[i]); 1566 if (result && !err) 1567 err = result; 1568 } 1569 intel_engine_pm_put(engine); 1570 if (igt_flush_test(gt->i915)) { 1571 err = -EIO; 1572 break; 1573 } 1574 } 1575 1576 return err; 1577 } 1578 1579 static int indirect_ctx_submit_req(struct intel_context *ce) 1580 { 1581 struct i915_request *rq; 1582 int err = 0; 1583 1584 rq = intel_context_create_request(ce); 1585 if (IS_ERR(rq)) 1586 return PTR_ERR(rq); 1587 1588 i915_request_get(rq); 1589 i915_request_add(rq); 1590 1591 if (i915_request_wait(rq, 0, HZ / 5) < 0) 1592 err = -ETIME; 1593 1594 i915_request_put(rq); 1595 1596 return err; 1597 } 1598 1599 #define CTX_BB_CANARY_OFFSET (3 * 1024) 1600 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) 1601 1602 static u32 * 1603 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) 1604 { 1605 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | 1606 MI_SRM_LRM_GLOBAL_GTT | 1607 MI_LRI_LRM_CS_MMIO; 1608 *cs++ = i915_mmio_reg_offset(RING_START(0)); 1609 *cs++ = i915_ggtt_offset(ce->state) + 1610 context_wa_bb_offset(ce) + 1611 CTX_BB_CANARY_OFFSET; 1612 *cs++ = 0; 1613 1614 return cs; 1615 } 1616 1617 static void 1618 indirect_ctx_bb_setup(struct intel_context *ce) 1619 { 1620 u32 *cs = context_indirect_bb(ce); 1621 1622 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; 1623 1624 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); 1625 } 1626 1627 static bool check_ring_start(struct intel_context *ce) 1628 { 1629 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - 1630 LRC_STATE_OFFSET + 
context_wa_bb_offset(ce); 1631 1632 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) 1633 return true; 1634 1635 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n", 1636 ctx_bb[CTX_BB_CANARY_INDEX], 1637 ce->lrc_reg_state[CTX_RING_START]); 1638 1639 return false; 1640 } 1641 1642 static int indirect_ctx_bb_check(struct intel_context *ce) 1643 { 1644 int err; 1645 1646 err = indirect_ctx_submit_req(ce); 1647 if (err) 1648 return err; 1649 1650 if (!check_ring_start(ce)) 1651 return -EINVAL; 1652 1653 return 0; 1654 } 1655 1656 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) 1657 { 1658 struct intel_context *a, *b; 1659 int err; 1660 1661 a = intel_context_create(engine); 1662 if (IS_ERR(a)) 1663 return PTR_ERR(a); 1664 err = intel_context_pin(a); 1665 if (err) 1666 goto put_a; 1667 1668 b = intel_context_create(engine); 1669 if (IS_ERR(b)) { 1670 err = PTR_ERR(b); 1671 goto unpin_a; 1672 } 1673 err = intel_context_pin(b); 1674 if (err) 1675 goto put_b; 1676 1677 /* We use the already reserved extra page in context state */ 1678 if (!a->wa_bb_page) { 1679 GEM_BUG_ON(b->wa_bb_page); 1680 GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12); 1681 goto unpin_b; 1682 } 1683 1684 /* 1685 * In order to test that our per context bb is truly per context, 1686 * and executes at the intended spot on context restoring process, 1687 * make the batch store the ring start value to memory. 1688 * As ring start is restored apriori of starting the indirect ctx bb and 1689 * as it will be different for each context, it fits to this purpose. 
1690 */ 1691 indirect_ctx_bb_setup(a); 1692 indirect_ctx_bb_setup(b); 1693 1694 err = indirect_ctx_bb_check(a); 1695 if (err) 1696 goto unpin_b; 1697 1698 err = indirect_ctx_bb_check(b); 1699 1700 unpin_b: 1701 intel_context_unpin(b); 1702 put_b: 1703 intel_context_put(b); 1704 unpin_a: 1705 intel_context_unpin(a); 1706 put_a: 1707 intel_context_put(a); 1708 1709 return err; 1710 } 1711 1712 static int live_lrc_indirect_ctx_bb(void *arg) 1713 { 1714 struct intel_gt *gt = arg; 1715 struct intel_engine_cs *engine; 1716 enum intel_engine_id id; 1717 int err = 0; 1718 1719 for_each_engine(engine, gt, id) { 1720 intel_engine_pm_get(engine); 1721 err = __live_lrc_indirect_ctx_bb(engine); 1722 intel_engine_pm_put(engine); 1723 1724 if (igt_flush_test(gt->i915)) 1725 err = -EIO; 1726 1727 if (err) 1728 break; 1729 } 1730 1731 return err; 1732 } 1733 1734 static void garbage_reset(struct intel_engine_cs *engine, 1735 struct i915_request *rq) 1736 { 1737 const unsigned int bit = I915_RESET_ENGINE + engine->id; 1738 unsigned long *lock = &engine->gt->reset.flags; 1739 1740 local_bh_disable(); 1741 if (!test_and_set_bit(bit, lock)) { 1742 tasklet_disable(&engine->sched_engine->tasklet); 1743 1744 if (!rq->fence.error) 1745 __intel_engine_reset_bh(engine, NULL); 1746 1747 tasklet_enable(&engine->sched_engine->tasklet); 1748 clear_and_wake_up_bit(bit, lock); 1749 } 1750 local_bh_enable(); 1751 } 1752 1753 static struct i915_request *garbage(struct intel_context *ce, 1754 struct rnd_state *prng) 1755 { 1756 struct i915_request *rq; 1757 int err; 1758 1759 err = intel_context_pin(ce); 1760 if (err) 1761 return ERR_PTR(err); 1762 1763 prandom_bytes_state(prng, 1764 ce->lrc_reg_state, 1765 ce->engine->context_size - 1766 LRC_STATE_OFFSET); 1767 1768 rq = intel_context_create_request(ce); 1769 if (IS_ERR(rq)) { 1770 err = PTR_ERR(rq); 1771 goto err_unpin; 1772 } 1773 1774 i915_request_get(rq); 1775 i915_request_add(rq); 1776 return rq; 1777 1778 err_unpin: 1779 
intel_context_unpin(ce); 1780 return ERR_PTR(err); 1781 } 1782 1783 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng) 1784 { 1785 struct intel_context *ce; 1786 struct i915_request *hang; 1787 int err = 0; 1788 1789 ce = intel_context_create(engine); 1790 if (IS_ERR(ce)) 1791 return PTR_ERR(ce); 1792 1793 hang = garbage(ce, prng); 1794 if (IS_ERR(hang)) { 1795 err = PTR_ERR(hang); 1796 goto err_ce; 1797 } 1798 1799 if (wait_for_submit(engine, hang, HZ / 2)) { 1800 i915_request_put(hang); 1801 err = -ETIME; 1802 goto err_ce; 1803 } 1804 1805 intel_context_set_banned(ce); 1806 garbage_reset(engine, hang); 1807 1808 intel_engine_flush_submission(engine); 1809 if (!hang->fence.error) { 1810 i915_request_put(hang); 1811 pr_err("%s: corrupted context was not reset\n", 1812 engine->name); 1813 err = -EINVAL; 1814 goto err_ce; 1815 } 1816 1817 if (i915_request_wait(hang, 0, HZ / 2) < 0) { 1818 pr_err("%s: corrupted context did not recover\n", 1819 engine->name); 1820 i915_request_put(hang); 1821 err = -EIO; 1822 goto err_ce; 1823 } 1824 i915_request_put(hang); 1825 1826 err_ce: 1827 intel_context_put(ce); 1828 return err; 1829 } 1830 1831 static int live_lrc_garbage(void *arg) 1832 { 1833 struct intel_gt *gt = arg; 1834 struct intel_engine_cs *engine; 1835 enum intel_engine_id id; 1836 1837 /* 1838 * Verify that we can recover if one context state is completely 1839 * corrupted. 
1840 */ 1841 1842 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) 1843 return 0; 1844 1845 for_each_engine(engine, gt, id) { 1846 I915_RND_STATE(prng); 1847 int err = 0, i; 1848 1849 if (!intel_has_reset_engine(engine->gt)) 1850 continue; 1851 1852 intel_engine_pm_get(engine); 1853 for (i = 0; i < 3; i++) { 1854 err = __lrc_garbage(engine, &prng); 1855 if (err) 1856 break; 1857 } 1858 intel_engine_pm_put(engine); 1859 1860 if (igt_flush_test(gt->i915)) 1861 err = -EIO; 1862 if (err) 1863 return err; 1864 } 1865 1866 return 0; 1867 } 1868 1869 static int __live_pphwsp_runtime(struct intel_engine_cs *engine) 1870 { 1871 struct intel_context *ce; 1872 struct i915_request *rq; 1873 IGT_TIMEOUT(end_time); 1874 int err; 1875 1876 ce = intel_context_create(engine); 1877 if (IS_ERR(ce)) 1878 return PTR_ERR(ce); 1879 1880 ce->stats.runtime.num_underflow = 0; 1881 ce->stats.runtime.max_underflow = 0; 1882 1883 do { 1884 unsigned int loop = 1024; 1885 1886 while (loop) { 1887 rq = intel_context_create_request(ce); 1888 if (IS_ERR(rq)) { 1889 err = PTR_ERR(rq); 1890 goto err_rq; 1891 } 1892 1893 if (--loop == 0) 1894 i915_request_get(rq); 1895 1896 i915_request_add(rq); 1897 } 1898 1899 if (__igt_timeout(end_time, NULL)) 1900 break; 1901 1902 i915_request_put(rq); 1903 } while (1); 1904 1905 err = i915_request_wait(rq, 0, HZ / 5); 1906 if (err < 0) { 1907 pr_err("%s: request not completed!\n", engine->name); 1908 goto err_wait; 1909 } 1910 1911 igt_flush_test(engine->i915); 1912 1913 pr_info("%s: pphwsp runtime %lluns, average %lluns\n", 1914 engine->name, 1915 intel_context_get_total_runtime_ns(ce), 1916 intel_context_get_avg_runtime_ns(ce)); 1917 1918 err = 0; 1919 if (ce->stats.runtime.num_underflow) { 1920 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", 1921 engine->name, 1922 ce->stats.runtime.num_underflow, 1923 ce->stats.runtime.max_underflow); 1924 GEM_TRACE_DUMP(); 1925 err = -EOVERFLOW; 1926 } 1927 1928 err_wait: 1929 i915_request_put(rq); 1930 err_rq: 
1931 intel_context_put(ce); 1932 return err; 1933 } 1934 1935 static int live_pphwsp_runtime(void *arg) 1936 { 1937 struct intel_gt *gt = arg; 1938 struct intel_engine_cs *engine; 1939 enum intel_engine_id id; 1940 int err = 0; 1941 1942 /* 1943 * Check that cumulative context runtime as stored in the pphwsp[16] 1944 * is monotonic. 1945 */ 1946 1947 for_each_engine(engine, gt, id) { 1948 err = __live_pphwsp_runtime(engine); 1949 if (err) 1950 break; 1951 } 1952 1953 if (igt_flush_test(gt->i915)) 1954 err = -EIO; 1955 1956 return err; 1957 } 1958 1959 int intel_lrc_live_selftests(struct drm_i915_private *i915) 1960 { 1961 static const struct i915_subtest tests[] = { 1962 SUBTEST(live_lrc_layout), 1963 SUBTEST(live_lrc_fixed), 1964 SUBTEST(live_lrc_state), 1965 SUBTEST(live_lrc_gpr), 1966 SUBTEST(live_lrc_isolation), 1967 SUBTEST(live_lrc_timestamp), 1968 SUBTEST(live_lrc_garbage), 1969 SUBTEST(live_pphwsp_runtime), 1970 SUBTEST(live_lrc_indirect_ctx_bb), 1971 }; 1972 1973 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 1974 return 0; 1975 1976 return intel_gt_live_subtests(tests, to_gt(i915)); 1977 } 1978