/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

/* Allocate a GGTT-pinned scratch page for the GPU to write results into */
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

/* Park the heartbeat so it cannot preempt or retire behind the test's back */
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
				     unsigned long *saved)
{
	*saved = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine,
				    unsigned long saved)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;
}

static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_started(rq))
		return true;

	return false;
}

/* Poll until the HW has picked up the submission of @rq, or we time out */
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
*/ 95 return 0; 96 97 /* Wait until the HW has acknowleged the submission (or err) */ 98 intel_engine_flush_submission(engine); 99 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) 100 return 0; 101 102 if (done) 103 return -ETIME; 104 105 cond_resched(); 106 } while (1); 107 } 108 109 static int wait_for_reset(struct intel_engine_cs *engine, 110 struct i915_request *rq, 111 unsigned long timeout) 112 { 113 timeout += jiffies; 114 115 do { 116 cond_resched(); 117 intel_engine_flush_submission(engine); 118 119 if (READ_ONCE(engine->execlists.pending[0])) 120 continue; 121 122 if (i915_request_completed(rq)) 123 break; 124 125 if (READ_ONCE(rq->fence.error)) 126 break; 127 } while (time_before(jiffies, timeout)); 128 129 flush_scheduled_work(); 130 131 if (rq->fence.error != -EIO) { 132 pr_err("%s: hanging request %llx:%lld not reset\n", 133 engine->name, 134 rq->fence.context, 135 rq->fence.seqno); 136 return -EINVAL; 137 } 138 139 /* Give the request a jiffie to complete after flushing the worker */ 140 if (i915_request_wait(rq, 0, 141 max(0l, (long)(timeout - jiffies)) + 1) < 0) { 142 pr_err("%s: hanging request %llx:%lld did not complete\n", 143 engine->name, 144 rq->fence.context, 145 rq->fence.seqno); 146 return -ETIME; 147 } 148 149 return 0; 150 } 151 152 static int live_sanitycheck(void *arg) 153 { 154 struct intel_gt *gt = arg; 155 struct intel_engine_cs *engine; 156 enum intel_engine_id id; 157 struct igt_spinner spin; 158 int err = 0; 159 160 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) 161 return 0; 162 163 if (igt_spinner_init(&spin, gt)) 164 return -ENOMEM; 165 166 for_each_engine(engine, gt, id) { 167 struct intel_context *ce; 168 struct i915_request *rq; 169 170 ce = intel_context_create(engine); 171 if (IS_ERR(ce)) { 172 err = PTR_ERR(ce); 173 break; 174 } 175 176 rq = igt_spinner_create_request(&spin, ce, MI_NOOP); 177 if (IS_ERR(rq)) { 178 err = PTR_ERR(rq); 179 goto out_ctx; 180 } 181 182 i915_request_add(rq); 183 if (!igt_wait_for_spinner(&spin, rq)) { 184 GEM_TRACE("spinner failed to start\n"); 185 GEM_TRACE_DUMP(); 186 intel_gt_set_wedged(gt); 187 err = -EIO; 188 goto out_ctx; 189 } 190 191 igt_spinner_end(&spin); 192 if (igt_flush_test(gt->i915)) { 193 err = -EIO; 194 goto out_ctx; 195 } 196 197 out_ctx: 198 intel_context_put(ce); 199 if (err) 200 break; 201 } 202 203 igt_spinner_fini(&spin); 204 return err; 205 } 206 207 static int live_unlite_restore(struct intel_gt *gt, int prio) 208 { 209 struct intel_engine_cs *engine; 210 enum intel_engine_id id; 211 struct igt_spinner spin; 212 int err = -ENOMEM; 213 214 /* 215 * Check that we can correctly context switch between 2 instances 216 * on the same engine from the same parent context. 
	 */

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		unsigned long saved;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		engine_heartbeat_disable(engine, &saved);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Setup the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
304 */ 305 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 306 } 307 308 i915_request_get(rq[1]); 309 i915_request_add(rq[1]); 310 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 311 i915_request_put(rq[0]); 312 313 if (prio) { 314 struct i915_sched_attr attr = { 315 .priority = prio, 316 }; 317 318 /* Alternatively preempt the spinner with ce[1] */ 319 engine->schedule(rq[1], &attr); 320 } 321 322 /* And switch back to ce[0] for good measure */ 323 rq[0] = i915_request_create(ce[0]); 324 if (IS_ERR(rq[0])) { 325 err = PTR_ERR(rq[0]); 326 i915_request_put(rq[1]); 327 goto err_ce; 328 } 329 330 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 331 i915_request_get(rq[0]); 332 i915_request_add(rq[0]); 333 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 334 i915_request_put(rq[1]); 335 i915_request_put(rq[0]); 336 337 err_ce: 338 tasklet_kill(&engine->execlists.tasklet); /* flush submission */ 339 igt_spinner_end(&spin); 340 for (n = 0; n < ARRAY_SIZE(ce); n++) { 341 if (IS_ERR_OR_NULL(ce[n])) 342 break; 343 344 intel_context_unpin(ce[n]); 345 intel_context_put(ce[n]); 346 } 347 348 engine_heartbeat_enable(engine, saved); 349 if (igt_live_test_end(&t)) 350 err = -EIO; 351 if (err) 352 break; 353 } 354 355 igt_spinner_fini(&spin); 356 return err; 357 } 358 359 static int live_unlite_switch(void *arg) 360 { 361 return live_unlite_restore(arg, 0); 362 } 363 364 static int live_unlite_preempt(void *arg) 365 { 366 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 367 } 368 369 static int live_pin_rewind(void *arg) 370 { 371 struct intel_gt *gt = arg; 372 struct intel_engine_cs *engine; 373 enum intel_engine_id id; 374 int err = 0; 375 376 /* 377 * We have to be careful not to trust intel_ring too much, for example 378 * ring->head is updated upon retire which is out of sync with pinning 379 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD, 380 * or else we risk writing an older, stale value. 381 * 382 * To simulate this, let's apply a bit of deliberate sabotague. 383 */ 384 385 for_each_engine(engine, gt, id) { 386 struct intel_context *ce; 387 struct i915_request *rq; 388 struct intel_ring *ring; 389 struct igt_live_test t; 390 391 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 392 err = -EIO; 393 break; 394 } 395 396 ce = intel_context_create(engine); 397 if (IS_ERR(ce)) { 398 err = PTR_ERR(ce); 399 break; 400 } 401 402 err = intel_context_pin(ce); 403 if (err) { 404 intel_context_put(ce); 405 break; 406 } 407 408 /* Keep the context awake while we play games */ 409 err = i915_active_acquire(&ce->active); 410 if (err) { 411 intel_context_unpin(ce); 412 intel_context_put(ce); 413 break; 414 } 415 ring = ce->ring; 416 417 /* Poison the ring, and offset the next request from HEAD */ 418 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32)); 419 ring->emit = ring->size / 2; 420 ring->tail = ring->emit; 421 GEM_BUG_ON(ring->head); 422 423 intel_context_unpin(ce); 424 425 /* Submit a simple nop request */ 426 GEM_BUG_ON(intel_context_is_pinned(ce)); 427 rq = intel_context_create_request(ce); 428 i915_active_release(&ce->active); /* e.g. async retire */ 429 intel_context_put(ce); 430 if (IS_ERR(rq)) { 431 err = PTR_ERR(rq); 432 break; 433 } 434 GEM_BUG_ON(!rq->head); 435 i915_request_add(rq); 436 437 /* Expect not to hang! 
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long heartbeat;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		engine_heartbeat_disable(engine, &heartbeat);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		if (test_and_set_bit(I915_RESET_ENGINE + id,
				     &gt->reset.flags)) {
			intel_gt_set_wedged(gt);
			err = -EBUSY;
			goto out;
		}
		tasklet_disable(&engine->execlists.tasklet);

		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		intel_engine_reset(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		tasklet_enable(&engine->execlists.tasklet);
		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
				      &gt->reset.flags);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		engine_heartbeat_enable(engine, heartbeat);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}

static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD,  GOOD } },
		{ { BAD,  BAD  } },
		{ { BAD,  GOOD } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
576 */ 577 578 if (!intel_has_reset_engine(gt)) 579 return 0; 580 581 for_each_engine(engine, gt, id) { 582 const struct error_phase *p; 583 unsigned long heartbeat; 584 int err = 0; 585 586 engine_heartbeat_disable(engine, &heartbeat); 587 588 for (p = phases; p->error[0] != GOOD; p++) { 589 struct i915_request *client[ARRAY_SIZE(phases->error)]; 590 u32 *cs; 591 int i; 592 593 memset(client, 0, sizeof(*client)); 594 for (i = 0; i < ARRAY_SIZE(client); i++) { 595 struct intel_context *ce; 596 struct i915_request *rq; 597 598 ce = intel_context_create(engine); 599 if (IS_ERR(ce)) { 600 err = PTR_ERR(ce); 601 goto out; 602 } 603 604 rq = intel_context_create_request(ce); 605 intel_context_put(ce); 606 if (IS_ERR(rq)) { 607 err = PTR_ERR(rq); 608 goto out; 609 } 610 611 if (rq->engine->emit_init_breadcrumb) { 612 err = rq->engine->emit_init_breadcrumb(rq); 613 if (err) { 614 i915_request_add(rq); 615 goto out; 616 } 617 } 618 619 cs = intel_ring_begin(rq, 2); 620 if (IS_ERR(cs)) { 621 i915_request_add(rq); 622 err = PTR_ERR(cs); 623 goto out; 624 } 625 626 if (p->error[i]) { 627 *cs++ = 0xdeadbeef; 628 *cs++ = 0xdeadbeef; 629 } else { 630 *cs++ = MI_NOOP; 631 *cs++ = MI_NOOP; 632 } 633 634 client[i] = i915_request_get(rq); 635 i915_request_add(rq); 636 } 637 638 err = wait_for_submit(engine, client[0], HZ / 2); 639 if (err) { 640 pr_err("%s: first request did not start within time!\n", 641 engine->name); 642 err = -ETIME; 643 goto out; 644 } 645 646 for (i = 0; i < ARRAY_SIZE(client); i++) { 647 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 648 pr_debug("%s: %s request incomplete!\n", 649 engine->name, 650 error_repr(p->error[i])); 651 652 if (!i915_request_started(client[i])) { 653 pr_err("%s: %s request not started!\n", 654 engine->name, 655 error_repr(p->error[i])); 656 err = -ETIME; 657 goto out; 658 } 659 660 /* Kick the tasklet to process the error */ 661 intel_engine_flush_submission(engine); 662 if (client[i]->fence.error != p->error[i]) { 663 pr_err("%s: %s request (%s) with wrong error code: %d\n", 664 engine->name, 665 error_repr(p->error[i]), 666 i915_request_completed(client[i]) ? 
"completed" : "running", 667 client[i]->fence.error); 668 err = -EINVAL; 669 goto out; 670 } 671 } 672 673 out: 674 for (i = 0; i < ARRAY_SIZE(client); i++) 675 if (client[i]) 676 i915_request_put(client[i]); 677 if (err) { 678 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 679 engine->name, p - phases, 680 p->error[0], p->error[1]); 681 break; 682 } 683 } 684 685 engine_heartbeat_enable(engine, heartbeat); 686 if (err) { 687 intel_gt_set_wedged(gt); 688 return err; 689 } 690 } 691 692 return 0; 693 } 694 695 static int 696 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 697 { 698 u32 *cs; 699 700 cs = intel_ring_begin(rq, 10); 701 if (IS_ERR(cs)) 702 return PTR_ERR(cs); 703 704 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 705 706 *cs++ = MI_SEMAPHORE_WAIT | 707 MI_SEMAPHORE_GLOBAL_GTT | 708 MI_SEMAPHORE_POLL | 709 MI_SEMAPHORE_SAD_NEQ_SDD; 710 *cs++ = 0; 711 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 712 *cs++ = 0; 713 714 if (idx > 0) { 715 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 716 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 717 *cs++ = 0; 718 *cs++ = 1; 719 } else { 720 *cs++ = MI_NOOP; 721 *cs++ = MI_NOOP; 722 *cs++ = MI_NOOP; 723 *cs++ = MI_NOOP; 724 } 725 726 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 727 728 intel_ring_advance(rq, cs); 729 return 0; 730 } 731 732 static struct i915_request * 733 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 734 { 735 struct intel_context *ce; 736 struct i915_request *rq; 737 int err; 738 739 ce = intel_context_create(engine); 740 if (IS_ERR(ce)) 741 return ERR_CAST(ce); 742 743 rq = intel_context_create_request(ce); 744 if (IS_ERR(rq)) 745 goto out_ce; 746 747 err = 0; 748 if (rq->engine->emit_init_breadcrumb) 749 err = rq->engine->emit_init_breadcrumb(rq); 750 if (err == 0) 751 err = emit_semaphore_chain(rq, vma, idx); 752 if (err == 0) 753 i915_request_get(rq); 754 i915_request_add(rq); 755 if (err) 756 rq = ERR_PTR(err); 757 758 out_ce: 759 intel_context_put(ce); 760 return rq; 761 } 762 763 static int 764 release_queue(struct intel_engine_cs *engine, 765 struct i915_vma *vma, 766 int idx, int prio) 767 { 768 struct i915_sched_attr attr = { 769 .priority = prio, 770 }; 771 struct i915_request *rq; 772 u32 *cs; 773 774 rq = intel_engine_create_kernel_request(engine); 775 if (IS_ERR(rq)) 776 return PTR_ERR(rq); 777 778 cs = intel_ring_begin(rq, 4); 779 if (IS_ERR(cs)) { 780 i915_request_add(rq); 781 return PTR_ERR(cs); 782 } 783 784 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 785 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 786 *cs++ = 0; 787 *cs++ = 1; 788 789 intel_ring_advance(rq, cs); 790 791 i915_request_get(rq); 792 i915_request_add(rq); 793 794 local_bh_disable(); 795 engine->schedule(rq, &attr); 796 local_bh_enable(); /* kick tasklet */ 797 798 i915_request_put(rq); 799 800 return 0; 801 } 802 803 static int 804 slice_semaphore_queue(struct intel_engine_cs *outer, 805 struct i915_vma *vma, 806 int count) 807 { 808 struct intel_engine_cs *engine; 809 struct i915_request *head; 810 enum intel_engine_id id; 811 int err, i, n = 0; 812 813 head = semaphore_queue(outer, vma, n++); 814 if (IS_ERR(head)) 815 return PTR_ERR(head); 816 817 for_each_engine(engine, outer->gt, id) { 818 for (i = 0; i < count; i++) { 819 struct i915_request *rq; 820 821 rq = semaphore_queue(engine, vma, n++); 822 if (IS_ERR(rq)) { 823 err = PTR_ERR(rq); 824 goto out; 825 } 826 827 i915_request_put(rq); 828 } 829 } 830 831 err = release_queue(outer, vma, n, INT_MAX); 832 if (err) 833 goto out; 834 835 if 
	if (i915_request_wait(head, 0,
			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
		       count, n);
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(outer->gt);
		err = -EIO;
	}

out:
	i915_request_put(head);
	return err;
}

static int live_timeslice_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;
	int count;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_prime_number_from(count, 1, 16) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, gt, id) {
			unsigned long saved;

			if (!intel_engine_has_preemption(engine))
				continue;

			memset(vaddr, 0, PAGE_SIZE);

			engine_heartbeat_disable(engine, &saved);
			err = slice_semaphore_queue(engine, vma, count);
			engine_heartbeat_enable(engine, saved);
			if (err)
				goto err_pin;

			if (igt_flush_test(gt->i915)) {
				err = -EIO;
				goto err_pin;
			}
		}
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Build a request that waits for @slot[0] >= @idx, records RING_TIMESTAMP
 * into @slot[idx] and then bumps the semaphore to release the next waiter.
 */
static struct i915_request *
create_rewinder(struct intel_context *ce,
		struct i915_request *wait,
		void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	if (wait) {
		err = i915_request_await_dma_fence(rq, &wait->fence);
		if (err)
			goto err;
	}

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = idx;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = idx + 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_MASK;
	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

static int live_timeslice_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * The usual presumption on timeslice expiration is that we replace
	 * the active context with another. However, given a chain of
	 * dependencies we may end up with replacing the context with itself,
	 * but only a few of those requests, forcing us to rewind the
	 * RING_TAIL of the original request.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	for_each_engine(engine, gt, id) {
		enum { A1, A2, B1 };
		enum { X = 1, Z, Y };
		struct i915_request *rq[3] = {};
		struct intel_context *ce;
		unsigned long heartbeat;
		unsigned long timeslice;
		int i, err = 0;
		u32 *slot;

		if (!intel_engine_has_timeslices(engine))
			continue;

		/*
		 * A:rq1 -- semaphore wait, timestamp X
		 * A:rq2 -- write timestamp Y
		 *
		 * B:rq1 [await A:rq1] -- write timestamp Z
		 *
		 * Force timeslice, release semaphore.
		 *
		 * Expect execution/evaluation order XZY
		 */

		engine_heartbeat_disable(engine, &heartbeat);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		slot = memset32(engine->status_page.addr + 1000, 0, 4);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[0] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[0])) {
			intel_context_put(ce);
			goto err;
		}

		rq[1] = create_rewinder(ce, NULL, slot, Y);
		intel_context_put(ce);
		if (IS_ERR(rq[1]))
			goto err;

		err = wait_for_submit(engine, rq[1], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit first context\n",
			       engine->name);
			goto err;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[2] = create_rewinder(ce, rq[0], slot, Z);
		intel_context_put(ce);
		if (IS_ERR(rq[2]))
			goto err;

		err = wait_for_submit(engine, rq[2], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit second context\n",
			       engine->name);
			goto err;
		}

		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
		if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
			/* Wait for the timeslice to kick in */
			del_timer(&engine->execlists.timer);
			tasklet_hi_schedule(&engine->execlists.tasklet);
			intel_engine_flush_submission(engine);
		}
		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
		GEM_BUG_ON(i915_request_is_active(rq[A2]));

		/* Release the hounds! */
		slot[0] = 1;
		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */

		for (i = 1; i <= 3; i++) {
			unsigned long timeout = jiffies + HZ / 2;

			while (!READ_ONCE(slot[i]) &&
			       time_before(jiffies, timeout))
				;

			if (!time_before(jiffies, timeout)) {
				pr_err("%s: rq[%d] timed out\n",
				       engine->name, i - 1);
				err = -ETIME;
				goto err;
			}

			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
		}

		/* XZY: XZ < XY */
		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
			       engine->name,
			       slot[Z] - slot[X],
			       slot[Y] - slot[X]);
			err = -EINVAL;
		}

err:
		memset32(&slot[0], -1, 4);
		wmb();

		engine->props.timeslice_duration_ms = timeslice;
		engine_heartbeat_enable(engine, heartbeat);
		for (i = 0; i < 3; i++)
			i915_request_put(rq[i]);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static long timeslice_threshold(const struct intel_engine_cs *engine)
{
	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
}

static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = {
			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
		};
		struct i915_request *rq, *nop;
		unsigned long saved;

		if (!intel_engine_has_preemption(engine))
			continue;

		engine_heartbeat_disable(engine, &saved);
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		/* Wait until we ack the release_queue and start timeslicing */
		do {
			cond_resched();
			intel_engine_flush_submission(engine);
		} while (READ_ONCE(engine->execlists.pending[0]));

		if (!READ_ONCE(engine->execlists.timer.expires) &&
		    execlists_active(&engine->execlists) == rq &&
		    !i915_request_completed(rq)) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
				      engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);
			GEM_TRACE_DUMP();

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EINVAL;
		}

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		engine_heartbeat_enable(engine, saved);
		if (err)
			break;
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	int err = -ENOMEM;
	u32 *map;

	/*
	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
	 * preempt the busywaits used to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		i915_request_add(hi);

		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}

static struct i915_request *
spinner_create_request(struct igt_spinner *spin,
		       struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       u32 arb)
{
	struct intel_context *ce;
	struct i915_request *rq;

	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = igt_spinner_create_request(spin, ce, arb);
	intel_context_put(ce);
	return rq;
}

static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
		pr_err("Logical preemption supported, but not exposed\n");

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;
}

static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = I915_USER_PRIORITY(1);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
		engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;

err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_ctx_lo;
}

struct preempt_client {
	struct igt_spinner spin;
	struct i915_gem_context *ctx;
};

static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
	c->ctx = kernel_context(gt->i915);
	if (!c->ctx)
		return -ENOMEM;

	if (igt_spinner_init(&c->spin, gt))
		goto err_ctx;

	return 0;

err_ctx:
	kernel_context_close(c->ctx);
	return -ENOMEM;
}

static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}

static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and does not want to be interrupted.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;
	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;

		if (!intel_engine_has_preemption(engine))
			continue;

		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			goto err_client_b;
		}

		/* Low priority client, but unpreemptable! */
		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			goto err_wedged;
		}

		rq_b = spinner_create_request(&b.spin,
					      b.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_b)) {
			err = PTR_ERR(rq_b);
			goto err_client_b;
		}

		i915_request_add(rq_b);

		/* B is much more important than A! (But A is unpreemptable.) */
		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));

		/* Wait long enough for preemption and timeslicing */
		if (igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client started too early!\n");
			goto err_wedged;
		}

		igt_spinner_end(&a.spin);

		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client failed to start\n");
			goto err_wedged;
		}

		igt_spinner_end(&b.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption recorded x%d; should have been suppressed!\n",
			       engine->execlists.preempt_hang.count);
			err = -EINVAL;
			goto err_wedged;
		}

		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}

struct live_preempt_cancel {
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
};

static int __cancel_active0(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP0 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_ARB_CHECK);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_active1(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[2] = {};
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP1 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_NOOP); /* no preemption */
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = spinner_create_request(&arg->b.spin,
				       arg->b.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	intel_context_set_banned(rq[1]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	igt_spinner_end(&arg->a.spin);
	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != 0) {
		pr_err("Normal inflight0 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != -EIO) {
		pr_err("Cancelled inflight1 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_queued(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[3] = {};
	struct igt_live_test t;
	int err;

	/* Full ELSP and one in the wings */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	rq[2] = spinner_create_request(&arg->b.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[2])) {
		err = PTR_ERR(rq[2]);
		goto out;
	}

	i915_request_get(rq[2]);
	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
	i915_request_add(rq[2]);
	if (err)
		goto out;

	intel_context_set_banned(rq[2]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != 0) {
		pr_err("Normal inflight1 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[2]->fence.error != -EIO) {
		pr_err("Cancelled queued request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[2]);
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_hostile(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	int err;

	/* Preempt cancel non-preemptible spinner in ELSP0 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	if (!intel_has_reset_engine(arg->engine->gt))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine); /* force reset */
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(arg->engine->i915))
		err = -EIO;
	return err;
}

static int live_preempt_cancel(void *arg)
{
	struct intel_gt *gt = arg;
	struct live_preempt_cancel data;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * To cancel an inflight context, we need to first remove it from the
	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &data.a))
		return -ENOMEM;
	if (preempt_client_init(gt, &data.b))
		goto err_client_a;

	for_each_engine(data.engine, gt, id) {
		if (!intel_engine_has_preemption(data.engine))
			continue;

		err = __cancel_active0(&data);
		if (err)
			goto err_wedged;

		err = __cancel_active1(&data);
		if (err)
			goto err_wedged;

		err = __cancel_queued(&data);
		if (err)
			goto err_wedged;

		err = __cancel_hostile(&data);
		if (err)
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&data.b);
err_client_a:
	preempt_client_fini(&data.a);
	return err;

err_wedged:
	GEM_TRACE_DUMP();
	igt_spinner_end(&data.b.spin);
	igt_spinner_end(&data.a.spin);
	intel_gt_set_wedged(gt);
	goto err_client_b;
}

static int live_suppress_self_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
	};
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		intel_engine_pm_get(engine);
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			intel_engine_pm_put(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			intel_engine_pm_put(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				intel_engine_pm_put(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			GEM_BUG_ON(i915_request_completed(rq_a));
			engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				intel_engine_pm_put(engine);
				goto err_wedged;
			}

			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
			intel_engine_pm_put(engine);
			err = -EINVAL;
			goto err_client_b;
		}

		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}

static int __i915_sw_fence_call
dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

/* Hand-roll a placeholder request that can never complete on its own */
static struct i915_request *dummy_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
	if (!rq)
		return NULL;

	rq->engine = engine;

	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->fence.cb_list);
	rq->fence.lock = &rq->lock;
	rq->fence.ops = &i915_fence_ops;

	i915_sched_node_init(&rq->sched);

	/* mark this request as permanently incomplete */
	rq->fence.seqno = 1;
	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
	GEM_BUG_ON(i915_request_completed(rq));

	i915_sw_fence_init(&rq->submit, dummy_notify);
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_lock_init(&rq->lock);
	rq->fence.lock = &rq->lock;
	INIT_LIST_HEAD(&rq->fence.cb_list);

	return rq;
}

static void dummy_request_free(struct i915_request *dummy)
{
	/* We have to fake the CS interrupt to kick the next request */
	i915_sw_fence_commit(&dummy->submit);

	i915_request_mark_complete(dummy);
	dma_fence_signal(&dummy->fence);

	i915_sched_node_fini(&dummy->sched);
	i915_sw_fence_fini(&dummy->submit);

	dma_fence_free(&dummy->fence);
}

static int live_suppress_wait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct preempt_client client[4];
	struct i915_request *rq[ARRAY_SIZE(client)] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;
	int i;

	/*
	 * Waiters are given a little priority nudge, but not enough
	 * to actually cause any preemption. Double check that we do
	 * not needlessly generate preempt-to-idle cycles.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
		return -ENOMEM;
	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
		goto err_client_0;
	if (preempt_client_init(gt, &client[2])) /* head of queue */
		goto err_client_1;
	if (preempt_client_init(gt, &client[3])) /* bystander */
		goto err_client_2;

	for_each_engine(engine, gt, id) {
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!engine->emit_init_breadcrumb)
			continue;

		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
			struct i915_request *dummy;

			engine->execlists.preempt_hang.count = 0;

			dummy = dummy_request(engine);
			if (!dummy)
				goto err_client_3;

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct i915_request *this;

				this = spinner_create_request(&client[i].spin,
							      client[i].ctx, engine,
							      MI_NOOP);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto err_wedged;
				}

				/* Disable NEWCLIENT promotion */
				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
							&dummy->fence);

				rq[i] = i915_request_get(this);
				i915_request_add(this);
			}

			dummy_request_free(dummy);

			GEM_BUG_ON(i915_request_completed(rq[0]));
			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
				pr_err("%s: First client failed to start\n",
				       engine->name);
				goto err_wedged;
			}
			GEM_BUG_ON(!i915_request_started(rq[0]));

			if (i915_request_wait(rq[depth],
					      I915_WAIT_PRIORITY,
					      1) != -ETIME) {
				pr_err("%s: Waiter depth:%d completed!\n",
				       engine->name, depth);
				goto err_wedged;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				igt_spinner_end(&client[i].spin);
				i915_request_put(rq[i]);
				rq[i] = NULL;
			}

			if (igt_flush_test(gt->i915))
				goto err_wedged;

			if (engine->execlists.preempt_hang.count) {
				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
				       engine->name,
				       engine->execlists.preempt_hang.count,
				       depth);
				err = -EINVAL;
				goto err_client_3;
			}
		}
	}

	err = 0;
err_client_3:
	preempt_client_fini(&client[3]);
err_client_2:
	preempt_client_fini(&client[2]);
err_client_1:
	preempt_client_fini(&client[1]);
err_client_0:
	preempt_client_fini(&client[0]);
return err; 2434 2435 err_wedged: 2436 for (i = 0; i < ARRAY_SIZE(client); i++) { 2437 igt_spinner_end(&client[i].spin); 2438 i915_request_put(rq[i]); 2439 } 2440 intel_gt_set_wedged(gt); 2441 err = -EIO; 2442 goto err_client_3; 2443 } 2444 2445 static int live_chain_preempt(void *arg) 2446 { 2447 struct intel_gt *gt = arg; 2448 struct intel_engine_cs *engine; 2449 struct preempt_client hi, lo; 2450 enum intel_engine_id id; 2451 int err = -ENOMEM; 2452 2453 /* 2454 * Build a chain AB...BA between two contexts (A, B) and request 2455 * preemption of the last request. It should then complete before 2456 * the previously submitted spinner in B. 2457 */ 2458 2459 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2460 return 0; 2461 2462 if (preempt_client_init(gt, &hi)) 2463 return -ENOMEM; 2464 2465 if (preempt_client_init(gt, &lo)) 2466 goto err_client_hi; 2467 2468 for_each_engine(engine, gt, id) { 2469 struct i915_sched_attr attr = { 2470 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2471 }; 2472 struct igt_live_test t; 2473 struct i915_request *rq; 2474 int ring_size, count, i; 2475 2476 if (!intel_engine_has_preemption(engine)) 2477 continue; 2478 2479 rq = spinner_create_request(&lo.spin, 2480 lo.ctx, engine, 2481 MI_ARB_CHECK); 2482 if (IS_ERR(rq)) 2483 goto err_wedged; 2484 2485 i915_request_get(rq); 2486 i915_request_add(rq); 2487 2488 ring_size = rq->wa_tail - rq->head; 2489 if (ring_size < 0) 2490 ring_size += rq->ring->size; 2491 ring_size = rq->ring->size / ring_size; 2492 pr_debug("%s(%s): Using maximum of %d requests\n", 2493 __func__, engine->name, ring_size); 2494 2495 igt_spinner_end(&lo.spin); 2496 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2497 pr_err("Timed out waiting to flush %s\n", engine->name); 2498 i915_request_put(rq); 2499 goto err_wedged; 2500 } 2501 i915_request_put(rq); 2502 2503 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2504 err = -EIO; 2505 goto err_wedged; 2506 } 2507 2508 for_each_prime_number_from(count, 1, ring_size) { 2509 rq = spinner_create_request(&hi.spin, 2510 hi.ctx, engine, 2511 MI_ARB_CHECK); 2512 if (IS_ERR(rq)) 2513 goto err_wedged; 2514 i915_request_add(rq); 2515 if (!igt_wait_for_spinner(&hi.spin, rq)) 2516 goto err_wedged; 2517 2518 rq = spinner_create_request(&lo.spin, 2519 lo.ctx, engine, 2520 MI_ARB_CHECK); 2521 if (IS_ERR(rq)) 2522 goto err_wedged; 2523 i915_request_add(rq); 2524 2525 for (i = 0; i < count; i++) { 2526 rq = igt_request_alloc(lo.ctx, engine); 2527 if (IS_ERR(rq)) 2528 goto err_wedged; 2529 i915_request_add(rq); 2530 } 2531 2532 rq = igt_request_alloc(hi.ctx, engine); 2533 if (IS_ERR(rq)) 2534 goto err_wedged; 2535 2536 i915_request_get(rq); 2537 i915_request_add(rq); 2538 engine->schedule(rq, &attr); 2539 2540 igt_spinner_end(&hi.spin); 2541 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2542 struct drm_printer p = 2543 drm_info_printer(gt->i915->drm.dev); 2544 2545 pr_err("Failed to preempt over chain of %d\n", 2546 count); 2547 intel_engine_dump(engine, &p, 2548 "%s\n", engine->name); 2549 i915_request_put(rq); 2550 goto err_wedged; 2551 } 2552 igt_spinner_end(&lo.spin); 2553 i915_request_put(rq); 2554 2555 rq = igt_request_alloc(lo.ctx, engine); 2556 if (IS_ERR(rq)) 2557 goto err_wedged; 2558 2559 i915_request_get(rq); 2560 i915_request_add(rq); 2561 2562 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2563 struct drm_printer p = 2564 drm_info_printer(gt->i915->drm.dev); 2565 2566 pr_err("Failed to flush low priority chain of %d requests\n", 2567 count); 2568 intel_engine_dump(engine, &p, 2569 "%s\n", 
engine->name); 2570 2571 i915_request_put(rq); 2572 goto err_wedged; 2573 } 2574 i915_request_put(rq); 2575 } 2576 2577 if (igt_live_test_end(&t)) { 2578 err = -EIO; 2579 goto err_wedged; 2580 } 2581 } 2582 2583 err = 0; 2584 err_client_lo: 2585 preempt_client_fini(&lo); 2586 err_client_hi: 2587 preempt_client_fini(&hi); 2588 return err; 2589 2590 err_wedged: 2591 igt_spinner_end(&hi.spin); 2592 igt_spinner_end(&lo.spin); 2593 intel_gt_set_wedged(gt); 2594 err = -EIO; 2595 goto err_client_lo; 2596 } 2597 2598 static int create_gang(struct intel_engine_cs *engine, 2599 struct i915_request **prev) 2600 { 2601 struct drm_i915_gem_object *obj; 2602 struct intel_context *ce; 2603 struct i915_request *rq; 2604 struct i915_vma *vma; 2605 u32 *cs; 2606 int err; 2607 2608 ce = intel_context_create(engine); 2609 if (IS_ERR(ce)) 2610 return PTR_ERR(ce); 2611 2612 obj = i915_gem_object_create_internal(engine->i915, 4096); 2613 if (IS_ERR(obj)) { 2614 err = PTR_ERR(obj); 2615 goto err_ce; 2616 } 2617 2618 vma = i915_vma_instance(obj, ce->vm, NULL); 2619 if (IS_ERR(vma)) { 2620 err = PTR_ERR(vma); 2621 goto err_obj; 2622 } 2623 2624 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2625 if (err) 2626 goto err_obj; 2627 2628 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2629 if (IS_ERR(cs)) 2630 goto err_obj; 2631 2632 /* Semaphore target: spin until zero */ 2633 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2634 2635 *cs++ = MI_SEMAPHORE_WAIT | 2636 MI_SEMAPHORE_POLL | 2637 MI_SEMAPHORE_SAD_EQ_SDD; 2638 *cs++ = 0; 2639 *cs++ = lower_32_bits(vma->node.start); 2640 *cs++ = upper_32_bits(vma->node.start); 2641 2642 if (*prev) { 2643 u64 offset = (*prev)->batch->node.start; 2644 2645 /* Terminate the spinner in the next lower priority batch. */ 2646 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2647 *cs++ = lower_32_bits(offset); 2648 *cs++ = upper_32_bits(offset); 2649 *cs++ = 0; 2650 } 2651 2652 *cs++ = MI_BATCH_BUFFER_END; 2653 i915_gem_object_flush_map(obj); 2654 i915_gem_object_unpin_map(obj); 2655 2656 rq = intel_context_create_request(ce); 2657 if (IS_ERR(rq)) 2658 goto err_obj; 2659 2660 rq->batch = i915_vma_get(vma); 2661 i915_request_get(rq); 2662 2663 i915_vma_lock(vma); 2664 err = i915_request_await_object(rq, vma->obj, false); 2665 if (!err) 2666 err = i915_vma_move_to_active(vma, rq, 0); 2667 if (!err) 2668 err = rq->engine->emit_bb_start(rq, 2669 vma->node.start, 2670 PAGE_SIZE, 0); 2671 i915_vma_unlock(vma); 2672 i915_request_add(rq); 2673 if (err) 2674 goto err_rq; 2675 2676 i915_gem_object_put(obj); 2677 intel_context_put(ce); 2678 2679 rq->client_link.next = &(*prev)->client_link; 2680 *prev = rq; 2681 return 0; 2682 2683 err_rq: 2684 i915_vma_put(rq->batch); 2685 i915_request_put(rq); 2686 err_obj: 2687 i915_gem_object_put(obj); 2688 err_ce: 2689 intel_context_put(ce); 2690 return err; 2691 } 2692 2693 static int live_preempt_gang(void *arg) 2694 { 2695 struct intel_gt *gt = arg; 2696 struct intel_engine_cs *engine; 2697 enum intel_engine_id id; 2698 2699 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2700 return 0; 2701 2702 /* 2703 * Build as long a chain of preempters as we can, with each 2704 * request higher priority than the last. Once we are ready, we release 2705 * the last batch which then precolates down the chain, each releasing 2706 * the next oldest in turn. The intent is to simply push as hard as we 2707 * can with the number of preemptions, trying to exceed narrow HW 2708 * limits. At a minimum, we insist that we can sort all the user 2709 * high priority levels into execution order. 
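 *
 * For illustration only (a sketch of what create_gang() above already
 * emits, not additional commands): each batch in the gang is roughly
 *
 *     MI_ARB_ON_OFF | MI_ARB_ENABLE
 *     MI_SEMAPHORE_WAIT      poll the first dword of this batch until it reads 0
 *     MI_STORE_DWORD_IMM     write 0 into the previous, lower priority batch
 *     MI_BATCH_BUFFER_END
 *
 * (the very first batch has no predecessor to release, so it omits the
 * store). Zeroing the final, highest priority batch from the CPU then
 * releases the chain in priority order, each spinner terminating the next.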
2710 */ 2711 2712 for_each_engine(engine, gt, id) { 2713 struct i915_request *rq = NULL; 2714 struct igt_live_test t; 2715 IGT_TIMEOUT(end_time); 2716 int prio = 0; 2717 int err = 0; 2718 u32 *cs; 2719 2720 if (!intel_engine_has_preemption(engine)) 2721 continue; 2722 2723 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2724 return -EIO; 2725 2726 do { 2727 struct i915_sched_attr attr = { 2728 .priority = I915_USER_PRIORITY(prio++), 2729 }; 2730 2731 err = create_gang(engine, &rq); 2732 if (err) 2733 break; 2734 2735 /* Submit each spinner at increasing priority */ 2736 engine->schedule(rq, &attr); 2737 2738 if (prio <= I915_PRIORITY_MAX) 2739 continue; 2740 2741 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) 2742 break; 2743 2744 if (__igt_timeout(end_time, NULL)) 2745 break; 2746 } while (1); 2747 pr_debug("%s: Preempt chain of %d requests\n", 2748 engine->name, prio); 2749 2750 /* 2751 * Such that the last spinner is the highest priority and 2752 * should execute first. When that spinner completes, 2753 * it will terminate the next lowest spinner until there 2754 * are no more spinners and the gang is complete. 2755 */ 2756 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2757 if (!IS_ERR(cs)) { 2758 *cs = 0; 2759 i915_gem_object_unpin_map(rq->batch->obj); 2760 } else { 2761 err = PTR_ERR(cs); 2762 intel_gt_set_wedged(gt); 2763 } 2764 2765 while (rq) { /* wait for each rq from highest to lowest prio */ 2766 struct i915_request *n = 2767 list_next_entry(rq, client_link); 2768 2769 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2770 struct drm_printer p = 2771 drm_info_printer(engine->i915->drm.dev); 2772 2773 pr_err("Failed to flush chain of %d requests, at %d\n", 2774 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2775 intel_engine_dump(engine, &p, 2776 "%s\n", engine->name); 2777 2778 err = -ETIME; 2779 } 2780 2781 i915_vma_put(rq->batch); 2782 i915_request_put(rq); 2783 rq = n; 2784 } 2785 2786 if (igt_live_test_end(&t)) 2787 err = -EIO; 2788 if (err) 2789 return err; 2790 } 2791 2792 return 0; 2793 } 2794 2795 static struct i915_vma * 2796 create_gpr_user(struct intel_engine_cs *engine, 2797 struct i915_vma *result, 2798 unsigned int offset) 2799 { 2800 struct drm_i915_gem_object *obj; 2801 struct i915_vma *vma; 2802 u32 *cs; 2803 int err; 2804 int i; 2805 2806 obj = i915_gem_object_create_internal(engine->i915, 4096); 2807 if (IS_ERR(obj)) 2808 return ERR_CAST(obj); 2809 2810 vma = i915_vma_instance(obj, result->vm, NULL); 2811 if (IS_ERR(vma)) { 2812 i915_gem_object_put(obj); 2813 return vma; 2814 } 2815 2816 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2817 if (err) { 2818 i915_vma_put(vma); 2819 return ERR_PTR(err); 2820 } 2821 2822 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2823 if (IS_ERR(cs)) { 2824 i915_vma_put(vma); 2825 return ERR_CAST(cs); 2826 } 2827 2828 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 2829 *cs++ = MI_LOAD_REGISTER_IMM(1); 2830 *cs++ = CS_GPR(engine, 0); 2831 *cs++ = 1; 2832 2833 for (i = 1; i < NUM_GPR; i++) { 2834 u64 addr; 2835 2836 /* 2837 * Perform: GPR[i]++ 2838 * 2839 * As we read and write into the context saved GPR[i], if 2840 * we restart this batch buffer from an earlier point, we 2841 * will repeat the increment and store a value > 1. 
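 *
 * In ALU terms, the MI_MATH sequence emitted below is (a sketch, not
 * extra instructions):
 *
 *     SRCA := GPR[i]
 *     SRCB := GPR[0]          loaded with the constant 1 above
 *     ACCU := SRCA + SRCB
 *     GPR[i] := ACCU
 *
 * followed by an MI_STORE_REGISTER_MEM of GPR[i] into the result buffer,
 * so a replayed increment shows up there as a value greater than 1.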
2842 */ 2843 *cs++ = MI_MATH(4); 2844 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 2845 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 2846 *cs++ = MI_MATH_ADD; 2847 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 2848 2849 addr = result->node.start + offset + i * sizeof(*cs); 2850 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 2851 *cs++ = CS_GPR(engine, 2 * i); 2852 *cs++ = lower_32_bits(addr); 2853 *cs++ = upper_32_bits(addr); 2854 2855 *cs++ = MI_SEMAPHORE_WAIT | 2856 MI_SEMAPHORE_POLL | 2857 MI_SEMAPHORE_SAD_GTE_SDD; 2858 *cs++ = i; 2859 *cs++ = lower_32_bits(result->node.start); 2860 *cs++ = upper_32_bits(result->node.start); 2861 } 2862 2863 *cs++ = MI_BATCH_BUFFER_END; 2864 i915_gem_object_flush_map(obj); 2865 i915_gem_object_unpin_map(obj); 2866 2867 return vma; 2868 } 2869 2870 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 2871 { 2872 struct drm_i915_gem_object *obj; 2873 struct i915_vma *vma; 2874 int err; 2875 2876 obj = i915_gem_object_create_internal(gt->i915, sz); 2877 if (IS_ERR(obj)) 2878 return ERR_CAST(obj); 2879 2880 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 2881 if (IS_ERR(vma)) { 2882 i915_gem_object_put(obj); 2883 return vma; 2884 } 2885 2886 err = i915_ggtt_pin(vma, 0, 0); 2887 if (err) { 2888 i915_vma_put(vma); 2889 return ERR_PTR(err); 2890 } 2891 2892 return vma; 2893 } 2894 2895 static struct i915_request * 2896 create_gpr_client(struct intel_engine_cs *engine, 2897 struct i915_vma *global, 2898 unsigned int offset) 2899 { 2900 struct i915_vma *batch, *vma; 2901 struct intel_context *ce; 2902 struct i915_request *rq; 2903 int err; 2904 2905 ce = intel_context_create(engine); 2906 if (IS_ERR(ce)) 2907 return ERR_CAST(ce); 2908 2909 vma = i915_vma_instance(global->obj, ce->vm, NULL); 2910 if (IS_ERR(vma)) { 2911 err = PTR_ERR(vma); 2912 goto out_ce; 2913 } 2914 2915 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2916 if (err) 2917 goto out_ce; 2918 2919 batch = create_gpr_user(engine, vma, offset); 2920 if (IS_ERR(batch)) { 2921 err = PTR_ERR(batch); 2922 goto out_vma; 2923 } 2924 2925 rq = intel_context_create_request(ce); 2926 if (IS_ERR(rq)) { 2927 err = PTR_ERR(rq); 2928 goto out_batch; 2929 } 2930 2931 i915_vma_lock(vma); 2932 err = i915_request_await_object(rq, vma->obj, false); 2933 if (!err) 2934 err = i915_vma_move_to_active(vma, rq, 0); 2935 i915_vma_unlock(vma); 2936 2937 i915_vma_lock(batch); 2938 if (!err) 2939 err = i915_request_await_object(rq, batch->obj, false); 2940 if (!err) 2941 err = i915_vma_move_to_active(batch, rq, 0); 2942 if (!err) 2943 err = rq->engine->emit_bb_start(rq, 2944 batch->node.start, 2945 PAGE_SIZE, 0); 2946 i915_vma_unlock(batch); 2947 i915_vma_unpin(batch); 2948 2949 if (!err) 2950 i915_request_get(rq); 2951 i915_request_add(rq); 2952 2953 out_batch: 2954 i915_vma_put(batch); 2955 out_vma: 2956 i915_vma_unpin(vma); 2957 out_ce: 2958 intel_context_put(ce); 2959 return err ? 
ERR_PTR(err) : rq; 2960 } 2961 2962 static int preempt_user(struct intel_engine_cs *engine, 2963 struct i915_vma *global, 2964 int id) 2965 { 2966 struct i915_sched_attr attr = { 2967 .priority = I915_PRIORITY_MAX 2968 }; 2969 struct i915_request *rq; 2970 int err = 0; 2971 u32 *cs; 2972 2973 rq = intel_engine_create_kernel_request(engine); 2974 if (IS_ERR(rq)) 2975 return PTR_ERR(rq); 2976 2977 cs = intel_ring_begin(rq, 4); 2978 if (IS_ERR(cs)) { 2979 i915_request_add(rq); 2980 return PTR_ERR(cs); 2981 } 2982 2983 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 2984 *cs++ = i915_ggtt_offset(global); 2985 *cs++ = 0; 2986 *cs++ = id; 2987 2988 intel_ring_advance(rq, cs); 2989 2990 i915_request_get(rq); 2991 i915_request_add(rq); 2992 2993 engine->schedule(rq, &attr); 2994 2995 if (i915_request_wait(rq, 0, HZ / 2) < 0) 2996 err = -ETIME; 2997 i915_request_put(rq); 2998 2999 return err; 3000 } 3001 3002 static int live_preempt_user(void *arg) 3003 { 3004 struct intel_gt *gt = arg; 3005 struct intel_engine_cs *engine; 3006 struct i915_vma *global; 3007 enum intel_engine_id id; 3008 u32 *result; 3009 int err = 0; 3010 3011 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3012 return 0; 3013 3014 /* 3015 * In our other tests, we look at preemption in carefully 3016 * controlled conditions in the ringbuffer. Since most of the 3017 * time is spent in user batches, most of our preemptions naturally 3018 * occur there. We want to verify that when we preempt inside a batch 3019 * we continue on from the current instruction and do not roll back 3020 * to the start, or another earlier arbitration point. 3021 * 3022 * To verify this, we create a batch which is a mixture of 3023 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3024 * a few preempting contexts thrown into the mix, we look for any 3025 * repeated instructions (which show up as incorrect values). 
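 *
 * Layout of the shared result page, as set up by the code below (for
 * reference only): dword 0 doubles as the GGTT semaphore advanced by each
 * preempt_user() pass (it stores the pass index), and client i owns the
 * run of NUM_GPR dwords starting at dword NUM_GPR * i. On success, slots
 * 1..NUM_GPR-1 of each run read back exactly 1; anything larger means an
 * instruction was replayed after preemption.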
3026 */ 3027 3028 global = create_global(gt, 4096); 3029 if (IS_ERR(global)) 3030 return PTR_ERR(global); 3031 3032 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); 3033 if (IS_ERR(result)) { 3034 i915_vma_unpin_and_release(&global, 0); 3035 return PTR_ERR(result); 3036 } 3037 3038 for_each_engine(engine, gt, id) { 3039 struct i915_request *client[3] = {}; 3040 struct igt_live_test t; 3041 int i; 3042 3043 if (!intel_engine_has_preemption(engine)) 3044 continue; 3045 3046 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) 3047 continue; /* we need per-context GPR */ 3048 3049 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3050 err = -EIO; 3051 break; 3052 } 3053 3054 memset(result, 0, 4096); 3055 3056 for (i = 0; i < ARRAY_SIZE(client); i++) { 3057 struct i915_request *rq; 3058 3059 rq = create_gpr_client(engine, global, 3060 NUM_GPR * i * sizeof(u32)); 3061 if (IS_ERR(rq)) 3062 goto end_test; 3063 3064 client[i] = rq; 3065 } 3066 3067 /* Continuously preempt the set of 3 running contexts */ 3068 for (i = 1; i <= NUM_GPR; i++) { 3069 err = preempt_user(engine, global, i); 3070 if (err) 3071 goto end_test; 3072 } 3073 3074 if (READ_ONCE(result[0]) != NUM_GPR) { 3075 pr_err("%s: Failed to release semaphore\n", 3076 engine->name); 3077 err = -EIO; 3078 goto end_test; 3079 } 3080 3081 for (i = 0; i < ARRAY_SIZE(client); i++) { 3082 int gpr; 3083 3084 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3085 err = -ETIME; 3086 goto end_test; 3087 } 3088 3089 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3090 if (result[NUM_GPR * i + gpr] != 1) { 3091 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3092 engine->name, 3093 i, gpr, result[NUM_GPR * i + gpr]); 3094 err = -EINVAL; 3095 goto end_test; 3096 } 3097 } 3098 } 3099 3100 end_test: 3101 for (i = 0; i < ARRAY_SIZE(client); i++) { 3102 if (!client[i]) 3103 break; 3104 3105 i915_request_put(client[i]); 3106 } 3107 3108 /* Flush the semaphores on error */ 3109 smp_store_mb(result[0], -1); 3110 if (igt_live_test_end(&t)) 3111 err = -EIO; 3112 if (err) 3113 break; 3114 } 3115 3116 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3117 return err; 3118 } 3119 3120 static int live_preempt_timeout(void *arg) 3121 { 3122 struct intel_gt *gt = arg; 3123 struct i915_gem_context *ctx_hi, *ctx_lo; 3124 struct igt_spinner spin_lo; 3125 struct intel_engine_cs *engine; 3126 enum intel_engine_id id; 3127 int err = -ENOMEM; 3128 3129 /* 3130 * Check that we force preemption to occur by cancelling the previous 3131 * context if it refuses to yield the GPU. 
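 *
 * Roughly, the sequence exercised below: submit a minimum priority
 * spinner with arbitration disabled (the MI_NOOP variant), queue a
 * request from the maximum priority context behind it, and temporarily
 * drop engine->props.preempt_timeout_ms to 1. As the spinner can never
 * yield voluntarily, the forced preemption reset should fire after about
 * a jiffie, and the high priority request must then complete within the
 * HZ / 10 wait.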
3132 */ 3133 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3134 return 0; 3135 3136 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3137 return 0; 3138 3139 if (!intel_has_reset_engine(gt)) 3140 return 0; 3141 3142 if (igt_spinner_init(&spin_lo, gt)) 3143 return -ENOMEM; 3144 3145 ctx_hi = kernel_context(gt->i915); 3146 if (!ctx_hi) 3147 goto err_spin_lo; 3148 ctx_hi->sched.priority = 3149 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 3150 3151 ctx_lo = kernel_context(gt->i915); 3152 if (!ctx_lo) 3153 goto err_ctx_hi; 3154 ctx_lo->sched.priority = 3155 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 3156 3157 for_each_engine(engine, gt, id) { 3158 unsigned long saved_timeout; 3159 struct i915_request *rq; 3160 3161 if (!intel_engine_has_preemption(engine)) 3162 continue; 3163 3164 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3165 MI_NOOP); /* preemption disabled */ 3166 if (IS_ERR(rq)) { 3167 err = PTR_ERR(rq); 3168 goto err_ctx_lo; 3169 } 3170 3171 i915_request_add(rq); 3172 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3173 intel_gt_set_wedged(gt); 3174 err = -EIO; 3175 goto err_ctx_lo; 3176 } 3177 3178 rq = igt_request_alloc(ctx_hi, engine); 3179 if (IS_ERR(rq)) { 3180 igt_spinner_end(&spin_lo); 3181 err = PTR_ERR(rq); 3182 goto err_ctx_lo; 3183 } 3184 3185 /* Flush the previous CS ack before changing timeouts */ 3186 while (READ_ONCE(engine->execlists.pending[0])) 3187 cpu_relax(); 3188 3189 saved_timeout = engine->props.preempt_timeout_ms; 3190 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3191 3192 i915_request_get(rq); 3193 i915_request_add(rq); 3194 3195 intel_engine_flush_submission(engine); 3196 engine->props.preempt_timeout_ms = saved_timeout; 3197 3198 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3199 intel_gt_set_wedged(gt); 3200 i915_request_put(rq); 3201 err = -ETIME; 3202 goto err_ctx_lo; 3203 } 3204 3205 igt_spinner_end(&spin_lo); 3206 i915_request_put(rq); 3207 } 3208 3209 err = 0; 3210 err_ctx_lo: 3211 kernel_context_close(ctx_lo); 3212 err_ctx_hi: 3213 kernel_context_close(ctx_hi); 3214 err_spin_lo: 3215 igt_spinner_fini(&spin_lo); 3216 return err; 3217 } 3218 3219 static int random_range(struct rnd_state *rnd, int min, int max) 3220 { 3221 return i915_prandom_u32_max_state(max - min, rnd) + min; 3222 } 3223 3224 static int random_priority(struct rnd_state *rnd) 3225 { 3226 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3227 } 3228 3229 struct preempt_smoke { 3230 struct intel_gt *gt; 3231 struct i915_gem_context **contexts; 3232 struct intel_engine_cs *engine; 3233 struct drm_i915_gem_object *batch; 3234 unsigned int ncontext; 3235 struct rnd_state prng; 3236 unsigned long count; 3237 }; 3238 3239 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3240 { 3241 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3242 &smoke->prng)]; 3243 } 3244 3245 static int smoke_submit(struct preempt_smoke *smoke, 3246 struct i915_gem_context *ctx, int prio, 3247 struct drm_i915_gem_object *batch) 3248 { 3249 struct i915_request *rq; 3250 struct i915_vma *vma = NULL; 3251 int err = 0; 3252 3253 if (batch) { 3254 struct i915_address_space *vm; 3255 3256 vm = i915_gem_context_get_vm_rcu(ctx); 3257 vma = i915_vma_instance(batch, vm, NULL); 3258 i915_vm_put(vm); 3259 if (IS_ERR(vma)) 3260 return PTR_ERR(vma); 3261 3262 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3263 if (err) 3264 return err; 3265 } 3266 3267 ctx->sched.priority = prio; 3268 3269 rq = igt_request_alloc(ctx, smoke->engine); 3270 if 
(IS_ERR(rq)) { 3271 err = PTR_ERR(rq); 3272 goto unpin; 3273 } 3274 3275 if (vma) { 3276 i915_vma_lock(vma); 3277 err = i915_request_await_object(rq, vma->obj, false); 3278 if (!err) 3279 err = i915_vma_move_to_active(vma, rq, 0); 3280 if (!err) 3281 err = rq->engine->emit_bb_start(rq, 3282 vma->node.start, 3283 PAGE_SIZE, 0); 3284 i915_vma_unlock(vma); 3285 } 3286 3287 i915_request_add(rq); 3288 3289 unpin: 3290 if (vma) 3291 i915_vma_unpin(vma); 3292 3293 return err; 3294 } 3295 3296 static int smoke_crescendo_thread(void *arg) 3297 { 3298 struct preempt_smoke *smoke = arg; 3299 IGT_TIMEOUT(end_time); 3300 unsigned long count; 3301 3302 count = 0; 3303 do { 3304 struct i915_gem_context *ctx = smoke_context(smoke); 3305 int err; 3306 3307 err = smoke_submit(smoke, 3308 ctx, count % I915_PRIORITY_MAX, 3309 smoke->batch); 3310 if (err) 3311 return err; 3312 3313 count++; 3314 } while (!__igt_timeout(end_time, NULL)); 3315 3316 smoke->count = count; 3317 return 0; 3318 } 3319 3320 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3321 #define BATCH BIT(0) 3322 { 3323 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3324 struct preempt_smoke arg[I915_NUM_ENGINES]; 3325 struct intel_engine_cs *engine; 3326 enum intel_engine_id id; 3327 unsigned long count; 3328 int err = 0; 3329 3330 for_each_engine(engine, smoke->gt, id) { 3331 arg[id] = *smoke; 3332 arg[id].engine = engine; 3333 if (!(flags & BATCH)) 3334 arg[id].batch = NULL; 3335 arg[id].count = 0; 3336 3337 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 3338 "igt/smoke:%d", id); 3339 if (IS_ERR(tsk[id])) { 3340 err = PTR_ERR(tsk[id]); 3341 break; 3342 } 3343 get_task_struct(tsk[id]); 3344 } 3345 3346 yield(); /* start all threads before we kthread_stop() */ 3347 3348 count = 0; 3349 for_each_engine(engine, smoke->gt, id) { 3350 int status; 3351 3352 if (IS_ERR_OR_NULL(tsk[id])) 3353 continue; 3354 3355 status = kthread_stop(tsk[id]); 3356 if (status && !err) 3357 err = status; 3358 3359 count += arg[id].count; 3360 3361 put_task_struct(tsk[id]); 3362 } 3363 3364 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3365 count, flags, 3366 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3367 return 0; 3368 } 3369 3370 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3371 { 3372 enum intel_engine_id id; 3373 IGT_TIMEOUT(end_time); 3374 unsigned long count; 3375 3376 count = 0; 3377 do { 3378 for_each_engine(smoke->engine, smoke->gt, id) { 3379 struct i915_gem_context *ctx = smoke_context(smoke); 3380 int err; 3381 3382 err = smoke_submit(smoke, 3383 ctx, random_priority(&smoke->prng), 3384 flags & BATCH ? 
smoke->batch : NULL); 3385 if (err) 3386 return err; 3387 3388 count++; 3389 } 3390 } while (!__igt_timeout(end_time, NULL)); 3391 3392 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3393 count, flags, 3394 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3395 return 0; 3396 } 3397 3398 static int live_preempt_smoke(void *arg) 3399 { 3400 struct preempt_smoke smoke = { 3401 .gt = arg, 3402 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3403 .ncontext = 1024, 3404 }; 3405 const unsigned int phase[] = { 0, BATCH }; 3406 struct igt_live_test t; 3407 int err = -ENOMEM; 3408 u32 *cs; 3409 int n; 3410 3411 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3412 return 0; 3413 3414 smoke.contexts = kmalloc_array(smoke.ncontext, 3415 sizeof(*smoke.contexts), 3416 GFP_KERNEL); 3417 if (!smoke.contexts) 3418 return -ENOMEM; 3419 3420 smoke.batch = 3421 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3422 if (IS_ERR(smoke.batch)) { 3423 err = PTR_ERR(smoke.batch); 3424 goto err_free; 3425 } 3426 3427 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3428 if (IS_ERR(cs)) { 3429 err = PTR_ERR(cs); 3430 goto err_batch; 3431 } 3432 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3433 cs[n] = MI_ARB_CHECK; 3434 cs[n] = MI_BATCH_BUFFER_END; 3435 i915_gem_object_flush_map(smoke.batch); 3436 i915_gem_object_unpin_map(smoke.batch); 3437 3438 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3439 err = -EIO; 3440 goto err_batch; 3441 } 3442 3443 for (n = 0; n < smoke.ncontext; n++) { 3444 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3445 if (!smoke.contexts[n]) 3446 goto err_ctx; 3447 } 3448 3449 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3450 err = smoke_crescendo(&smoke, phase[n]); 3451 if (err) 3452 goto err_ctx; 3453 3454 err = smoke_random(&smoke, phase[n]); 3455 if (err) 3456 goto err_ctx; 3457 } 3458 3459 err_ctx: 3460 if (igt_live_test_end(&t)) 3461 err = -EIO; 3462 3463 for (n = 0; n < smoke.ncontext; n++) { 3464 if (!smoke.contexts[n]) 3465 break; 3466 kernel_context_close(smoke.contexts[n]); 3467 } 3468 3469 err_batch: 3470 i915_gem_object_put(smoke.batch); 3471 err_free: 3472 kfree(smoke.contexts); 3473 3474 return err; 3475 } 3476 3477 static int nop_virtual_engine(struct intel_gt *gt, 3478 struct intel_engine_cs **siblings, 3479 unsigned int nsibling, 3480 unsigned int nctx, 3481 unsigned int flags) 3482 #define CHAIN BIT(0) 3483 { 3484 IGT_TIMEOUT(end_time); 3485 struct i915_request *request[16] = {}; 3486 struct intel_context *ve[16]; 3487 unsigned long n, prime, nc; 3488 struct igt_live_test t; 3489 ktime_t times[2] = {}; 3490 int err; 3491 3492 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3493 3494 for (n = 0; n < nctx; n++) { 3495 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3496 if (IS_ERR(ve[n])) { 3497 err = PTR_ERR(ve[n]); 3498 nctx = n; 3499 goto out; 3500 } 3501 3502 err = intel_context_pin(ve[n]); 3503 if (err) { 3504 intel_context_put(ve[n]); 3505 nctx = n; 3506 goto out; 3507 } 3508 } 3509 3510 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3511 if (err) 3512 goto out; 3513 3514 for_each_prime_number_from(prime, 1, 8192) { 3515 times[1] = ktime_get_raw(); 3516 3517 if (flags & CHAIN) { 3518 for (nc = 0; nc < nctx; nc++) { 3519 for (n = 0; n < prime; n++) { 3520 struct i915_request *rq; 3521 3522 rq = i915_request_create(ve[nc]); 3523 if (IS_ERR(rq)) { 3524 err = PTR_ERR(rq); 3525 goto out; 3526 } 3527 3528 if (request[nc]) 3529 
i915_request_put(request[nc]); 3530 request[nc] = i915_request_get(rq); 3531 i915_request_add(rq); 3532 } 3533 } 3534 } else { 3535 for (n = 0; n < prime; n++) { 3536 for (nc = 0; nc < nctx; nc++) { 3537 struct i915_request *rq; 3538 3539 rq = i915_request_create(ve[nc]); 3540 if (IS_ERR(rq)) { 3541 err = PTR_ERR(rq); 3542 goto out; 3543 } 3544 3545 if (request[nc]) 3546 i915_request_put(request[nc]); 3547 request[nc] = i915_request_get(rq); 3548 i915_request_add(rq); 3549 } 3550 } 3551 } 3552 3553 for (nc = 0; nc < nctx; nc++) { 3554 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3555 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3556 __func__, ve[0]->engine->name, 3557 request[nc]->fence.context, 3558 request[nc]->fence.seqno); 3559 3560 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3561 __func__, ve[0]->engine->name, 3562 request[nc]->fence.context, 3563 request[nc]->fence.seqno); 3564 GEM_TRACE_DUMP(); 3565 intel_gt_set_wedged(gt); 3566 break; 3567 } 3568 } 3569 3570 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3571 if (prime == 1) 3572 times[0] = times[1]; 3573 3574 for (nc = 0; nc < nctx; nc++) { 3575 i915_request_put(request[nc]); 3576 request[nc] = NULL; 3577 } 3578 3579 if (__igt_timeout(end_time, NULL)) 3580 break; 3581 } 3582 3583 err = igt_live_test_end(&t); 3584 if (err) 3585 goto out; 3586 3587 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3588 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3589 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3590 3591 out: 3592 if (igt_flush_test(gt->i915)) 3593 err = -EIO; 3594 3595 for (nc = 0; nc < nctx; nc++) { 3596 i915_request_put(request[nc]); 3597 intel_context_unpin(ve[nc]); 3598 intel_context_put(ve[nc]); 3599 } 3600 return err; 3601 } 3602 3603 static int live_virtual_engine(void *arg) 3604 { 3605 struct intel_gt *gt = arg; 3606 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3607 struct intel_engine_cs *engine; 3608 enum intel_engine_id id; 3609 unsigned int class, inst; 3610 int err; 3611 3612 if (intel_uc_uses_guc_submission(>->uc)) 3613 return 0; 3614 3615 for_each_engine(engine, gt, id) { 3616 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3617 if (err) { 3618 pr_err("Failed to wrap engine %s: err=%d\n", 3619 engine->name, err); 3620 return err; 3621 } 3622 } 3623 3624 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3625 int nsibling, n; 3626 3627 nsibling = 0; 3628 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3629 if (!gt->engine_class[class][inst]) 3630 continue; 3631 3632 siblings[nsibling++] = gt->engine_class[class][inst]; 3633 } 3634 if (nsibling < 2) 3635 continue; 3636 3637 for (n = 1; n <= nsibling + 1; n++) { 3638 err = nop_virtual_engine(gt, siblings, nsibling, 3639 n, 0); 3640 if (err) 3641 return err; 3642 } 3643 3644 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3645 if (err) 3646 return err; 3647 } 3648 3649 return 0; 3650 } 3651 3652 static int mask_virtual_engine(struct intel_gt *gt, 3653 struct intel_engine_cs **siblings, 3654 unsigned int nsibling) 3655 { 3656 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3657 struct intel_context *ve; 3658 struct igt_live_test t; 3659 unsigned int n; 3660 int err; 3661 3662 /* 3663 * Check that by setting the execution mask on a request, we can 3664 * restrict it to our desired engine within the virtual engine. 
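 *
 * The knob under test is simply the per-request mask set below:
 *
 *     request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
 *
 * i.e. each request is pinned to a single physical sibling (in reverse
 * order, as that is less likely to match the scheduler's natural pick),
 * and request[n]->engine is checked against that sibling once the
 * request completes.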
3665 */ 3666 3667 ve = intel_execlists_create_virtual(siblings, nsibling); 3668 if (IS_ERR(ve)) { 3669 err = PTR_ERR(ve); 3670 goto out_close; 3671 } 3672 3673 err = intel_context_pin(ve); 3674 if (err) 3675 goto out_put; 3676 3677 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3678 if (err) 3679 goto out_unpin; 3680 3681 for (n = 0; n < nsibling; n++) { 3682 request[n] = i915_request_create(ve); 3683 if (IS_ERR(request[n])) { 3684 err = PTR_ERR(request[n]); 3685 nsibling = n; 3686 goto out; 3687 } 3688 3689 /* Reverse order as it's more likely to be unnatural */ 3690 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3691 3692 i915_request_get(request[n]); 3693 i915_request_add(request[n]); 3694 } 3695 3696 for (n = 0; n < nsibling; n++) { 3697 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3698 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3699 __func__, ve->engine->name, 3700 request[n]->fence.context, 3701 request[n]->fence.seqno); 3702 3703 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3704 __func__, ve->engine->name, 3705 request[n]->fence.context, 3706 request[n]->fence.seqno); 3707 GEM_TRACE_DUMP(); 3708 intel_gt_set_wedged(gt); 3709 err = -EIO; 3710 goto out; 3711 } 3712 3713 if (request[n]->engine != siblings[nsibling - n - 1]) { 3714 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3715 request[n]->engine->name, 3716 siblings[nsibling - n - 1]->name); 3717 err = -EINVAL; 3718 goto out; 3719 } 3720 } 3721 3722 err = igt_live_test_end(&t); 3723 out: 3724 if (igt_flush_test(gt->i915)) 3725 err = -EIO; 3726 3727 for (n = 0; n < nsibling; n++) 3728 i915_request_put(request[n]); 3729 3730 out_unpin: 3731 intel_context_unpin(ve); 3732 out_put: 3733 intel_context_put(ve); 3734 out_close: 3735 return err; 3736 } 3737 3738 static int live_virtual_mask(void *arg) 3739 { 3740 struct intel_gt *gt = arg; 3741 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3742 unsigned int class, inst; 3743 int err; 3744 3745 if (intel_uc_uses_guc_submission(>->uc)) 3746 return 0; 3747 3748 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3749 unsigned int nsibling; 3750 3751 nsibling = 0; 3752 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3753 if (!gt->engine_class[class][inst]) 3754 break; 3755 3756 siblings[nsibling++] = gt->engine_class[class][inst]; 3757 } 3758 if (nsibling < 2) 3759 continue; 3760 3761 err = mask_virtual_engine(gt, siblings, nsibling); 3762 if (err) 3763 return err; 3764 } 3765 3766 return 0; 3767 } 3768 3769 static int preserved_virtual_engine(struct intel_gt *gt, 3770 struct intel_engine_cs **siblings, 3771 unsigned int nsibling) 3772 { 3773 struct i915_request *last = NULL; 3774 struct intel_context *ve; 3775 struct i915_vma *scratch; 3776 struct igt_live_test t; 3777 unsigned int n; 3778 int err = 0; 3779 u32 *cs; 3780 3781 scratch = create_scratch(siblings[0]->gt); 3782 if (IS_ERR(scratch)) 3783 return PTR_ERR(scratch); 3784 3785 err = i915_vma_sync(scratch); 3786 if (err) 3787 goto out_scratch; 3788 3789 ve = intel_execlists_create_virtual(siblings, nsibling); 3790 if (IS_ERR(ve)) { 3791 err = PTR_ERR(ve); 3792 goto out_scratch; 3793 } 3794 3795 err = intel_context_pin(ve); 3796 if (err) 3797 goto out_put; 3798 3799 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3800 if (err) 3801 goto out_unpin; 3802 3803 for (n = 0; n < NUM_GPR_DW; n++) { 3804 struct intel_engine_cs *engine = siblings[n % nsibling]; 3805 struct i915_request *rq; 3806 3807 rq = i915_request_create(ve); 3808 if 
(IS_ERR(rq)) { 3809 err = PTR_ERR(rq); 3810 goto out_end; 3811 } 3812 3813 i915_request_put(last); 3814 last = i915_request_get(rq); 3815 3816 cs = intel_ring_begin(rq, 8); 3817 if (IS_ERR(cs)) { 3818 i915_request_add(rq); 3819 err = PTR_ERR(cs); 3820 goto out_end; 3821 } 3822 3823 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3824 *cs++ = CS_GPR(engine, n); 3825 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 3826 *cs++ = 0; 3827 3828 *cs++ = MI_LOAD_REGISTER_IMM(1); 3829 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 3830 *cs++ = n + 1; 3831 3832 *cs++ = MI_NOOP; 3833 intel_ring_advance(rq, cs); 3834 3835 /* Restrict this request to run on a particular engine */ 3836 rq->execution_mask = engine->mask; 3837 i915_request_add(rq); 3838 } 3839 3840 if (i915_request_wait(last, 0, HZ / 5) < 0) { 3841 err = -ETIME; 3842 goto out_end; 3843 } 3844 3845 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3846 if (IS_ERR(cs)) { 3847 err = PTR_ERR(cs); 3848 goto out_end; 3849 } 3850 3851 for (n = 0; n < NUM_GPR_DW; n++) { 3852 if (cs[n] != n) { 3853 pr_err("Incorrect value[%d] found for GPR[%d]\n", 3854 cs[n], n); 3855 err = -EINVAL; 3856 break; 3857 } 3858 } 3859 3860 i915_gem_object_unpin_map(scratch->obj); 3861 3862 out_end: 3863 if (igt_live_test_end(&t)) 3864 err = -EIO; 3865 i915_request_put(last); 3866 out_unpin: 3867 intel_context_unpin(ve); 3868 out_put: 3869 intel_context_put(ve); 3870 out_scratch: 3871 i915_vma_unpin_and_release(&scratch, 0); 3872 return err; 3873 } 3874 3875 static int live_virtual_preserved(void *arg) 3876 { 3877 struct intel_gt *gt = arg; 3878 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3879 unsigned int class, inst; 3880 3881 /* 3882 * Check that the context image retains non-privileged (user) registers 3883 * from one engine to the next. For this we check that the CS_GPR 3884 * are preserved. 3885 */ 3886 3887 if (intel_uc_uses_guc_submission(>->uc)) 3888 return 0; 3889 3890 /* As we use CS_GPR we cannot run before they existed on all engines. */ 3891 if (INTEL_GEN(gt->i915) < 9) 3892 return 0; 3893 3894 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3895 int nsibling, err; 3896 3897 nsibling = 0; 3898 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3899 if (!gt->engine_class[class][inst]) 3900 continue; 3901 3902 siblings[nsibling++] = gt->engine_class[class][inst]; 3903 } 3904 if (nsibling < 2) 3905 continue; 3906 3907 err = preserved_virtual_engine(gt, siblings, nsibling); 3908 if (err) 3909 return err; 3910 } 3911 3912 return 0; 3913 } 3914 3915 static int bond_virtual_engine(struct intel_gt *gt, 3916 unsigned int class, 3917 struct intel_engine_cs **siblings, 3918 unsigned int nsibling, 3919 unsigned int flags) 3920 #define BOND_SCHEDULE BIT(0) 3921 { 3922 struct intel_engine_cs *master; 3923 struct i915_request *rq[16]; 3924 enum intel_engine_id id; 3925 struct igt_spinner spin; 3926 unsigned long n; 3927 int err; 3928 3929 /* 3930 * A set of bonded requests is intended to be run concurrently 3931 * across a number of engines. We use one request per-engine 3932 * and a magic fence to schedule each of the bonded requests 3933 * at the same time. A consequence of our current scheduler is that 3934 * we only move requests to the HW ready queue when the request 3935 * becomes ready, that is when all of its prerequisite fences have 3936 * been signaled. As one of those fences is the master submit fence, 3937 * there is a delay on all secondary fences as the HW may be 3938 * currently busy. 
 * Equally, as all the requests are independent, they may have other
 * fences that delay individual request submission to HW. Ergo, we do not
 * guarantee that all requests are immediately submitted to HW at the same
 * time, just that if the rules are abided by, they are ready at the same
 * time as the first is submitted. Userspace can embed semaphores in its
 * batch to ensure parallel execution of its phases as it requires.
 * Naturally, it has been suggested that the scheduler should take care of
 * parallel execution itself, even across preemption events on different
 * HW. (The proper answer is of course "lalalala".)
 *
 * With the submit-fence, we have identified three possible phases of
 * synchronisation depending on the master fence: queued (not ready),
 * executing, and signaled. The first two are quite simple and checked
 * below. However, the signaled master fence handling is contentious.
 * Currently we do not distinguish between a signaled fence and an expired
 * fence, as once signaled it does not convey any information about the
 * previous execution. It may even be freed and hence checking later it
 * may not exist at all. Ergo we currently do not apply the bonding
 * constraint for an already signaled fence, as our expectation is that it
 * should not constrain the secondaries and is outside of the scope of the
 * bonded request API (i.e. all userspace requests are meant to be running
 * in parallel). As it imposes no constraint, and is effectively a no-op,
 * we do not check it below, since the normal execution flows are checked
 * extensively above.
 *
 * XXX Is the degenerate handling of signaled submit fences the
 * expected behaviour for userspace?
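 *
 * In kernel terms, the bonding below boils down to (a sketch of the calls
 * made by this test, not new API):
 *
 *     intel_virtual_engine_attach_bond(ve->engine, master, siblings[n]);
 *     i915_request_await_execution(rq[n + 1], &rq[0]->fence,
 *                                  ve->engine->bond_execute);
 *
 * so each secondary only becomes ready once the master request begins
 * execution, and the attached bond steers it onto its designated sibling.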
3966 */ 3967 3968 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 3969 3970 if (igt_spinner_init(&spin, gt)) 3971 return -ENOMEM; 3972 3973 err = 0; 3974 rq[0] = ERR_PTR(-ENOMEM); 3975 for_each_engine(master, gt, id) { 3976 struct i915_sw_fence fence = {}; 3977 struct intel_context *ce; 3978 3979 if (master->class == class) 3980 continue; 3981 3982 ce = intel_context_create(master); 3983 if (IS_ERR(ce)) { 3984 err = PTR_ERR(ce); 3985 goto out; 3986 } 3987 3988 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 3989 3990 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 3991 intel_context_put(ce); 3992 if (IS_ERR(rq[0])) { 3993 err = PTR_ERR(rq[0]); 3994 goto out; 3995 } 3996 i915_request_get(rq[0]); 3997 3998 if (flags & BOND_SCHEDULE) { 3999 onstack_fence_init(&fence); 4000 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4001 &fence, 4002 GFP_KERNEL); 4003 } 4004 4005 i915_request_add(rq[0]); 4006 if (err < 0) 4007 goto out; 4008 4009 if (!(flags & BOND_SCHEDULE) && 4010 !igt_wait_for_spinner(&spin, rq[0])) { 4011 err = -EIO; 4012 goto out; 4013 } 4014 4015 for (n = 0; n < nsibling; n++) { 4016 struct intel_context *ve; 4017 4018 ve = intel_execlists_create_virtual(siblings, nsibling); 4019 if (IS_ERR(ve)) { 4020 err = PTR_ERR(ve); 4021 onstack_fence_fini(&fence); 4022 goto out; 4023 } 4024 4025 err = intel_virtual_engine_attach_bond(ve->engine, 4026 master, 4027 siblings[n]); 4028 if (err) { 4029 intel_context_put(ve); 4030 onstack_fence_fini(&fence); 4031 goto out; 4032 } 4033 4034 err = intel_context_pin(ve); 4035 intel_context_put(ve); 4036 if (err) { 4037 onstack_fence_fini(&fence); 4038 goto out; 4039 } 4040 4041 rq[n + 1] = i915_request_create(ve); 4042 intel_context_unpin(ve); 4043 if (IS_ERR(rq[n + 1])) { 4044 err = PTR_ERR(rq[n + 1]); 4045 onstack_fence_fini(&fence); 4046 goto out; 4047 } 4048 i915_request_get(rq[n + 1]); 4049 4050 err = i915_request_await_execution(rq[n + 1], 4051 &rq[0]->fence, 4052 ve->engine->bond_execute); 4053 i915_request_add(rq[n + 1]); 4054 if (err < 0) { 4055 onstack_fence_fini(&fence); 4056 goto out; 4057 } 4058 } 4059 onstack_fence_fini(&fence); 4060 intel_engine_flush_submission(master); 4061 igt_spinner_end(&spin); 4062 4063 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4064 pr_err("Master request did not execute (on %s)!\n", 4065 rq[0]->engine->name); 4066 err = -EIO; 4067 goto out; 4068 } 4069 4070 for (n = 0; n < nsibling; n++) { 4071 if (i915_request_wait(rq[n + 1], 0, 4072 MAX_SCHEDULE_TIMEOUT) < 0) { 4073 err = -EIO; 4074 goto out; 4075 } 4076 4077 if (rq[n + 1]->engine != siblings[n]) { 4078 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4079 siblings[n]->name, 4080 rq[n + 1]->engine->name, 4081 rq[0]->engine->name); 4082 err = -EINVAL; 4083 goto out; 4084 } 4085 } 4086 4087 for (n = 0; !IS_ERR(rq[n]); n++) 4088 i915_request_put(rq[n]); 4089 rq[0] = ERR_PTR(-ENOMEM); 4090 } 4091 4092 out: 4093 for (n = 0; !IS_ERR(rq[n]); n++) 4094 i915_request_put(rq[n]); 4095 if (igt_flush_test(gt->i915)) 4096 err = -EIO; 4097 4098 igt_spinner_fini(&spin); 4099 return err; 4100 } 4101 4102 static int live_virtual_bond(void *arg) 4103 { 4104 static const struct phase { 4105 const char *name; 4106 unsigned int flags; 4107 } phases[] = { 4108 { "", 0 }, 4109 { "schedule", BOND_SCHEDULE }, 4110 { }, 4111 }; 4112 struct intel_gt *gt = arg; 4113 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4114 unsigned int class, inst; 4115 int err; 4116 4117 if (intel_uc_uses_guc_submission(>->uc)) 
4118 return 0; 4119 4120 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4121 const struct phase *p; 4122 int nsibling; 4123 4124 nsibling = 0; 4125 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 4126 if (!gt->engine_class[class][inst]) 4127 break; 4128 4129 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); 4130 siblings[nsibling++] = gt->engine_class[class][inst]; 4131 } 4132 if (nsibling < 2) 4133 continue; 4134 4135 for (p = phases; p->name; p++) { 4136 err = bond_virtual_engine(gt, 4137 class, siblings, nsibling, 4138 p->flags); 4139 if (err) { 4140 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 4141 __func__, p->name, class, nsibling, err); 4142 return err; 4143 } 4144 } 4145 } 4146 4147 return 0; 4148 } 4149 4150 static int reset_virtual_engine(struct intel_gt *gt, 4151 struct intel_engine_cs **siblings, 4152 unsigned int nsibling) 4153 { 4154 struct intel_engine_cs *engine; 4155 struct intel_context *ve; 4156 unsigned long *heartbeat; 4157 struct igt_spinner spin; 4158 struct i915_request *rq; 4159 unsigned int n; 4160 int err = 0; 4161 4162 /* 4163 * In order to support offline error capture for fast preempt reset, 4164 * we need to decouple the guilty request and ensure that it and its 4165 * descendents are not executed while the capture is in progress. 4166 */ 4167 4168 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); 4169 if (!heartbeat) 4170 return -ENOMEM; 4171 4172 if (igt_spinner_init(&spin, gt)) { 4173 err = -ENOMEM; 4174 goto out_free; 4175 } 4176 4177 ve = intel_execlists_create_virtual(siblings, nsibling); 4178 if (IS_ERR(ve)) { 4179 err = PTR_ERR(ve); 4180 goto out_spin; 4181 } 4182 4183 for (n = 0; n < nsibling; n++) 4184 engine_heartbeat_disable(siblings[n], &heartbeat[n]); 4185 4186 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4187 if (IS_ERR(rq)) { 4188 err = PTR_ERR(rq); 4189 goto out_heartbeat; 4190 } 4191 i915_request_add(rq); 4192 4193 if (!igt_wait_for_spinner(&spin, rq)) { 4194 intel_gt_set_wedged(gt); 4195 err = -ETIME; 4196 goto out_heartbeat; 4197 } 4198 4199 engine = rq->engine; 4200 GEM_BUG_ON(engine == ve->engine); 4201 4202 /* Take ownership of the reset and tasklet */ 4203 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 4204 >->reset.flags)) { 4205 intel_gt_set_wedged(gt); 4206 err = -EBUSY; 4207 goto out_heartbeat; 4208 } 4209 tasklet_disable(&engine->execlists.tasklet); 4210 4211 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 4212 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 4213 4214 /* Fake a preemption event; failed of course */ 4215 spin_lock_irq(&engine->active.lock); 4216 __unwind_incomplete_requests(engine); 4217 spin_unlock_irq(&engine->active.lock); 4218 GEM_BUG_ON(rq->engine != ve->engine); 4219 4220 /* Reset the engine while keeping our active request on hold */ 4221 execlists_hold(engine, rq); 4222 GEM_BUG_ON(!i915_request_on_hold(rq)); 4223 4224 intel_engine_reset(engine, NULL); 4225 GEM_BUG_ON(rq->fence.error != -EIO); 4226 4227 /* Release our grasp on the engine, letting CS flow again */ 4228 tasklet_enable(&engine->execlists.tasklet); 4229 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 4230 4231 /* Check that we do not resubmit the held request */ 4232 i915_request_get(rq); 4233 if (!i915_request_wait(rq, 0, HZ / 5)) { 4234 pr_err("%s: on hold request completed!\n", 4235 engine->name); 4236 intel_gt_set_wedged(gt); 4237 err = -EIO; 4238 goto out_rq; 4239 } 4240 GEM_BUG_ON(!i915_request_on_hold(rq)); 4241 4242 /* But is 
resubmitted on release */ 4243 execlists_unhold(engine, rq); 4244 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4245 pr_err("%s: held request did not complete!\n", 4246 engine->name); 4247 intel_gt_set_wedged(gt); 4248 err = -ETIME; 4249 } 4250 4251 out_rq: 4252 i915_request_put(rq); 4253 out_heartbeat: 4254 for (n = 0; n < nsibling; n++) 4255 engine_heartbeat_enable(siblings[n], heartbeat[n]); 4256 4257 intel_context_put(ve); 4258 out_spin: 4259 igt_spinner_fini(&spin); 4260 out_free: 4261 kfree(heartbeat); 4262 return err; 4263 } 4264 4265 static int live_virtual_reset(void *arg) 4266 { 4267 struct intel_gt *gt = arg; 4268 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4269 unsigned int class, inst; 4270 4271 /* 4272 * Check that we handle a reset event within a virtual engine. 4273 * Only the physical engine is reset, but we have to check the flow 4274 * of the virtual requests around the reset, and make sure it is not 4275 * forgotten. 4276 */ 4277 4278 if (intel_uc_uses_guc_submission(>->uc)) 4279 return 0; 4280 4281 if (!intel_has_reset_engine(gt)) 4282 return 0; 4283 4284 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4285 int nsibling, err; 4286 4287 nsibling = 0; 4288 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 4289 if (!gt->engine_class[class][inst]) 4290 continue; 4291 4292 siblings[nsibling++] = gt->engine_class[class][inst]; 4293 } 4294 if (nsibling < 2) 4295 continue; 4296 4297 err = reset_virtual_engine(gt, siblings, nsibling); 4298 if (err) 4299 return err; 4300 } 4301 4302 return 0; 4303 } 4304 4305 int intel_execlists_live_selftests(struct drm_i915_private *i915) 4306 { 4307 static const struct i915_subtest tests[] = { 4308 SUBTEST(live_sanitycheck), 4309 SUBTEST(live_unlite_switch), 4310 SUBTEST(live_unlite_preempt), 4311 SUBTEST(live_pin_rewind), 4312 SUBTEST(live_hold_reset), 4313 SUBTEST(live_error_interrupt), 4314 SUBTEST(live_timeslice_preempt), 4315 SUBTEST(live_timeslice_rewind), 4316 SUBTEST(live_timeslice_queue), 4317 SUBTEST(live_busywait_preempt), 4318 SUBTEST(live_preempt), 4319 SUBTEST(live_late_preempt), 4320 SUBTEST(live_nopreempt), 4321 SUBTEST(live_preempt_cancel), 4322 SUBTEST(live_suppress_self_preempt), 4323 SUBTEST(live_suppress_wait_preempt), 4324 SUBTEST(live_chain_preempt), 4325 SUBTEST(live_preempt_gang), 4326 SUBTEST(live_preempt_timeout), 4327 SUBTEST(live_preempt_user), 4328 SUBTEST(live_preempt_smoke), 4329 SUBTEST(live_virtual_engine), 4330 SUBTEST(live_virtual_mask), 4331 SUBTEST(live_virtual_preserved), 4332 SUBTEST(live_virtual_bond), 4333 SUBTEST(live_virtual_reset), 4334 }; 4335 4336 if (!HAS_EXECLISTS(i915)) 4337 return 0; 4338 4339 if (intel_gt_is_wedged(&i915->gt)) 4340 return 0; 4341 4342 return intel_gt_live_subtests(tests, &i915->gt); 4343 } 4344 4345 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 4346 { 4347 const u32 offset = 4348 i915_ggtt_offset(ce->engine->status_page.vma) + 4349 offset_in_page(slot); 4350 struct i915_request *rq; 4351 u32 *cs; 4352 4353 rq = intel_context_create_request(ce); 4354 if (IS_ERR(rq)) 4355 return PTR_ERR(rq); 4356 4357 cs = intel_ring_begin(rq, 4); 4358 if (IS_ERR(cs)) { 4359 i915_request_add(rq); 4360 return PTR_ERR(cs); 4361 } 4362 4363 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4364 *cs++ = offset; 4365 *cs++ = 0; 4366 *cs++ = 1; 4367 4368 intel_ring_advance(rq, cs); 4369 4370 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4371 i915_request_add(rq); 4372 return 0; 4373 } 4374 4375 static int context_flush(struct intel_context *ce, long 
timeout) 4376 { 4377 struct i915_request *rq; 4378 struct dma_fence *fence; 4379 int err = 0; 4380 4381 rq = intel_engine_create_kernel_request(ce->engine); 4382 if (IS_ERR(rq)) 4383 return PTR_ERR(rq); 4384 4385 fence = i915_active_fence_get(&ce->timeline->last_request); 4386 if (fence) { 4387 i915_request_await_dma_fence(rq, fence); 4388 dma_fence_put(fence); 4389 } 4390 4391 rq = i915_request_get(rq); 4392 i915_request_add(rq); 4393 if (i915_request_wait(rq, 0, timeout) < 0) 4394 err = -ETIME; 4395 i915_request_put(rq); 4396 4397 rmb(); /* We know the request is written, make sure all state is too! */ 4398 return err; 4399 } 4400 4401 static int live_lrc_layout(void *arg) 4402 { 4403 struct intel_gt *gt = arg; 4404 struct intel_engine_cs *engine; 4405 enum intel_engine_id id; 4406 u32 *lrc; 4407 int err; 4408 4409 /* 4410 * Check the registers offsets we use to create the initial reg state 4411 * match the layout saved by HW. 4412 */ 4413 4414 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4415 if (!lrc) 4416 return -ENOMEM; 4417 4418 err = 0; 4419 for_each_engine(engine, gt, id) { 4420 u32 *hw; 4421 int dw; 4422 4423 if (!engine->default_state) 4424 continue; 4425 4426 hw = shmem_pin_map(engine->default_state); 4427 if (IS_ERR(hw)) { 4428 err = PTR_ERR(hw); 4429 break; 4430 } 4431 hw += LRC_STATE_OFFSET / sizeof(*hw); 4432 4433 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4434 engine->kernel_context, 4435 engine, 4436 engine->kernel_context->ring, 4437 true); 4438 4439 dw = 0; 4440 do { 4441 u32 lri = hw[dw]; 4442 4443 if (lri == 0) { 4444 dw++; 4445 continue; 4446 } 4447 4448 if (lrc[dw] == 0) { 4449 pr_debug("%s: skipped instruction %x at dword %d\n", 4450 engine->name, lri, dw); 4451 dw++; 4452 continue; 4453 } 4454 4455 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4456 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4457 engine->name, dw, lri); 4458 err = -EINVAL; 4459 break; 4460 } 4461 4462 if (lrc[dw] != lri) { 4463 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4464 engine->name, dw, lri, lrc[dw]); 4465 err = -EINVAL; 4466 break; 4467 } 4468 4469 lri &= 0x7f; 4470 lri++; 4471 dw++; 4472 4473 while (lri) { 4474 if (hw[dw] != lrc[dw]) { 4475 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4476 engine->name, dw, hw[dw], lrc[dw]); 4477 err = -EINVAL; 4478 break; 4479 } 4480 4481 /* 4482 * Skip over the actual register value as we 4483 * expect that to differ. 4484 */ 4485 dw += 2; 4486 lri -= 2; 4487 } 4488 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4489 4490 if (err) { 4491 pr_info("%s: HW register image:\n", engine->name); 4492 igt_hexdump(hw, PAGE_SIZE); 4493 4494 pr_info("%s: SW register image:\n", engine->name); 4495 igt_hexdump(lrc, PAGE_SIZE); 4496 } 4497 4498 shmem_unpin_map(engine->default_state, hw); 4499 if (err) 4500 break; 4501 } 4502 4503 kfree(lrc); 4504 return err; 4505 } 4506 4507 static int find_offset(const u32 *lri, u32 offset) 4508 { 4509 int i; 4510 4511 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4512 if (lri[i] == offset) 4513 return i; 4514 4515 return -1; 4516 } 4517 4518 static int live_lrc_fixed(void *arg) 4519 { 4520 struct intel_gt *gt = arg; 4521 struct intel_engine_cs *engine; 4522 enum intel_engine_id id; 4523 int err = 0; 4524 4525 /* 4526 * Check the assumed register offsets match the actual locations in 4527 * the context image. 
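 *
 * The comparison below is mechanical: pin the engine's default context
 * image, locate each register's mmio offset in it with find_offset(),
 * and compare the dword index found against the CTX_* / lrc_ring_*()
 * constant used when building the reg state, roughly:
 *
 *     dw = find_offset(hw, i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)));
 *     expected: CTX_RING_HEAD - 1
 *
 * (the CTX_* values index the register's value dword, hence the -1 for
 * the preceding offset dword).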
4528 */ 4529 4530 for_each_engine(engine, gt, id) { 4531 const struct { 4532 u32 reg; 4533 u32 offset; 4534 const char *name; 4535 } tbl[] = { 4536 { 4537 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 4538 CTX_RING_START - 1, 4539 "RING_START" 4540 }, 4541 { 4542 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 4543 CTX_RING_CTL - 1, 4544 "RING_CTL" 4545 }, 4546 { 4547 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 4548 CTX_RING_HEAD - 1, 4549 "RING_HEAD" 4550 }, 4551 { 4552 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 4553 CTX_RING_TAIL - 1, 4554 "RING_TAIL" 4555 }, 4556 { 4557 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 4558 lrc_ring_mi_mode(engine), 4559 "RING_MI_MODE" 4560 }, 4561 { 4562 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 4563 CTX_BB_STATE - 1, 4564 "BB_STATE" 4565 }, 4566 { 4567 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), 4568 lrc_ring_wa_bb_per_ctx(engine), 4569 "RING_BB_PER_CTX_PTR" 4570 }, 4571 { 4572 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), 4573 lrc_ring_indirect_ptr(engine), 4574 "RING_INDIRECT_CTX_PTR" 4575 }, 4576 { 4577 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), 4578 lrc_ring_indirect_offset(engine), 4579 "RING_INDIRECT_CTX_OFFSET" 4580 }, 4581 { 4582 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 4583 CTX_TIMESTAMP - 1, 4584 "RING_CTX_TIMESTAMP" 4585 }, 4586 { 4587 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), 4588 lrc_ring_gpr0(engine), 4589 "RING_CS_GPR0" 4590 }, 4591 { 4592 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), 4593 lrc_ring_cmd_buf_cctl(engine), 4594 "RING_CMD_BUF_CCTL" 4595 }, 4596 { }, 4597 }, *t; 4598 u32 *hw; 4599 4600 if (!engine->default_state) 4601 continue; 4602 4603 hw = shmem_pin_map(engine->default_state); 4604 if (IS_ERR(hw)) { 4605 err = PTR_ERR(hw); 4606 break; 4607 } 4608 hw += LRC_STATE_OFFSET / sizeof(*hw); 4609 4610 for (t = tbl; t->name; t++) { 4611 int dw = find_offset(hw, t->reg); 4612 4613 if (dw != t->offset) { 4614 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 4615 engine->name, 4616 t->name, 4617 t->reg, 4618 dw, 4619 t->offset); 4620 err = -EINVAL; 4621 } 4622 } 4623 4624 shmem_unpin_map(engine->default_state, hw); 4625 } 4626 4627 return err; 4628 } 4629 4630 static int __live_lrc_state(struct intel_engine_cs *engine, 4631 struct i915_vma *scratch) 4632 { 4633 struct intel_context *ce; 4634 struct i915_request *rq; 4635 enum { 4636 RING_START_IDX = 0, 4637 RING_TAIL_IDX, 4638 MAX_IDX 4639 }; 4640 u32 expected[MAX_IDX]; 4641 u32 *cs; 4642 int err; 4643 int n; 4644 4645 ce = intel_context_create(engine); 4646 if (IS_ERR(ce)) 4647 return PTR_ERR(ce); 4648 4649 err = intel_context_pin(ce); 4650 if (err) 4651 goto err_put; 4652 4653 rq = i915_request_create(ce); 4654 if (IS_ERR(rq)) { 4655 err = PTR_ERR(rq); 4656 goto err_unpin; 4657 } 4658 4659 cs = intel_ring_begin(rq, 4 * MAX_IDX); 4660 if (IS_ERR(cs)) { 4661 err = PTR_ERR(cs); 4662 i915_request_add(rq); 4663 goto err_unpin; 4664 } 4665 4666 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4667 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 4668 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 4669 *cs++ = 0; 4670 4671 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 4672 4673 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4674 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 4675 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 4676 *cs++ = 
0; 4677 4678 i915_vma_lock(scratch); 4679 err = i915_request_await_object(rq, scratch->obj, true); 4680 if (!err) 4681 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4682 i915_vma_unlock(scratch); 4683 4684 i915_request_get(rq); 4685 i915_request_add(rq); 4686 if (err) 4687 goto err_rq; 4688 4689 intel_engine_flush_submission(engine); 4690 expected[RING_TAIL_IDX] = ce->ring->tail; 4691 4692 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4693 err = -ETIME; 4694 goto err_rq; 4695 } 4696 4697 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4698 if (IS_ERR(cs)) { 4699 err = PTR_ERR(cs); 4700 goto err_rq; 4701 } 4702 4703 for (n = 0; n < MAX_IDX; n++) { 4704 if (cs[n] != expected[n]) { 4705 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 4706 engine->name, n, cs[n], expected[n]); 4707 err = -EINVAL; 4708 break; 4709 } 4710 } 4711 4712 i915_gem_object_unpin_map(scratch->obj); 4713 4714 err_rq: 4715 i915_request_put(rq); 4716 err_unpin: 4717 intel_context_unpin(ce); 4718 err_put: 4719 intel_context_put(ce); 4720 return err; 4721 } 4722 4723 static int live_lrc_state(void *arg) 4724 { 4725 struct intel_gt *gt = arg; 4726 struct intel_engine_cs *engine; 4727 struct i915_vma *scratch; 4728 enum intel_engine_id id; 4729 int err = 0; 4730 4731 /* 4732 * Check the live register state matches what we expect for this 4733 * intel_context. 4734 */ 4735 4736 scratch = create_scratch(gt); 4737 if (IS_ERR(scratch)) 4738 return PTR_ERR(scratch); 4739 4740 for_each_engine(engine, gt, id) { 4741 err = __live_lrc_state(engine, scratch); 4742 if (err) 4743 break; 4744 } 4745 4746 if (igt_flush_test(gt->i915)) 4747 err = -EIO; 4748 4749 i915_vma_unpin_and_release(&scratch, 0); 4750 return err; 4751 } 4752 4753 static int gpr_make_dirty(struct intel_context *ce) 4754 { 4755 struct i915_request *rq; 4756 u32 *cs; 4757 int n; 4758 4759 rq = intel_context_create_request(ce); 4760 if (IS_ERR(rq)) 4761 return PTR_ERR(rq); 4762 4763 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 4764 if (IS_ERR(cs)) { 4765 i915_request_add(rq); 4766 return PTR_ERR(cs); 4767 } 4768 4769 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 4770 for (n = 0; n < NUM_GPR_DW; n++) { 4771 *cs++ = CS_GPR(ce->engine, n); 4772 *cs++ = STACK_MAGIC; 4773 } 4774 *cs++ = MI_NOOP; 4775 4776 intel_ring_advance(rq, cs); 4777 4778 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4779 i915_request_add(rq); 4780 4781 return 0; 4782 } 4783 4784 static struct i915_request * 4785 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 4786 { 4787 const u32 offset = 4788 i915_ggtt_offset(ce->engine->status_page.vma) + 4789 offset_in_page(slot); 4790 struct i915_request *rq; 4791 u32 *cs; 4792 int err; 4793 int n; 4794 4795 rq = intel_context_create_request(ce); 4796 if (IS_ERR(rq)) 4797 return rq; 4798 4799 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 4800 if (IS_ERR(cs)) { 4801 i915_request_add(rq); 4802 return ERR_CAST(cs); 4803 } 4804 4805 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4806 *cs++ = MI_NOOP; 4807 4808 *cs++ = MI_SEMAPHORE_WAIT | 4809 MI_SEMAPHORE_GLOBAL_GTT | 4810 MI_SEMAPHORE_POLL | 4811 MI_SEMAPHORE_SAD_NEQ_SDD; 4812 *cs++ = 0; 4813 *cs++ = offset; 4814 *cs++ = 0; 4815 4816 for (n = 0; n < NUM_GPR_DW; n++) { 4817 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4818 *cs++ = CS_GPR(ce->engine, n); 4819 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4820 *cs++ = 0; 4821 } 4822 4823 i915_vma_lock(scratch); 4824 err = i915_request_await_object(rq, scratch->obj, true); 4825 if (!err) 
4826 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4827 i915_vma_unlock(scratch); 4828 4829 i915_request_get(rq); 4830 i915_request_add(rq); 4831 if (err) { 4832 i915_request_put(rq); 4833 rq = ERR_PTR(err); 4834 } 4835 4836 return rq; 4837 } 4838 4839 static int __live_lrc_gpr(struct intel_engine_cs *engine, 4840 struct i915_vma *scratch, 4841 bool preempt) 4842 { 4843 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 4844 struct intel_context *ce; 4845 struct i915_request *rq; 4846 u32 *cs; 4847 int err; 4848 int n; 4849 4850 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 4851 return 0; /* GPR only on rcs0 for gen8 */ 4852 4853 err = gpr_make_dirty(engine->kernel_context); 4854 if (err) 4855 return err; 4856 4857 ce = intel_context_create(engine); 4858 if (IS_ERR(ce)) 4859 return PTR_ERR(ce); 4860 4861 rq = __gpr_read(ce, scratch, slot); 4862 if (IS_ERR(rq)) { 4863 err = PTR_ERR(rq); 4864 goto err_put; 4865 } 4866 4867 err = wait_for_submit(engine, rq, HZ / 2); 4868 if (err) 4869 goto err_rq; 4870 4871 if (preempt) { 4872 err = gpr_make_dirty(engine->kernel_context); 4873 if (err) 4874 goto err_rq; 4875 4876 err = emit_semaphore_signal(engine->kernel_context, slot); 4877 if (err) 4878 goto err_rq; 4879 } else { 4880 slot[0] = 1; 4881 wmb(); 4882 } 4883 4884 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4885 err = -ETIME; 4886 goto err_rq; 4887 } 4888 4889 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4890 if (IS_ERR(cs)) { 4891 err = PTR_ERR(cs); 4892 goto err_rq; 4893 } 4894 4895 for (n = 0; n < NUM_GPR_DW; n++) { 4896 if (cs[n]) { 4897 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 4898 engine->name, 4899 n / 2, n & 1 ? "udw" : "ldw", 4900 cs[n]); 4901 err = -EINVAL; 4902 break; 4903 } 4904 } 4905 4906 i915_gem_object_unpin_map(scratch->obj); 4907 4908 err_rq: 4909 memset32(&slot[0], -1, 4); 4910 wmb(); 4911 i915_request_put(rq); 4912 err_put: 4913 intel_context_put(ce); 4914 return err; 4915 } 4916 4917 static int live_lrc_gpr(void *arg) 4918 { 4919 struct intel_gt *gt = arg; 4920 struct intel_engine_cs *engine; 4921 struct i915_vma *scratch; 4922 enum intel_engine_id id; 4923 int err = 0; 4924 4925 /* 4926 * Check that GPR registers are cleared in new contexts as we need 4927 * to avoid leaking any information from previous contexts. 
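*
* To do so, we dirty the GPRs from the kernel context (gpr_make_dirty) and
* then read them back from a freshly created context into the scratch
* buffer using MI_STORE_REGISTER_MEM; any non-zero dword means that state
* leaked across the context switch. The preempt variant dirties the GPRs
* again at barrier priority and releases the semaphore from the kernel
* context, so the readback happens only after the new context has been
* switched out and restored.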
4928 */ 4929 4930 scratch = create_scratch(gt); 4931 if (IS_ERR(scratch)) 4932 return PTR_ERR(scratch); 4933 4934 for_each_engine(engine, gt, id) { 4935 unsigned long heartbeat; 4936 4937 engine_heartbeat_disable(engine, &heartbeat); 4938 4939 err = __live_lrc_gpr(engine, scratch, false); 4940 if (err) 4941 goto err; 4942 4943 err = __live_lrc_gpr(engine, scratch, true); 4944 if (err) 4945 goto err; 4946 4947 err: 4948 engine_heartbeat_enable(engine, heartbeat); 4949 if (igt_flush_test(gt->i915)) 4950 err = -EIO; 4951 if (err) 4952 break; 4953 } 4954 4955 i915_vma_unpin_and_release(&scratch, 0); 4956 return err; 4957 } 4958 4959 static struct i915_request * 4960 create_timestamp(struct intel_context *ce, void *slot, int idx) 4961 { 4962 const u32 offset = 4963 i915_ggtt_offset(ce->engine->status_page.vma) + 4964 offset_in_page(slot); 4965 struct i915_request *rq; 4966 u32 *cs; 4967 int err; 4968 4969 rq = intel_context_create_request(ce); 4970 if (IS_ERR(rq)) 4971 return rq; 4972 4973 cs = intel_ring_begin(rq, 10); 4974 if (IS_ERR(cs)) { 4975 err = PTR_ERR(cs); 4976 goto err; 4977 } 4978 4979 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4980 *cs++ = MI_NOOP; 4981 4982 *cs++ = MI_SEMAPHORE_WAIT | 4983 MI_SEMAPHORE_GLOBAL_GTT | 4984 MI_SEMAPHORE_POLL | 4985 MI_SEMAPHORE_SAD_NEQ_SDD; 4986 *cs++ = 0; 4987 *cs++ = offset; 4988 *cs++ = 0; 4989 4990 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4991 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 4992 *cs++ = offset + idx * sizeof(u32); 4993 *cs++ = 0; 4994 4995 intel_ring_advance(rq, cs); 4996 4997 rq->sched.attr.priority = I915_PRIORITY_MASK; 4998 err = 0; 4999 err: 5000 i915_request_get(rq); 5001 i915_request_add(rq); 5002 if (err) { 5003 i915_request_put(rq); 5004 return ERR_PTR(err); 5005 } 5006 5007 return rq; 5008 } 5009 5010 struct lrc_timestamp { 5011 struct intel_engine_cs *engine; 5012 struct intel_context *ce[2]; 5013 u32 poison; 5014 }; 5015 5016 static bool timestamp_advanced(u32 start, u32 end) 5017 { 5018 return (s32)(end - start) > 0; 5019 } 5020 5021 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 5022 { 5023 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 5024 struct i915_request *rq; 5025 u32 timestamp; 5026 int err = 0; 5027 5028 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 5029 rq = create_timestamp(arg->ce[0], slot, 1); 5030 if (IS_ERR(rq)) 5031 return PTR_ERR(rq); 5032 5033 err = wait_for_submit(rq->engine, rq, HZ / 2); 5034 if (err) 5035 goto err; 5036 5037 if (preempt) { 5038 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 5039 err = emit_semaphore_signal(arg->ce[1], slot); 5040 if (err) 5041 goto err; 5042 } else { 5043 slot[0] = 1; 5044 wmb(); 5045 } 5046 5047 /* And wait for switch to kernel (to save our context to memory) */ 5048 err = context_flush(arg->ce[0], HZ / 2); 5049 if (err) 5050 goto err; 5051 5052 if (!timestamp_advanced(arg->poison, slot[1])) { 5053 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 5054 arg->engine->name, preempt ? "preempt" : "simple", 5055 arg->poison, slot[1]); 5056 err = -EINVAL; 5057 } 5058 5059 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 5060 if (!timestamp_advanced(slot[1], timestamp)) { 5061 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 5062 arg->engine->name, preempt ? 
"preempt" : "simple", 5063 slot[1], timestamp); 5064 err = -EINVAL; 5065 } 5066 5067 err: 5068 memset32(slot, -1, 4); 5069 i915_request_put(rq); 5070 return err; 5071 } 5072 5073 static int live_lrc_timestamp(void *arg) 5074 { 5075 struct lrc_timestamp data = {}; 5076 struct intel_gt *gt = arg; 5077 enum intel_engine_id id; 5078 const u32 poison[] = { 5079 0, 5080 S32_MAX, 5081 (u32)S32_MAX + 1, 5082 U32_MAX, 5083 }; 5084 5085 /* 5086 * We want to verify that the timestamp is saved and restore across 5087 * context switches and is monotonic. 5088 * 5089 * So we do this with a little bit of LRC poisoning to check various 5090 * boundary conditions, and see what happens if we preempt the context 5091 * with a second request (carrying more poison into the timestamp). 5092 */ 5093 5094 for_each_engine(data.engine, gt, id) { 5095 unsigned long heartbeat; 5096 int i, err = 0; 5097 5098 engine_heartbeat_disable(data.engine, &heartbeat); 5099 5100 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5101 struct intel_context *tmp; 5102 5103 tmp = intel_context_create(data.engine); 5104 if (IS_ERR(tmp)) { 5105 err = PTR_ERR(tmp); 5106 goto err; 5107 } 5108 5109 err = intel_context_pin(tmp); 5110 if (err) { 5111 intel_context_put(tmp); 5112 goto err; 5113 } 5114 5115 data.ce[i] = tmp; 5116 } 5117 5118 for (i = 0; i < ARRAY_SIZE(poison); i++) { 5119 data.poison = poison[i]; 5120 5121 err = __lrc_timestamp(&data, false); 5122 if (err) 5123 break; 5124 5125 err = __lrc_timestamp(&data, true); 5126 if (err) 5127 break; 5128 } 5129 5130 err: 5131 engine_heartbeat_enable(data.engine, heartbeat); 5132 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5133 if (!data.ce[i]) 5134 break; 5135 5136 intel_context_unpin(data.ce[i]); 5137 intel_context_put(data.ce[i]); 5138 } 5139 5140 if (igt_flush_test(gt->i915)) 5141 err = -EIO; 5142 if (err) 5143 return err; 5144 } 5145 5146 return 0; 5147 } 5148 5149 static struct i915_vma * 5150 create_user_vma(struct i915_address_space *vm, unsigned long size) 5151 { 5152 struct drm_i915_gem_object *obj; 5153 struct i915_vma *vma; 5154 int err; 5155 5156 obj = i915_gem_object_create_internal(vm->i915, size); 5157 if (IS_ERR(obj)) 5158 return ERR_CAST(obj); 5159 5160 vma = i915_vma_instance(obj, vm, NULL); 5161 if (IS_ERR(vma)) { 5162 i915_gem_object_put(obj); 5163 return vma; 5164 } 5165 5166 err = i915_vma_pin(vma, 0, 0, PIN_USER); 5167 if (err) { 5168 i915_gem_object_put(obj); 5169 return ERR_PTR(err); 5170 } 5171 5172 return vma; 5173 } 5174 5175 static struct i915_vma * 5176 store_context(struct intel_context *ce, struct i915_vma *scratch) 5177 { 5178 struct i915_vma *batch; 5179 u32 dw, x, *cs, *hw; 5180 u32 *defaults; 5181 5182 batch = create_user_vma(ce->vm, SZ_64K); 5183 if (IS_ERR(batch)) 5184 return batch; 5185 5186 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5187 if (IS_ERR(cs)) { 5188 i915_vma_put(batch); 5189 return ERR_CAST(cs); 5190 } 5191 5192 defaults = shmem_pin_map(ce->engine->default_state); 5193 if (!defaults) { 5194 i915_gem_object_unpin_map(batch->obj); 5195 i915_vma_put(batch); 5196 return ERR_PTR(-ENOMEM); 5197 } 5198 5199 x = 0; 5200 dw = 0; 5201 hw = defaults; 5202 hw += LRC_STATE_OFFSET / sizeof(*hw); 5203 do { 5204 u32 len = hw[dw] & 0x7f; 5205 5206 if (hw[dw] == 0) { 5207 dw++; 5208 continue; 5209 } 5210 5211 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5212 dw += len + 2; 5213 continue; 5214 } 5215 5216 dw++; 5217 len = (len + 1) / 2; 5218 while (len--) { 5219 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 5220 *cs++ = hw[dw]; 5221 *cs++ = 
lower_32_bits(scratch->node.start + x); 5222 *cs++ = upper_32_bits(scratch->node.start + x); 5223 5224 dw += 2; 5225 x += 4; 5226 } 5227 } while (dw < PAGE_SIZE / sizeof(u32) && 5228 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5229 5230 *cs++ = MI_BATCH_BUFFER_END; 5231 5232 shmem_unpin_map(ce->engine->default_state, defaults); 5233 5234 i915_gem_object_flush_map(batch->obj); 5235 i915_gem_object_unpin_map(batch->obj); 5236 5237 return batch; 5238 } 5239 5240 static int move_to_active(struct i915_request *rq, 5241 struct i915_vma *vma, 5242 unsigned int flags) 5243 { 5244 int err; 5245 5246 i915_vma_lock(vma); 5247 err = i915_request_await_object(rq, vma->obj, flags); 5248 if (!err) 5249 err = i915_vma_move_to_active(vma, rq, flags); 5250 i915_vma_unlock(vma); 5251 5252 return err; 5253 } 5254 5255 static struct i915_request * 5256 record_registers(struct intel_context *ce, 5257 struct i915_vma *before, 5258 struct i915_vma *after, 5259 u32 *sema) 5260 { 5261 struct i915_vma *b_before, *b_after; 5262 struct i915_request *rq; 5263 u32 *cs; 5264 int err; 5265 5266 b_before = store_context(ce, before); 5267 if (IS_ERR(b_before)) 5268 return ERR_CAST(b_before); 5269 5270 b_after = store_context(ce, after); 5271 if (IS_ERR(b_after)) { 5272 rq = ERR_CAST(b_after); 5273 goto err_before; 5274 } 5275 5276 rq = intel_context_create_request(ce); 5277 if (IS_ERR(rq)) 5278 goto err_after; 5279 5280 err = move_to_active(rq, before, EXEC_OBJECT_WRITE); 5281 if (err) 5282 goto err_rq; 5283 5284 err = move_to_active(rq, b_before, 0); 5285 if (err) 5286 goto err_rq; 5287 5288 err = move_to_active(rq, after, EXEC_OBJECT_WRITE); 5289 if (err) 5290 goto err_rq; 5291 5292 err = move_to_active(rq, b_after, 0); 5293 if (err) 5294 goto err_rq; 5295 5296 cs = intel_ring_begin(rq, 14); 5297 if (IS_ERR(cs)) { 5298 err = PTR_ERR(cs); 5299 goto err_rq; 5300 } 5301 5302 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5303 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5304 *cs++ = lower_32_bits(b_before->node.start); 5305 *cs++ = upper_32_bits(b_before->node.start); 5306 5307 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5308 *cs++ = MI_SEMAPHORE_WAIT | 5309 MI_SEMAPHORE_GLOBAL_GTT | 5310 MI_SEMAPHORE_POLL | 5311 MI_SEMAPHORE_SAD_NEQ_SDD; 5312 *cs++ = 0; 5313 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5314 offset_in_page(sema); 5315 *cs++ = 0; 5316 *cs++ = MI_NOOP; 5317 5318 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5319 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5320 *cs++ = lower_32_bits(b_after->node.start); 5321 *cs++ = upper_32_bits(b_after->node.start); 5322 5323 intel_ring_advance(rq, cs); 5324 5325 WRITE_ONCE(*sema, 0); 5326 i915_request_get(rq); 5327 i915_request_add(rq); 5328 err_after: 5329 i915_vma_put(b_after); 5330 err_before: 5331 i915_vma_put(b_before); 5332 return rq; 5333 5334 err_rq: 5335 i915_request_add(rq); 5336 rq = ERR_PTR(err); 5337 goto err_after; 5338 } 5339 5340 static struct i915_vma *load_context(struct intel_context *ce, u32 poison) 5341 { 5342 struct i915_vma *batch; 5343 u32 dw, *cs, *hw; 5344 u32 *defaults; 5345 5346 batch = create_user_vma(ce->vm, SZ_64K); 5347 if (IS_ERR(batch)) 5348 return batch; 5349 5350 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5351 if (IS_ERR(cs)) { 5352 i915_vma_put(batch); 5353 return ERR_CAST(cs); 5354 } 5355 5356 defaults = shmem_pin_map(ce->engine->default_state); 5357 if (!defaults) { 5358 i915_gem_object_unpin_map(batch->obj); 5359 i915_vma_put(batch); 5360 return ERR_PTR(-ENOMEM); 5361 } 5362 5363 dw = 0; 5364 hw = defaults; 5365 hw += 
LRC_STATE_OFFSET / sizeof(*hw); 5366 do { 5367 u32 len = hw[dw] & 0x7f; 5368 5369 if (hw[dw] == 0) { 5370 dw++; 5371 continue; 5372 } 5373 5374 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5375 dw += len + 2; 5376 continue; 5377 } 5378 5379 dw++; 5380 len = (len + 1) / 2; 5381 *cs++ = MI_LOAD_REGISTER_IMM(len); 5382 while (len--) { 5383 *cs++ = hw[dw]; 5384 *cs++ = poison; 5385 dw += 2; 5386 } 5387 } while (dw < PAGE_SIZE / sizeof(u32) && 5388 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5389 5390 *cs++ = MI_BATCH_BUFFER_END; 5391 5392 shmem_unpin_map(ce->engine->default_state, defaults); 5393 5394 i915_gem_object_flush_map(batch->obj); 5395 i915_gem_object_unpin_map(batch->obj); 5396 5397 return batch; 5398 } 5399 5400 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) 5401 { 5402 struct i915_request *rq; 5403 struct i915_vma *batch; 5404 u32 *cs; 5405 int err; 5406 5407 batch = load_context(ce, poison); 5408 if (IS_ERR(batch)) 5409 return PTR_ERR(batch); 5410 5411 rq = intel_context_create_request(ce); 5412 if (IS_ERR(rq)) { 5413 err = PTR_ERR(rq); 5414 goto err_batch; 5415 } 5416 5417 err = move_to_active(rq, batch, 0); 5418 if (err) 5419 goto err_rq; 5420 5421 cs = intel_ring_begin(rq, 8); 5422 if (IS_ERR(cs)) { 5423 err = PTR_ERR(cs); 5424 goto err_rq; 5425 } 5426 5427 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5428 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5429 *cs++ = lower_32_bits(batch->node.start); 5430 *cs++ = upper_32_bits(batch->node.start); 5431 5432 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 5433 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5434 offset_in_page(sema); 5435 *cs++ = 0; 5436 *cs++ = 1; 5437 5438 intel_ring_advance(rq, cs); 5439 5440 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 5441 err_rq: 5442 i915_request_add(rq); 5443 err_batch: 5444 i915_vma_put(batch); 5445 return err; 5446 } 5447 5448 static bool is_moving(u32 a, u32 b) 5449 { 5450 return a != b; 5451 } 5452 5453 static int compare_isolation(struct intel_engine_cs *engine, 5454 struct i915_vma *ref[2], 5455 struct i915_vma *result[2], 5456 struct intel_context *ce, 5457 u32 poison) 5458 { 5459 u32 x, dw, *hw, *lrc; 5460 u32 *A[2], *B[2]; 5461 u32 *defaults; 5462 int err = 0; 5463 5464 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); 5465 if (IS_ERR(A[0])) 5466 return PTR_ERR(A[0]); 5467 5468 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); 5469 if (IS_ERR(A[1])) { 5470 err = PTR_ERR(A[1]); 5471 goto err_A0; 5472 } 5473 5474 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); 5475 if (IS_ERR(B[0])) { 5476 err = PTR_ERR(B[0]); 5477 goto err_A1; 5478 } 5479 5480 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); 5481 if (IS_ERR(B[1])) { 5482 err = PTR_ERR(B[1]); 5483 goto err_B0; 5484 } 5485 5486 lrc = i915_gem_object_pin_map(ce->state->obj, 5487 i915_coherent_map_type(engine->i915)); 5488 if (IS_ERR(lrc)) { 5489 err = PTR_ERR(lrc); 5490 goto err_B1; 5491 } 5492 lrc += LRC_STATE_OFFSET / sizeof(*hw); 5493 5494 defaults = shmem_pin_map(ce->engine->default_state); 5495 if (!defaults) { 5496 err = -ENOMEM; 5497 goto err_lrc; 5498 } 5499 5500 x = 0; 5501 dw = 0; 5502 hw = defaults; 5503 hw += LRC_STATE_OFFSET / sizeof(*hw); 5504 do { 5505 u32 len = hw[dw] & 0x7f; 5506 5507 if (hw[dw] == 0) { 5508 dw++; 5509 continue; 5510 } 5511 5512 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5513 dw += len + 2; 5514 continue; 5515 } 5516 5517 dw++; 5518 len = (len + 1) / 2; 5519 while (len--) { 5520 if (!is_moving(A[0][x], 
A[1][x]) && 5521 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { 5522 switch (hw[dw] & 4095) { 5523 case 0x30: /* RING_HEAD */ 5524 case 0x34: /* RING_TAIL */ 5525 break; 5526 5527 default: 5528 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", 5529 engine->name, dw, 5530 hw[dw], hw[dw + 1], 5531 A[0][x], B[0][x], B[1][x], 5532 poison, lrc[dw + 1]); 5533 err = -EINVAL; 5534 } 5535 } 5536 dw += 2; 5537 x++; 5538 } 5539 } while (dw < PAGE_SIZE / sizeof(u32) && 5540 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5541 5542 shmem_unpin_map(ce->engine->default_state, defaults); 5543 err_lrc: 5544 i915_gem_object_unpin_map(ce->state->obj); 5545 err_B1: 5546 i915_gem_object_unpin_map(result[1]->obj); 5547 err_B0: 5548 i915_gem_object_unpin_map(result[0]->obj); 5549 err_A1: 5550 i915_gem_object_unpin_map(ref[1]->obj); 5551 err_A0: 5552 i915_gem_object_unpin_map(ref[0]->obj); 5553 return err; 5554 } 5555 5556 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) 5557 { 5558 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); 5559 struct i915_vma *ref[2], *result[2]; 5560 struct intel_context *A, *B; 5561 struct i915_request *rq; 5562 int err; 5563 5564 A = intel_context_create(engine); 5565 if (IS_ERR(A)) 5566 return PTR_ERR(A); 5567 5568 B = intel_context_create(engine); 5569 if (IS_ERR(B)) { 5570 err = PTR_ERR(B); 5571 goto err_A; 5572 } 5573 5574 ref[0] = create_user_vma(A->vm, SZ_64K); 5575 if (IS_ERR(ref[0])) { 5576 err = PTR_ERR(ref[0]); 5577 goto err_B; 5578 } 5579 5580 ref[1] = create_user_vma(A->vm, SZ_64K); 5581 if (IS_ERR(ref[1])) { 5582 err = PTR_ERR(ref[1]); 5583 goto err_ref0; 5584 } 5585 5586 rq = record_registers(A, ref[0], ref[1], sema); 5587 if (IS_ERR(rq)) { 5588 err = PTR_ERR(rq); 5589 goto err_ref1; 5590 } 5591 5592 WRITE_ONCE(*sema, 1); 5593 wmb(); 5594 5595 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5596 i915_request_put(rq); 5597 err = -ETIME; 5598 goto err_ref1; 5599 } 5600 i915_request_put(rq); 5601 5602 result[0] = create_user_vma(A->vm, SZ_64K); 5603 if (IS_ERR(result[0])) { 5604 err = PTR_ERR(result[0]); 5605 goto err_ref1; 5606 } 5607 5608 result[1] = create_user_vma(A->vm, SZ_64K); 5609 if (IS_ERR(result[1])) { 5610 err = PTR_ERR(result[1]); 5611 goto err_result0; 5612 } 5613 5614 rq = record_registers(A, result[0], result[1], sema); 5615 if (IS_ERR(rq)) { 5616 err = PTR_ERR(rq); 5617 goto err_result1; 5618 } 5619 5620 err = poison_registers(B, poison, sema); 5621 if (err) { 5622 WRITE_ONCE(*sema, -1); 5623 i915_request_put(rq); 5624 goto err_result1; 5625 } 5626 5627 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5628 i915_request_put(rq); 5629 err = -ETIME; 5630 goto err_result1; 5631 } 5632 i915_request_put(rq); 5633 5634 err = compare_isolation(engine, ref, result, A, poison); 5635 5636 err_result1: 5637 i915_vma_put(result[1]); 5638 err_result0: 5639 i915_vma_put(result[0]); 5640 err_ref1: 5641 i915_vma_put(ref[1]); 5642 err_ref0: 5643 i915_vma_put(ref[0]); 5644 err_B: 5645 intel_context_put(B); 5646 err_A: 5647 intel_context_put(A); 5648 return err; 5649 } 5650 5651 static bool skip_isolation(const struct intel_engine_cs *engine) 5652 { 5653 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) 5654 return true; 5655 5656 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) 5657 return true; 5658 5659 return false; 5660 } 5661 5662 static int live_lrc_isolation(void *arg) 5663 { 5664 struct intel_gt *gt = arg; 5665 struct intel_engine_cs 
*engine; 5666 enum intel_engine_id id; 5667 const u32 poison[] = { 5668 STACK_MAGIC, 5669 0x3a3a3a3a, 5670 0x5c5c5c5c, 5671 0xffffffff, 5672 0xffff0000, 5673 }; 5674 int err = 0; 5675 5676 /* 5677 * Our goal is to try to verify that per-context state cannot be 5678 * tampered with by another non-privileged client. 5679 * 5680 * We take the list of context registers from the LRI in the default 5681 * context image and attempt to modify that list from a remote context. 5682 */ 5683 5684 for_each_engine(engine, gt, id) { 5685 int i; 5686 5687 /* Just don't even ask */ 5688 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && 5689 skip_isolation(engine)) 5690 continue; 5691 5692 intel_engine_pm_get(engine); 5693 for (i = 0; i < ARRAY_SIZE(poison); i++) { 5694 int result; 5695 5696 result = __lrc_isolation(engine, poison[i]); 5697 if (result && !err) 5698 err = result; 5699 5700 result = __lrc_isolation(engine, ~poison[i]); 5701 if (result && !err) 5702 err = result; 5703 } 5704 intel_engine_pm_put(engine); 5705 if (igt_flush_test(gt->i915)) { 5706 err = -EIO; 5707 break; 5708 } 5709 } 5710 5711 return err; 5712 } 5713 5714 static int indirect_ctx_submit_req(struct intel_context *ce) 5715 { 5716 struct i915_request *rq; 5717 int err = 0; 5718 5719 rq = intel_context_create_request(ce); 5720 if (IS_ERR(rq)) 5721 return PTR_ERR(rq); 5722 5723 i915_request_get(rq); 5724 i915_request_add(rq); 5725 5726 if (i915_request_wait(rq, 0, HZ / 5) < 0) 5727 err = -ETIME; 5728 5729 i915_request_put(rq); 5730 5731 return err; 5732 } 5733 5734 #define CTX_BB_CANARY_OFFSET (3 * 1024) 5735 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) 5736 5737 static u32 * 5738 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) 5739 { 5740 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | 5741 MI_SRM_LRM_GLOBAL_GTT | 5742 MI_LRI_LRM_CS_MMIO; 5743 *cs++ = i915_mmio_reg_offset(RING_START(0)); 5744 *cs++ = i915_ggtt_offset(ce->state) + 5745 context_wa_bb_offset(ce) + 5746 CTX_BB_CANARY_OFFSET; 5747 *cs++ = 0; 5748 5749 return cs; 5750 } 5751 5752 static void 5753 indirect_ctx_bb_setup(struct intel_context *ce) 5754 { 5755 u32 *cs = context_indirect_bb(ce); 5756 5757 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; 5758 5759 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); 5760 } 5761 5762 static bool check_ring_start(struct intel_context *ce) 5763 { 5764 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - 5765 LRC_STATE_OFFSET + context_wa_bb_offset(ce); 5766 5767 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) 5768 return true; 5769 5770 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n", 5771 ctx_bb[CTX_BB_CANARY_INDEX], 5772 ce->lrc_reg_state[CTX_RING_START]); 5773 5774 return false; 5775 } 5776 5777 static int indirect_ctx_bb_check(struct intel_context *ce) 5778 { 5779 int err; 5780 5781 err = indirect_ctx_submit_req(ce); 5782 if (err) 5783 return err; 5784 5785 if (!check_ring_start(ce)) 5786 return -EINVAL; 5787 5788 return 0; 5789 } 5790 5791 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) 5792 { 5793 struct intel_context *a, *b; 5794 int err; 5795 5796 a = intel_context_create(engine); 5797 if (IS_ERR(a)) 5798 return PTR_ERR(a); 5799 err = intel_context_pin(a); 5800 if (err) 5801 goto put_a; 5802 5803 b = intel_context_create(engine); 5804 if (IS_ERR(b)) { 5805 err = PTR_ERR(b); 5806 goto unpin_a; 5807 } 5808 err = intel_context_pin(b); 5809 if (err) 5810 goto put_b; 5811 5812 /* We use the already reserved extra page in the context state
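(the per-context wa_bb page) to hold the indirect context batch and the canary it writes; engines without a wa_bb page skip the test below, while gen12 is expected to always provide one.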
*/ 5813 if (!a->wa_bb_page) { 5814 GEM_BUG_ON(b->wa_bb_page); 5815 GEM_BUG_ON(INTEL_GEN(engine->i915) == 12); 5816 goto unpin_b; 5817 } 5818 5819 /* 5820 * In order to test that our per-context bb is truly per context, and 5821 * that it executes at the intended point of the context restore process, 5822 * make the batch store the ring start value to memory. 5823 * As ring start is restored before the indirect ctx bb runs, and as it 5824 * will be different for each context, it is well suited for this purpose. 5825 */ 5826 indirect_ctx_bb_setup(a); 5827 indirect_ctx_bb_setup(b); 5828 5829 err = indirect_ctx_bb_check(a); 5830 if (err) 5831 goto unpin_b; 5832 5833 err = indirect_ctx_bb_check(b); 5834 5835 unpin_b: 5836 intel_context_unpin(b); 5837 put_b: 5838 intel_context_put(b); 5839 unpin_a: 5840 intel_context_unpin(a); 5841 put_a: 5842 intel_context_put(a); 5843 5844 return err; 5845 } 5846 5847 static int live_lrc_indirect_ctx_bb(void *arg) 5848 { 5849 struct intel_gt *gt = arg; 5850 struct intel_engine_cs *engine; 5851 enum intel_engine_id id; 5852 int err = 0; 5853 5854 for_each_engine(engine, gt, id) { 5855 intel_engine_pm_get(engine); 5856 err = __live_lrc_indirect_ctx_bb(engine); 5857 intel_engine_pm_put(engine); 5858 5859 if (igt_flush_test(gt->i915)) 5860 err = -EIO; 5861 5862 if (err) 5863 break; 5864 } 5865 5866 return err; 5867 } 5868 5869 static void garbage_reset(struct intel_engine_cs *engine, 5870 struct i915_request *rq) 5871 { 5872 const unsigned int bit = I915_RESET_ENGINE + engine->id; 5873 unsigned long *lock = &engine->gt->reset.flags; 5874 5875 if (test_and_set_bit(bit, lock)) 5876 return; 5877 5878 tasklet_disable(&engine->execlists.tasklet); 5879 5880 if (!rq->fence.error) 5881 intel_engine_reset(engine, NULL); 5882 5883 tasklet_enable(&engine->execlists.tasklet); 5884 clear_and_wake_up_bit(bit, lock); 5885 } 5886 5887 static struct i915_request *garbage(struct intel_context *ce, 5888 struct rnd_state *prng) 5889 { 5890 struct i915_request *rq; 5891 int err; 5892 5893 err = intel_context_pin(ce); 5894 if (err) 5895 return ERR_PTR(err); 5896 5897 prandom_bytes_state(prng, 5898 ce->lrc_reg_state, 5899 ce->engine->context_size - 5900 LRC_STATE_OFFSET); 5901 5902 rq = intel_context_create_request(ce); 5903 if (IS_ERR(rq)) { 5904 err = PTR_ERR(rq); 5905 goto err_unpin; 5906 } 5907 5908 i915_request_get(rq); 5909 i915_request_add(rq); 5910 return rq; 5911 5912 err_unpin: 5913 intel_context_unpin(ce); 5914 return ERR_PTR(err); 5915 } 5916 5917 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng) 5918 { 5919 struct intel_context *ce; 5920 struct i915_request *hang; 5921 int err = 0; 5922 5923 ce = intel_context_create(engine); 5924 if (IS_ERR(ce)) 5925 return PTR_ERR(ce); 5926 5927 hang = garbage(ce, prng); 5928 if (IS_ERR(hang)) { 5929 err = PTR_ERR(hang); 5930 goto err_ce; 5931 } 5932 5933 if (wait_for_submit(engine, hang, HZ / 2)) { 5934 i915_request_put(hang); 5935 err = -ETIME; 5936 goto err_ce; 5937 } 5938 5939 intel_context_set_banned(ce); 5940 garbage_reset(engine, hang); 5941 5942 intel_engine_flush_submission(engine); 5943 if (!hang->fence.error) { 5944 i915_request_put(hang); 5945 pr_err("%s: corrupted context was not reset\n", 5946 engine->name); 5947 err = -EINVAL; 5948 goto err_ce; 5949 } 5950 5951 if (i915_request_wait(hang, 0, HZ / 2) < 0) { 5952 pr_err("%s: corrupted context did not recover\n", 5953 engine->name); 5954 i915_request_put(hang); 5955 err = -EIO; 5956 goto err_ce; 5957 } 5958 i915_request_put(hang); 5959 5960 err_ce: 5961
intel_context_put(ce); 5962 return err; 5963 } 5964 5965 static int live_lrc_garbage(void *arg) 5966 { 5967 struct intel_gt *gt = arg; 5968 struct intel_engine_cs *engine; 5969 enum intel_engine_id id; 5970 5971 /* 5972 * Verify that we can recover if one context state is completely 5973 * corrupted. 5974 */ 5975 5976 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) 5977 return 0; 5978 5979 for_each_engine(engine, gt, id) { 5980 I915_RND_STATE(prng); 5981 int err = 0, i; 5982 5983 if (!intel_has_reset_engine(engine->gt)) 5984 continue; 5985 5986 intel_engine_pm_get(engine); 5987 for (i = 0; i < 3; i++) { 5988 err = __lrc_garbage(engine, &prng); 5989 if (err) 5990 break; 5991 } 5992 intel_engine_pm_put(engine); 5993 5994 if (igt_flush_test(gt->i915)) 5995 err = -EIO; 5996 if (err) 5997 return err; 5998 } 5999 6000 return 0; 6001 } 6002 6003 static int __live_pphwsp_runtime(struct intel_engine_cs *engine) 6004 { 6005 struct intel_context *ce; 6006 struct i915_request *rq; 6007 IGT_TIMEOUT(end_time); 6008 int err; 6009 6010 ce = intel_context_create(engine); 6011 if (IS_ERR(ce)) 6012 return PTR_ERR(ce); 6013 6014 ce->runtime.num_underflow = 0; 6015 ce->runtime.max_underflow = 0; 6016 6017 do { 6018 unsigned int loop = 1024; 6019 6020 while (loop) { 6021 rq = intel_context_create_request(ce); 6022 if (IS_ERR(rq)) { 6023 err = PTR_ERR(rq); 6024 goto err_rq; 6025 } 6026 6027 if (--loop == 0) 6028 i915_request_get(rq); 6029 6030 i915_request_add(rq); 6031 } 6032 6033 if (__igt_timeout(end_time, NULL)) 6034 break; 6035 6036 i915_request_put(rq); 6037 } while (1); 6038 6039 err = i915_request_wait(rq, 0, HZ / 5); 6040 if (err < 0) { 6041 pr_err("%s: request not completed!\n", engine->name); 6042 goto err_wait; 6043 } 6044 6045 igt_flush_test(engine->i915); 6046 6047 pr_info("%s: pphwsp runtime %lluns, average %lluns\n", 6048 engine->name, 6049 intel_context_get_total_runtime_ns(ce), 6050 intel_context_get_avg_runtime_ns(ce)); 6051 6052 err = 0; 6053 if (ce->runtime.num_underflow) { 6054 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", 6055 engine->name, 6056 ce->runtime.num_underflow, 6057 ce->runtime.max_underflow); 6058 GEM_TRACE_DUMP(); 6059 err = -EOVERFLOW; 6060 } 6061 6062 err_wait: 6063 i915_request_put(rq); 6064 err_rq: 6065 intel_context_put(ce); 6066 return err; 6067 } 6068 6069 static int live_pphwsp_runtime(void *arg) 6070 { 6071 struct intel_gt *gt = arg; 6072 struct intel_engine_cs *engine; 6073 enum intel_engine_id id; 6074 int err = 0; 6075 6076 /* 6077 * Check that cumulative context runtime as stored in the pphwsp[16] 6078 * is monotonic. 6079 */ 6080 6081 for_each_engine(engine, gt, id) { 6082 err = __live_pphwsp_runtime(engine); 6083 if (err) 6084 break; 6085 } 6086 6087 if (igt_flush_test(gt->i915)) 6088 err = -EIO; 6089 6090 return err; 6091 } 6092 6093 int intel_lrc_live_selftests(struct drm_i915_private *i915) 6094 { 6095 static const struct i915_subtest tests[] = { 6096 SUBTEST(live_lrc_layout), 6097 SUBTEST(live_lrc_fixed), 6098 SUBTEST(live_lrc_state), 6099 SUBTEST(live_lrc_gpr), 6100 SUBTEST(live_lrc_isolation), 6101 SUBTEST(live_lrc_timestamp), 6102 SUBTEST(live_lrc_garbage), 6103 SUBTEST(live_pphwsp_runtime), 6104 SUBTEST(live_lrc_indirect_ctx_bb), 6105 }; 6106 6107 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 6108 return 0; 6109 6110 return intel_gt_live_subtests(tests, &i915->gt); 6111 } 6112