/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}

static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_started(rq))
		return true;

	return false;
}
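
/*
 * Poll until @rq is visible to the HW (submitted and acknowledged), or
 * until it has already completed. The submission tasklet is flushed on
 * each iteration so that execlists.pending[] and the request state are
 * current before we sample them. Returns -ETIME if the request has not
 * reached the HW within @timeout jiffies.
 */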

static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}

static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
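
/*
 * A lite restore is the HW continuing with the context already loaded in
 * ELSP, picking up just the new RING_TAIL instead of reloading the whole
 * context image. The tests below poison the rings so that an incorrect
 * lite restore (one that reuses the wrong context's RING_TAIL) executes
 * garbage and hangs, making the failure easy to spot.
 */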

static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
	 */

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Set up the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}

static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}

static int live_pin_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * We have to be careful not to trust intel_ring too much, for example
	 * ring->head is updated upon retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}
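
/*
 * execlists_hold() takes a request, together with its descendants, off the
 * engine's queues and parks it on a hold list so that it is not resubmitted
 * after the reset; execlists_unhold() puts it back and lets it run to
 * completion. live_hold_reset() below exercises both halves of that round
 * trip around an engine reset.
 */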

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		if (test_and_set_bit(I915_RESET_ENGINE + id,
				     &gt->reset.flags)) {
			intel_gt_set_wedged(gt);
			err = -EBUSY;
			goto out;
		}
		tasklet_disable(&engine->execlists.tasklet);

		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		intel_engine_reset(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		tasklet_enable(&engine->execlists.tasklet);
		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
				      &gt->reset.flags);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		engine_heartbeat_enable(engine);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}

static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD,  GOOD } },
		{ { BAD,  BAD } },
		{ { BAD,  GOOD } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
572 */ 573 574 if (!intel_has_reset_engine(gt)) 575 return 0; 576 577 for_each_engine(engine, gt, id) { 578 const struct error_phase *p; 579 int err = 0; 580 581 engine_heartbeat_disable(engine); 582 583 for (p = phases; p->error[0] != GOOD; p++) { 584 struct i915_request *client[ARRAY_SIZE(phases->error)]; 585 u32 *cs; 586 int i; 587 588 memset(client, 0, sizeof(*client)); 589 for (i = 0; i < ARRAY_SIZE(client); i++) { 590 struct intel_context *ce; 591 struct i915_request *rq; 592 593 ce = intel_context_create(engine); 594 if (IS_ERR(ce)) { 595 err = PTR_ERR(ce); 596 goto out; 597 } 598 599 rq = intel_context_create_request(ce); 600 intel_context_put(ce); 601 if (IS_ERR(rq)) { 602 err = PTR_ERR(rq); 603 goto out; 604 } 605 606 if (rq->engine->emit_init_breadcrumb) { 607 err = rq->engine->emit_init_breadcrumb(rq); 608 if (err) { 609 i915_request_add(rq); 610 goto out; 611 } 612 } 613 614 cs = intel_ring_begin(rq, 2); 615 if (IS_ERR(cs)) { 616 i915_request_add(rq); 617 err = PTR_ERR(cs); 618 goto out; 619 } 620 621 if (p->error[i]) { 622 *cs++ = 0xdeadbeef; 623 *cs++ = 0xdeadbeef; 624 } else { 625 *cs++ = MI_NOOP; 626 *cs++ = MI_NOOP; 627 } 628 629 client[i] = i915_request_get(rq); 630 i915_request_add(rq); 631 } 632 633 err = wait_for_submit(engine, client[0], HZ / 2); 634 if (err) { 635 pr_err("%s: first request did not start within time!\n", 636 engine->name); 637 err = -ETIME; 638 goto out; 639 } 640 641 for (i = 0; i < ARRAY_SIZE(client); i++) { 642 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 643 pr_debug("%s: %s request incomplete!\n", 644 engine->name, 645 error_repr(p->error[i])); 646 647 if (!i915_request_started(client[i])) { 648 pr_err("%s: %s request not started!\n", 649 engine->name, 650 error_repr(p->error[i])); 651 err = -ETIME; 652 goto out; 653 } 654 655 /* Kick the tasklet to process the error */ 656 intel_engine_flush_submission(engine); 657 if (client[i]->fence.error != p->error[i]) { 658 pr_err("%s: %s request (%s) with wrong error code: %d\n", 659 engine->name, 660 error_repr(p->error[i]), 661 i915_request_completed(client[i]) ? 
"completed" : "running", 662 client[i]->fence.error); 663 err = -EINVAL; 664 goto out; 665 } 666 } 667 668 out: 669 for (i = 0; i < ARRAY_SIZE(client); i++) 670 if (client[i]) 671 i915_request_put(client[i]); 672 if (err) { 673 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 674 engine->name, p - phases, 675 p->error[0], p->error[1]); 676 break; 677 } 678 } 679 680 engine_heartbeat_enable(engine); 681 if (err) { 682 intel_gt_set_wedged(gt); 683 return err; 684 } 685 } 686 687 return 0; 688 } 689 690 static int 691 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 692 { 693 u32 *cs; 694 695 cs = intel_ring_begin(rq, 10); 696 if (IS_ERR(cs)) 697 return PTR_ERR(cs); 698 699 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 700 701 *cs++ = MI_SEMAPHORE_WAIT | 702 MI_SEMAPHORE_GLOBAL_GTT | 703 MI_SEMAPHORE_POLL | 704 MI_SEMAPHORE_SAD_NEQ_SDD; 705 *cs++ = 0; 706 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 707 *cs++ = 0; 708 709 if (idx > 0) { 710 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 711 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 712 *cs++ = 0; 713 *cs++ = 1; 714 } else { 715 *cs++ = MI_NOOP; 716 *cs++ = MI_NOOP; 717 *cs++ = MI_NOOP; 718 *cs++ = MI_NOOP; 719 } 720 721 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 722 723 intel_ring_advance(rq, cs); 724 return 0; 725 } 726 727 static struct i915_request * 728 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 729 { 730 struct intel_context *ce; 731 struct i915_request *rq; 732 int err; 733 734 ce = intel_context_create(engine); 735 if (IS_ERR(ce)) 736 return ERR_CAST(ce); 737 738 rq = intel_context_create_request(ce); 739 if (IS_ERR(rq)) 740 goto out_ce; 741 742 err = 0; 743 if (rq->engine->emit_init_breadcrumb) 744 err = rq->engine->emit_init_breadcrumb(rq); 745 if (err == 0) 746 err = emit_semaphore_chain(rq, vma, idx); 747 if (err == 0) 748 i915_request_get(rq); 749 i915_request_add(rq); 750 if (err) 751 rq = ERR_PTR(err); 752 753 out_ce: 754 intel_context_put(ce); 755 return rq; 756 } 757 758 static int 759 release_queue(struct intel_engine_cs *engine, 760 struct i915_vma *vma, 761 int idx, int prio) 762 { 763 struct i915_sched_attr attr = { 764 .priority = prio, 765 }; 766 struct i915_request *rq; 767 u32 *cs; 768 769 rq = intel_engine_create_kernel_request(engine); 770 if (IS_ERR(rq)) 771 return PTR_ERR(rq); 772 773 cs = intel_ring_begin(rq, 4); 774 if (IS_ERR(cs)) { 775 i915_request_add(rq); 776 return PTR_ERR(cs); 777 } 778 779 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 780 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 781 *cs++ = 0; 782 *cs++ = 1; 783 784 intel_ring_advance(rq, cs); 785 786 i915_request_get(rq); 787 i915_request_add(rq); 788 789 local_bh_disable(); 790 engine->schedule(rq, &attr); 791 local_bh_enable(); /* kick tasklet */ 792 793 i915_request_put(rq); 794 795 return 0; 796 } 797 798 static int 799 slice_semaphore_queue(struct intel_engine_cs *outer, 800 struct i915_vma *vma, 801 int count) 802 { 803 struct intel_engine_cs *engine; 804 struct i915_request *head; 805 enum intel_engine_id id; 806 int err, i, n = 0; 807 808 head = semaphore_queue(outer, vma, n++); 809 if (IS_ERR(head)) 810 return PTR_ERR(head); 811 812 for_each_engine(engine, outer->gt, id) { 813 for (i = 0; i < count; i++) { 814 struct i915_request *rq; 815 816 rq = semaphore_queue(engine, vma, n++); 817 if (IS_ERR(rq)) { 818 err = PTR_ERR(rq); 819 goto out; 820 } 821 822 i915_request_put(rq); 823 } 824 } 825 826 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); 827 if (err) 828 goto out; 829 830 
if (i915_request_wait(head, 0, 831 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { 832 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 833 count, n); 834 GEM_TRACE_DUMP(); 835 intel_gt_set_wedged(outer->gt); 836 err = -EIO; 837 } 838 839 out: 840 i915_request_put(head); 841 return err; 842 } 843 844 static int live_timeslice_preempt(void *arg) 845 { 846 struct intel_gt *gt = arg; 847 struct drm_i915_gem_object *obj; 848 struct i915_vma *vma; 849 void *vaddr; 850 int err = 0; 851 int count; 852 853 /* 854 * If a request takes too long, we would like to give other users 855 * a fair go on the GPU. In particular, users may create batches 856 * that wait upon external input, where that input may even be 857 * supplied by another GPU job. To avoid blocking forever, we 858 * need to preempt the current task and replace it with another 859 * ready task. 860 */ 861 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 862 return 0; 863 864 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 865 if (IS_ERR(obj)) 866 return PTR_ERR(obj); 867 868 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 869 if (IS_ERR(vma)) { 870 err = PTR_ERR(vma); 871 goto err_obj; 872 } 873 874 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 875 if (IS_ERR(vaddr)) { 876 err = PTR_ERR(vaddr); 877 goto err_obj; 878 } 879 880 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 881 if (err) 882 goto err_map; 883 884 err = i915_vma_sync(vma); 885 if (err) 886 goto err_pin; 887 888 for_each_prime_number_from(count, 1, 16) { 889 struct intel_engine_cs *engine; 890 enum intel_engine_id id; 891 892 for_each_engine(engine, gt, id) { 893 if (!intel_engine_has_preemption(engine)) 894 continue; 895 896 memset(vaddr, 0, PAGE_SIZE); 897 898 engine_heartbeat_disable(engine); 899 err = slice_semaphore_queue(engine, vma, count); 900 engine_heartbeat_enable(engine); 901 if (err) 902 goto err_pin; 903 904 if (igt_flush_test(gt->i915)) { 905 err = -EIO; 906 goto err_pin; 907 } 908 } 909 } 910 911 err_pin: 912 i915_vma_unpin(vma); 913 err_map: 914 i915_gem_object_unpin_map(obj); 915 err_obj: 916 i915_gem_object_put(obj); 917 return err; 918 } 919 920 static struct i915_request * 921 create_rewinder(struct intel_context *ce, 922 struct i915_request *wait, 923 void *slot, int idx) 924 { 925 const u32 offset = 926 i915_ggtt_offset(ce->engine->status_page.vma) + 927 offset_in_page(slot); 928 struct i915_request *rq; 929 u32 *cs; 930 int err; 931 932 rq = intel_context_create_request(ce); 933 if (IS_ERR(rq)) 934 return rq; 935 936 if (wait) { 937 err = i915_request_await_dma_fence(rq, &wait->fence); 938 if (err) 939 goto err; 940 } 941 942 cs = intel_ring_begin(rq, 14); 943 if (IS_ERR(cs)) { 944 err = PTR_ERR(cs); 945 goto err; 946 } 947 948 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 949 *cs++ = MI_NOOP; 950 951 *cs++ = MI_SEMAPHORE_WAIT | 952 MI_SEMAPHORE_GLOBAL_GTT | 953 MI_SEMAPHORE_POLL | 954 MI_SEMAPHORE_SAD_GTE_SDD; 955 *cs++ = idx; 956 *cs++ = offset; 957 *cs++ = 0; 958 959 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 960 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 961 *cs++ = offset + idx * sizeof(u32); 962 *cs++ = 0; 963 964 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 965 *cs++ = offset; 966 *cs++ = 0; 967 *cs++ = idx + 1; 968 969 intel_ring_advance(rq, cs); 970 971 rq->sched.attr.priority = I915_PRIORITY_MASK; 972 err = 0; 973 err: 974 i915_request_get(rq); 975 i915_request_add(rq); 976 if (err) { 977 i915_request_put(rq); 978 return ERR_PTR(err); 979 } 980 
981 return rq; 982 } 983 984 static int live_timeslice_rewind(void *arg) 985 { 986 struct intel_gt *gt = arg; 987 struct intel_engine_cs *engine; 988 enum intel_engine_id id; 989 990 /* 991 * The usual presumption on timeslice expiration is that we replace 992 * the active context with another. However, given a chain of 993 * dependencies we may end up with replacing the context with itself, 994 * but only a few of those requests, forcing us to rewind the 995 * RING_TAIL of the original request. 996 */ 997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 998 return 0; 999 1000 for_each_engine(engine, gt, id) { 1001 enum { A1, A2, B1 }; 1002 enum { X = 1, Z, Y }; 1003 struct i915_request *rq[3] = {}; 1004 struct intel_context *ce; 1005 unsigned long timeslice; 1006 int i, err = 0; 1007 u32 *slot; 1008 1009 if (!intel_engine_has_timeslices(engine)) 1010 continue; 1011 1012 /* 1013 * A:rq1 -- semaphore wait, timestamp X 1014 * A:rq2 -- write timestamp Y 1015 * 1016 * B:rq1 [await A:rq1] -- write timestamp Z 1017 * 1018 * Force timeslice, release semaphore. 1019 * 1020 * Expect execution/evaluation order XZY 1021 */ 1022 1023 engine_heartbeat_disable(engine); 1024 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1025 1026 slot = memset32(engine->status_page.addr + 1000, 0, 4); 1027 1028 ce = intel_context_create(engine); 1029 if (IS_ERR(ce)) { 1030 err = PTR_ERR(ce); 1031 goto err; 1032 } 1033 1034 rq[0] = create_rewinder(ce, NULL, slot, X); 1035 if (IS_ERR(rq[0])) { 1036 intel_context_put(ce); 1037 goto err; 1038 } 1039 1040 rq[1] = create_rewinder(ce, NULL, slot, Y); 1041 intel_context_put(ce); 1042 if (IS_ERR(rq[1])) 1043 goto err; 1044 1045 err = wait_for_submit(engine, rq[1], HZ / 2); 1046 if (err) { 1047 pr_err("%s: failed to submit first context\n", 1048 engine->name); 1049 goto err; 1050 } 1051 1052 ce = intel_context_create(engine); 1053 if (IS_ERR(ce)) { 1054 err = PTR_ERR(ce); 1055 goto err; 1056 } 1057 1058 rq[2] = create_rewinder(ce, rq[0], slot, Z); 1059 intel_context_put(ce); 1060 if (IS_ERR(rq[2])) 1061 goto err; 1062 1063 err = wait_for_submit(engine, rq[2], HZ / 2); 1064 if (err) { 1065 pr_err("%s: failed to submit second context\n", 1066 engine->name); 1067 goto err; 1068 } 1069 1070 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1071 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ 1072 /* Wait for the timeslice to kick in */ 1073 del_timer(&engine->execlists.timer); 1074 tasklet_hi_schedule(&engine->execlists.tasklet); 1075 intel_engine_flush_submission(engine); 1076 } 1077 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1078 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1079 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1080 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1081 1082 /* Release the hounds! 
*/ 1083 slot[0] = 1; 1084 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1085 1086 for (i = 1; i <= 3; i++) { 1087 unsigned long timeout = jiffies + HZ / 2; 1088 1089 while (!READ_ONCE(slot[i]) && 1090 time_before(jiffies, timeout)) 1091 ; 1092 1093 if (!time_before(jiffies, timeout)) { 1094 pr_err("%s: rq[%d] timed out\n", 1095 engine->name, i - 1); 1096 err = -ETIME; 1097 goto err; 1098 } 1099 1100 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1101 } 1102 1103 /* XZY: XZ < XY */ 1104 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1105 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1106 engine->name, 1107 slot[Z] - slot[X], 1108 slot[Y] - slot[X]); 1109 err = -EINVAL; 1110 } 1111 1112 err: 1113 memset32(&slot[0], -1, 4); 1114 wmb(); 1115 1116 engine->props.timeslice_duration_ms = timeslice; 1117 engine_heartbeat_enable(engine); 1118 for (i = 0; i < 3; i++) 1119 i915_request_put(rq[i]); 1120 if (igt_flush_test(gt->i915)) 1121 err = -EIO; 1122 if (err) 1123 return err; 1124 } 1125 1126 return 0; 1127 } 1128 1129 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1130 { 1131 struct i915_request *rq; 1132 1133 rq = intel_engine_create_kernel_request(engine); 1134 if (IS_ERR(rq)) 1135 return rq; 1136 1137 i915_request_get(rq); 1138 i915_request_add(rq); 1139 1140 return rq; 1141 } 1142 1143 static long timeslice_threshold(const struct intel_engine_cs *engine) 1144 { 1145 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; 1146 } 1147 1148 static int live_timeslice_queue(void *arg) 1149 { 1150 struct intel_gt *gt = arg; 1151 struct drm_i915_gem_object *obj; 1152 struct intel_engine_cs *engine; 1153 enum intel_engine_id id; 1154 struct i915_vma *vma; 1155 void *vaddr; 1156 int err = 0; 1157 1158 /* 1159 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1160 * timeslicing between them disabled, we *do* enable timeslicing 1161 * if the queue demands it. (Normally, we do not submit if 1162 * ELSP[1] is already occupied, so must rely on timeslicing to 1163 * eject ELSP[0] in favour of the queue.) 
1164 */ 1165 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1166 return 0; 1167 1168 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1169 if (IS_ERR(obj)) 1170 return PTR_ERR(obj); 1171 1172 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1173 if (IS_ERR(vma)) { 1174 err = PTR_ERR(vma); 1175 goto err_obj; 1176 } 1177 1178 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1179 if (IS_ERR(vaddr)) { 1180 err = PTR_ERR(vaddr); 1181 goto err_obj; 1182 } 1183 1184 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1185 if (err) 1186 goto err_map; 1187 1188 err = i915_vma_sync(vma); 1189 if (err) 1190 goto err_pin; 1191 1192 for_each_engine(engine, gt, id) { 1193 struct i915_sched_attr attr = { 1194 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1195 }; 1196 struct i915_request *rq, *nop; 1197 1198 if (!intel_engine_has_preemption(engine)) 1199 continue; 1200 1201 engine_heartbeat_disable(engine); 1202 memset(vaddr, 0, PAGE_SIZE); 1203 1204 /* ELSP[0]: semaphore wait */ 1205 rq = semaphore_queue(engine, vma, 0); 1206 if (IS_ERR(rq)) { 1207 err = PTR_ERR(rq); 1208 goto err_heartbeat; 1209 } 1210 engine->schedule(rq, &attr); 1211 err = wait_for_submit(engine, rq, HZ / 2); 1212 if (err) { 1213 pr_err("%s: Timed out trying to submit semaphores\n", 1214 engine->name); 1215 goto err_rq; 1216 } 1217 1218 /* ELSP[1]: nop request */ 1219 nop = nop_request(engine); 1220 if (IS_ERR(nop)) { 1221 err = PTR_ERR(nop); 1222 goto err_rq; 1223 } 1224 err = wait_for_submit(engine, nop, HZ / 2); 1225 i915_request_put(nop); 1226 if (err) { 1227 pr_err("%s: Timed out trying to submit nop\n", 1228 engine->name); 1229 goto err_rq; 1230 } 1231 1232 GEM_BUG_ON(i915_request_completed(rq)); 1233 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1234 1235 /* Queue: semaphore signal, matching priority as semaphore */ 1236 err = release_queue(engine, vma, 1, effective_prio(rq)); 1237 if (err) 1238 goto err_rq; 1239 1240 /* Wait until we ack the release_queue and start timeslicing */ 1241 do { 1242 cond_resched(); 1243 intel_engine_flush_submission(engine); 1244 } while (READ_ONCE(engine->execlists.pending[0])); 1245 1246 if (!READ_ONCE(engine->execlists.timer.expires) && 1247 execlists_active(&engine->execlists) == rq && 1248 !i915_request_completed(rq)) { 1249 struct drm_printer p = 1250 drm_info_printer(gt->i915->drm.dev); 1251 1252 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", 1253 engine->name); 1254 intel_engine_dump(engine, &p, 1255 "%s\n", engine->name); 1256 GEM_TRACE_DUMP(); 1257 1258 memset(vaddr, 0xff, PAGE_SIZE); 1259 err = -EINVAL; 1260 } 1261 1262 /* Timeslice every jiffy, so within 2 we should signal */ 1263 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { 1264 struct drm_printer p = 1265 drm_info_printer(gt->i915->drm.dev); 1266 1267 pr_err("%s: Failed to timeslice into queue\n", 1268 engine->name); 1269 intel_engine_dump(engine, &p, 1270 "%s\n", engine->name); 1271 1272 memset(vaddr, 0xff, PAGE_SIZE); 1273 err = -EIO; 1274 } 1275 err_rq: 1276 i915_request_put(rq); 1277 err_heartbeat: 1278 engine_heartbeat_enable(engine); 1279 if (err) 1280 break; 1281 } 1282 1283 err_pin: 1284 i915_vma_unpin(vma); 1285 err_map: 1286 i915_gem_object_unpin_map(obj); 1287 err_obj: 1288 i915_gem_object_put(obj); 1289 return err; 1290 } 1291 1292 static int live_timeslice_nopreempt(void *arg) 1293 { 1294 struct intel_gt *gt = arg; 1295 struct intel_engine_cs *engine; 1296 enum intel_engine_id id; 1297 struct igt_spinner spin; 1298 int err = 0; 1299 1300 /* 1301 * We should not timeslice 
into a request that is marked with 1302 * I915_REQUEST_NOPREEMPT. 1303 */ 1304 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1305 return 0; 1306 1307 if (igt_spinner_init(&spin, gt)) 1308 return -ENOMEM; 1309 1310 for_each_engine(engine, gt, id) { 1311 struct intel_context *ce; 1312 struct i915_request *rq; 1313 unsigned long timeslice; 1314 1315 if (!intel_engine_has_preemption(engine)) 1316 continue; 1317 1318 ce = intel_context_create(engine); 1319 if (IS_ERR(ce)) { 1320 err = PTR_ERR(ce); 1321 break; 1322 } 1323 1324 engine_heartbeat_disable(engine); 1325 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1326 1327 /* Create an unpreemptible spinner */ 1328 1329 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 1330 intel_context_put(ce); 1331 if (IS_ERR(rq)) { 1332 err = PTR_ERR(rq); 1333 goto out_heartbeat; 1334 } 1335 1336 i915_request_get(rq); 1337 i915_request_add(rq); 1338 1339 if (!igt_wait_for_spinner(&spin, rq)) { 1340 i915_request_put(rq); 1341 err = -ETIME; 1342 goto out_spin; 1343 } 1344 1345 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 1346 i915_request_put(rq); 1347 1348 /* Followed by a maximum priority barrier (heartbeat) */ 1349 1350 ce = intel_context_create(engine); 1351 if (IS_ERR(ce)) { 1352 err = PTR_ERR(rq); 1353 goto out_spin; 1354 } 1355 1356 rq = intel_context_create_request(ce); 1357 intel_context_put(ce); 1358 if (IS_ERR(rq)) { 1359 err = PTR_ERR(rq); 1360 goto out_spin; 1361 } 1362 1363 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 1364 i915_request_get(rq); 1365 i915_request_add(rq); 1366 1367 /* 1368 * Wait until the barrier is in ELSP, and we know timeslicing 1369 * will have been activated. 1370 */ 1371 if (wait_for_submit(engine, rq, HZ / 2)) { 1372 i915_request_put(rq); 1373 err = -ETIME; 1374 goto out_spin; 1375 } 1376 1377 /* 1378 * Since the ELSP[0] request is unpreemptible, it should not 1379 * allow the maximum priority barrier through. Wait long 1380 * enough to see if it is timesliced in by mistake. 1381 */ 1382 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { 1383 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", 1384 engine->name); 1385 err = -EINVAL; 1386 } 1387 i915_request_put(rq); 1388 1389 out_spin: 1390 igt_spinner_end(&spin); 1391 out_heartbeat: 1392 xchg(&engine->props.timeslice_duration_ms, timeslice); 1393 engine_heartbeat_enable(engine); 1394 if (err) 1395 break; 1396 1397 if (igt_flush_test(gt->i915)) { 1398 err = -EIO; 1399 break; 1400 } 1401 } 1402 1403 igt_spinner_fini(&spin); 1404 return err; 1405 } 1406 1407 static int live_busywait_preempt(void *arg) 1408 { 1409 struct intel_gt *gt = arg; 1410 struct i915_gem_context *ctx_hi, *ctx_lo; 1411 struct intel_engine_cs *engine; 1412 struct drm_i915_gem_object *obj; 1413 struct i915_vma *vma; 1414 enum intel_engine_id id; 1415 int err = -ENOMEM; 1416 u32 *map; 1417 1418 /* 1419 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1420 * preempt the busywaits used to synchronise between rings. 
1421 */ 1422 1423 ctx_hi = kernel_context(gt->i915); 1424 if (!ctx_hi) 1425 return -ENOMEM; 1426 ctx_hi->sched.priority = 1427 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1428 1429 ctx_lo = kernel_context(gt->i915); 1430 if (!ctx_lo) 1431 goto err_ctx_hi; 1432 ctx_lo->sched.priority = 1433 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1434 1435 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1436 if (IS_ERR(obj)) { 1437 err = PTR_ERR(obj); 1438 goto err_ctx_lo; 1439 } 1440 1441 map = i915_gem_object_pin_map(obj, I915_MAP_WC); 1442 if (IS_ERR(map)) { 1443 err = PTR_ERR(map); 1444 goto err_obj; 1445 } 1446 1447 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1448 if (IS_ERR(vma)) { 1449 err = PTR_ERR(vma); 1450 goto err_map; 1451 } 1452 1453 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1454 if (err) 1455 goto err_map; 1456 1457 err = i915_vma_sync(vma); 1458 if (err) 1459 goto err_vma; 1460 1461 for_each_engine(engine, gt, id) { 1462 struct i915_request *lo, *hi; 1463 struct igt_live_test t; 1464 u32 *cs; 1465 1466 if (!intel_engine_has_preemption(engine)) 1467 continue; 1468 1469 if (!intel_engine_can_store_dword(engine)) 1470 continue; 1471 1472 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1473 err = -EIO; 1474 goto err_vma; 1475 } 1476 1477 /* 1478 * We create two requests. The low priority request 1479 * busywaits on a semaphore (inside the ringbuffer where 1480 * is should be preemptible) and the high priority requests 1481 * uses a MI_STORE_DWORD_IMM to update the semaphore value 1482 * allowing the first request to complete. If preemption 1483 * fails, we hang instead. 1484 */ 1485 1486 lo = igt_request_alloc(ctx_lo, engine); 1487 if (IS_ERR(lo)) { 1488 err = PTR_ERR(lo); 1489 goto err_vma; 1490 } 1491 1492 cs = intel_ring_begin(lo, 8); 1493 if (IS_ERR(cs)) { 1494 err = PTR_ERR(cs); 1495 i915_request_add(lo); 1496 goto err_vma; 1497 } 1498 1499 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1500 *cs++ = i915_ggtt_offset(vma); 1501 *cs++ = 0; 1502 *cs++ = 1; 1503 1504 /* XXX Do we need a flush + invalidate here? 
*/ 1505 1506 *cs++ = MI_SEMAPHORE_WAIT | 1507 MI_SEMAPHORE_GLOBAL_GTT | 1508 MI_SEMAPHORE_POLL | 1509 MI_SEMAPHORE_SAD_EQ_SDD; 1510 *cs++ = 0; 1511 *cs++ = i915_ggtt_offset(vma); 1512 *cs++ = 0; 1513 1514 intel_ring_advance(lo, cs); 1515 1516 i915_request_get(lo); 1517 i915_request_add(lo); 1518 1519 if (wait_for(READ_ONCE(*map), 10)) { 1520 i915_request_put(lo); 1521 err = -ETIMEDOUT; 1522 goto err_vma; 1523 } 1524 1525 /* Low priority request should be busywaiting now */ 1526 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1527 i915_request_put(lo); 1528 pr_err("%s: Busywaiting request did not!\n", 1529 engine->name); 1530 err = -EIO; 1531 goto err_vma; 1532 } 1533 1534 hi = igt_request_alloc(ctx_hi, engine); 1535 if (IS_ERR(hi)) { 1536 err = PTR_ERR(hi); 1537 i915_request_put(lo); 1538 goto err_vma; 1539 } 1540 1541 cs = intel_ring_begin(hi, 4); 1542 if (IS_ERR(cs)) { 1543 err = PTR_ERR(cs); 1544 i915_request_add(hi); 1545 i915_request_put(lo); 1546 goto err_vma; 1547 } 1548 1549 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1550 *cs++ = i915_ggtt_offset(vma); 1551 *cs++ = 0; 1552 *cs++ = 0; 1553 1554 intel_ring_advance(hi, cs); 1555 i915_request_add(hi); 1556 1557 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1558 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1559 1560 pr_err("%s: Failed to preempt semaphore busywait!\n", 1561 engine->name); 1562 1563 intel_engine_dump(engine, &p, "%s\n", engine->name); 1564 GEM_TRACE_DUMP(); 1565 1566 i915_request_put(lo); 1567 intel_gt_set_wedged(gt); 1568 err = -EIO; 1569 goto err_vma; 1570 } 1571 GEM_BUG_ON(READ_ONCE(*map)); 1572 i915_request_put(lo); 1573 1574 if (igt_live_test_end(&t)) { 1575 err = -EIO; 1576 goto err_vma; 1577 } 1578 } 1579 1580 err = 0; 1581 err_vma: 1582 i915_vma_unpin(vma); 1583 err_map: 1584 i915_gem_object_unpin_map(obj); 1585 err_obj: 1586 i915_gem_object_put(obj); 1587 err_ctx_lo: 1588 kernel_context_close(ctx_lo); 1589 err_ctx_hi: 1590 kernel_context_close(ctx_hi); 1591 return err; 1592 } 1593 1594 static struct i915_request * 1595 spinner_create_request(struct igt_spinner *spin, 1596 struct i915_gem_context *ctx, 1597 struct intel_engine_cs *engine, 1598 u32 arb) 1599 { 1600 struct intel_context *ce; 1601 struct i915_request *rq; 1602 1603 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1604 if (IS_ERR(ce)) 1605 return ERR_CAST(ce); 1606 1607 rq = igt_spinner_create_request(spin, ce, arb); 1608 intel_context_put(ce); 1609 return rq; 1610 } 1611 1612 static int live_preempt(void *arg) 1613 { 1614 struct intel_gt *gt = arg; 1615 struct i915_gem_context *ctx_hi, *ctx_lo; 1616 struct igt_spinner spin_hi, spin_lo; 1617 struct intel_engine_cs *engine; 1618 enum intel_engine_id id; 1619 int err = -ENOMEM; 1620 1621 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1622 return 0; 1623 1624 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1625 pr_err("Logical preemption supported, but not exposed\n"); 1626 1627 if (igt_spinner_init(&spin_hi, gt)) 1628 return -ENOMEM; 1629 1630 if (igt_spinner_init(&spin_lo, gt)) 1631 goto err_spin_hi; 1632 1633 ctx_hi = kernel_context(gt->i915); 1634 if (!ctx_hi) 1635 goto err_spin_lo; 1636 ctx_hi->sched.priority = 1637 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1638 1639 ctx_lo = kernel_context(gt->i915); 1640 if (!ctx_lo) 1641 goto err_ctx_hi; 1642 ctx_lo->sched.priority = 1643 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1644 1645 for_each_engine(engine, gt, id) { 1646 struct igt_live_test t; 1647 struct i915_request *rq; 1648 1649 if 
(!intel_engine_has_preemption(engine)) 1650 continue; 1651 1652 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1653 err = -EIO; 1654 goto err_ctx_lo; 1655 } 1656 1657 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1658 MI_ARB_CHECK); 1659 if (IS_ERR(rq)) { 1660 err = PTR_ERR(rq); 1661 goto err_ctx_lo; 1662 } 1663 1664 i915_request_add(rq); 1665 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1666 GEM_TRACE("lo spinner failed to start\n"); 1667 GEM_TRACE_DUMP(); 1668 intel_gt_set_wedged(gt); 1669 err = -EIO; 1670 goto err_ctx_lo; 1671 } 1672 1673 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1674 MI_ARB_CHECK); 1675 if (IS_ERR(rq)) { 1676 igt_spinner_end(&spin_lo); 1677 err = PTR_ERR(rq); 1678 goto err_ctx_lo; 1679 } 1680 1681 i915_request_add(rq); 1682 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1683 GEM_TRACE("hi spinner failed to start\n"); 1684 GEM_TRACE_DUMP(); 1685 intel_gt_set_wedged(gt); 1686 err = -EIO; 1687 goto err_ctx_lo; 1688 } 1689 1690 igt_spinner_end(&spin_hi); 1691 igt_spinner_end(&spin_lo); 1692 1693 if (igt_live_test_end(&t)) { 1694 err = -EIO; 1695 goto err_ctx_lo; 1696 } 1697 } 1698 1699 err = 0; 1700 err_ctx_lo: 1701 kernel_context_close(ctx_lo); 1702 err_ctx_hi: 1703 kernel_context_close(ctx_hi); 1704 err_spin_lo: 1705 igt_spinner_fini(&spin_lo); 1706 err_spin_hi: 1707 igt_spinner_fini(&spin_hi); 1708 return err; 1709 } 1710 1711 static int live_late_preempt(void *arg) 1712 { 1713 struct intel_gt *gt = arg; 1714 struct i915_gem_context *ctx_hi, *ctx_lo; 1715 struct igt_spinner spin_hi, spin_lo; 1716 struct intel_engine_cs *engine; 1717 struct i915_sched_attr attr = {}; 1718 enum intel_engine_id id; 1719 int err = -ENOMEM; 1720 1721 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1722 return 0; 1723 1724 if (igt_spinner_init(&spin_hi, gt)) 1725 return -ENOMEM; 1726 1727 if (igt_spinner_init(&spin_lo, gt)) 1728 goto err_spin_hi; 1729 1730 ctx_hi = kernel_context(gt->i915); 1731 if (!ctx_hi) 1732 goto err_spin_lo; 1733 1734 ctx_lo = kernel_context(gt->i915); 1735 if (!ctx_lo) 1736 goto err_ctx_hi; 1737 1738 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1739 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1740 1741 for_each_engine(engine, gt, id) { 1742 struct igt_live_test t; 1743 struct i915_request *rq; 1744 1745 if (!intel_engine_has_preemption(engine)) 1746 continue; 1747 1748 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1749 err = -EIO; 1750 goto err_ctx_lo; 1751 } 1752 1753 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1754 MI_ARB_CHECK); 1755 if (IS_ERR(rq)) { 1756 err = PTR_ERR(rq); 1757 goto err_ctx_lo; 1758 } 1759 1760 i915_request_add(rq); 1761 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1762 pr_err("First context failed to start\n"); 1763 goto err_wedged; 1764 } 1765 1766 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1767 MI_NOOP); 1768 if (IS_ERR(rq)) { 1769 igt_spinner_end(&spin_lo); 1770 err = PTR_ERR(rq); 1771 goto err_ctx_lo; 1772 } 1773 1774 i915_request_add(rq); 1775 if (igt_wait_for_spinner(&spin_hi, rq)) { 1776 pr_err("Second context overtook first?\n"); 1777 goto err_wedged; 1778 } 1779 1780 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1781 engine->schedule(rq, &attr); 1782 1783 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1784 pr_err("High priority context failed to preempt the low priority context\n"); 1785 GEM_TRACE_DUMP(); 1786 goto err_wedged; 1787 } 1788 1789 igt_spinner_end(&spin_hi); 1790 igt_spinner_end(&spin_lo); 1791 1792 if (igt_live_test_end(&t)) { 1793 err = -EIO; 1794 goto err_ctx_lo; 1795 } 1796 } 1797 1798 err = 0; 1799 err_ctx_lo: 1800 kernel_context_close(ctx_lo); 1801 err_ctx_hi: 1802 kernel_context_close(ctx_hi); 1803 err_spin_lo: 1804 igt_spinner_fini(&spin_lo); 1805 err_spin_hi: 1806 igt_spinner_fini(&spin_hi); 1807 return err; 1808 1809 err_wedged: 1810 igt_spinner_end(&spin_hi); 1811 igt_spinner_end(&spin_lo); 1812 intel_gt_set_wedged(gt); 1813 err = -EIO; 1814 goto err_ctx_lo; 1815 } 1816 1817 struct preempt_client { 1818 struct igt_spinner spin; 1819 struct i915_gem_context *ctx; 1820 }; 1821 1822 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1823 { 1824 c->ctx = kernel_context(gt->i915); 1825 if (!c->ctx) 1826 return -ENOMEM; 1827 1828 if (igt_spinner_init(&c->spin, gt)) 1829 goto err_ctx; 1830 1831 return 0; 1832 1833 err_ctx: 1834 kernel_context_close(c->ctx); 1835 return -ENOMEM; 1836 } 1837 1838 static void preempt_client_fini(struct preempt_client *c) 1839 { 1840 igt_spinner_fini(&c->spin); 1841 kernel_context_close(c->ctx); 1842 } 1843 1844 static int live_nopreempt(void *arg) 1845 { 1846 struct intel_gt *gt = arg; 1847 struct intel_engine_cs *engine; 1848 struct preempt_client a, b; 1849 enum intel_engine_id id; 1850 int err = -ENOMEM; 1851 1852 /* 1853 * Verify that we can disable preemption for an individual request 1854 * that may be being observed and not want to be interrupted. 1855 */ 1856 1857 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1858 return 0; 1859 1860 if (preempt_client_init(gt, &a)) 1861 return -ENOMEM; 1862 if (preempt_client_init(gt, &b)) 1863 goto err_client_a; 1864 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1865 1866 for_each_engine(engine, gt, id) { 1867 struct i915_request *rq_a, *rq_b; 1868 1869 if (!intel_engine_has_preemption(engine)) 1870 continue; 1871 1872 engine->execlists.preempt_hang.count = 0; 1873 1874 rq_a = spinner_create_request(&a.spin, 1875 a.ctx, engine, 1876 MI_ARB_CHECK); 1877 if (IS_ERR(rq_a)) { 1878 err = PTR_ERR(rq_a); 1879 goto err_client_b; 1880 } 1881 1882 /* Low priority client, but unpreemptable! 
*/ 1883 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 1884 1885 i915_request_add(rq_a); 1886 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1887 pr_err("First client failed to start\n"); 1888 goto err_wedged; 1889 } 1890 1891 rq_b = spinner_create_request(&b.spin, 1892 b.ctx, engine, 1893 MI_ARB_CHECK); 1894 if (IS_ERR(rq_b)) { 1895 err = PTR_ERR(rq_b); 1896 goto err_client_b; 1897 } 1898 1899 i915_request_add(rq_b); 1900 1901 /* B is much more important than A! (But A is unpreemptable.) */ 1902 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 1903 1904 /* Wait long enough for preemption and timeslicing */ 1905 if (igt_wait_for_spinner(&b.spin, rq_b)) { 1906 pr_err("Second client started too early!\n"); 1907 goto err_wedged; 1908 } 1909 1910 igt_spinner_end(&a.spin); 1911 1912 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1913 pr_err("Second client failed to start\n"); 1914 goto err_wedged; 1915 } 1916 1917 igt_spinner_end(&b.spin); 1918 1919 if (engine->execlists.preempt_hang.count) { 1920 pr_err("Preemption recorded x%d; should have been suppressed!\n", 1921 engine->execlists.preempt_hang.count); 1922 err = -EINVAL; 1923 goto err_wedged; 1924 } 1925 1926 if (igt_flush_test(gt->i915)) 1927 goto err_wedged; 1928 } 1929 1930 err = 0; 1931 err_client_b: 1932 preempt_client_fini(&b); 1933 err_client_a: 1934 preempt_client_fini(&a); 1935 return err; 1936 1937 err_wedged: 1938 igt_spinner_end(&b.spin); 1939 igt_spinner_end(&a.spin); 1940 intel_gt_set_wedged(gt); 1941 err = -EIO; 1942 goto err_client_b; 1943 } 1944 1945 struct live_preempt_cancel { 1946 struct intel_engine_cs *engine; 1947 struct preempt_client a, b; 1948 }; 1949 1950 static int __cancel_active0(struct live_preempt_cancel *arg) 1951 { 1952 struct i915_request *rq; 1953 struct igt_live_test t; 1954 int err; 1955 1956 /* Preempt cancel of ELSP0 */ 1957 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1958 if (igt_live_test_begin(&t, arg->engine->i915, 1959 __func__, arg->engine->name)) 1960 return -EIO; 1961 1962 rq = spinner_create_request(&arg->a.spin, 1963 arg->a.ctx, arg->engine, 1964 MI_ARB_CHECK); 1965 if (IS_ERR(rq)) 1966 return PTR_ERR(rq); 1967 1968 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1969 i915_request_get(rq); 1970 i915_request_add(rq); 1971 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1972 err = -EIO; 1973 goto out; 1974 } 1975 1976 intel_context_set_banned(rq->context); 1977 err = intel_engine_pulse(arg->engine); 1978 if (err) 1979 goto out; 1980 1981 err = wait_for_reset(arg->engine, rq, HZ / 2); 1982 if (err) { 1983 pr_err("Cancelled inflight0 request did not reset\n"); 1984 goto out; 1985 } 1986 1987 out: 1988 i915_request_put(rq); 1989 if (igt_live_test_end(&t)) 1990 err = -EIO; 1991 return err; 1992 } 1993 1994 static int __cancel_active1(struct live_preempt_cancel *arg) 1995 { 1996 struct i915_request *rq[2] = {}; 1997 struct igt_live_test t; 1998 int err; 1999 2000 /* Preempt cancel of ELSP1 */ 2001 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2002 if (igt_live_test_begin(&t, arg->engine->i915, 2003 __func__, arg->engine->name)) 2004 return -EIO; 2005 2006 rq[0] = spinner_create_request(&arg->a.spin, 2007 arg->a.ctx, arg->engine, 2008 MI_NOOP); /* no preemption */ 2009 if (IS_ERR(rq[0])) 2010 return PTR_ERR(rq[0]); 2011 2012 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2013 i915_request_get(rq[0]); 2014 i915_request_add(rq[0]); 2015 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2016 err = -EIO; 2017 goto out; 2018 } 2019 2020 rq[1] = spinner_create_request(&arg->b.spin, 2021 
arg->b.ctx, arg->engine, 2022 MI_ARB_CHECK); 2023 if (IS_ERR(rq[1])) { 2024 err = PTR_ERR(rq[1]); 2025 goto out; 2026 } 2027 2028 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2029 i915_request_get(rq[1]); 2030 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2031 i915_request_add(rq[1]); 2032 if (err) 2033 goto out; 2034 2035 intel_context_set_banned(rq[1]->context); 2036 err = intel_engine_pulse(arg->engine); 2037 if (err) 2038 goto out; 2039 2040 igt_spinner_end(&arg->a.spin); 2041 err = wait_for_reset(arg->engine, rq[1], HZ / 2); 2042 if (err) 2043 goto out; 2044 2045 if (rq[0]->fence.error != 0) { 2046 pr_err("Normal inflight0 request did not complete\n"); 2047 err = -EINVAL; 2048 goto out; 2049 } 2050 2051 if (rq[1]->fence.error != -EIO) { 2052 pr_err("Cancelled inflight1 request did not report -EIO\n"); 2053 err = -EINVAL; 2054 goto out; 2055 } 2056 2057 out: 2058 i915_request_put(rq[1]); 2059 i915_request_put(rq[0]); 2060 if (igt_live_test_end(&t)) 2061 err = -EIO; 2062 return err; 2063 } 2064 2065 static int __cancel_queued(struct live_preempt_cancel *arg) 2066 { 2067 struct i915_request *rq[3] = {}; 2068 struct igt_live_test t; 2069 int err; 2070 2071 /* Full ELSP and one in the wings */ 2072 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2073 if (igt_live_test_begin(&t, arg->engine->i915, 2074 __func__, arg->engine->name)) 2075 return -EIO; 2076 2077 rq[0] = spinner_create_request(&arg->a.spin, 2078 arg->a.ctx, arg->engine, 2079 MI_ARB_CHECK); 2080 if (IS_ERR(rq[0])) 2081 return PTR_ERR(rq[0]); 2082 2083 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2084 i915_request_get(rq[0]); 2085 i915_request_add(rq[0]); 2086 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2087 err = -EIO; 2088 goto out; 2089 } 2090 2091 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 2092 if (IS_ERR(rq[1])) { 2093 err = PTR_ERR(rq[1]); 2094 goto out; 2095 } 2096 2097 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2098 i915_request_get(rq[1]); 2099 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2100 i915_request_add(rq[1]); 2101 if (err) 2102 goto out; 2103 2104 rq[2] = spinner_create_request(&arg->b.spin, 2105 arg->a.ctx, arg->engine, 2106 MI_ARB_CHECK); 2107 if (IS_ERR(rq[2])) { 2108 err = PTR_ERR(rq[2]); 2109 goto out; 2110 } 2111 2112 i915_request_get(rq[2]); 2113 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 2114 i915_request_add(rq[2]); 2115 if (err) 2116 goto out; 2117 2118 intel_context_set_banned(rq[2]->context); 2119 err = intel_engine_pulse(arg->engine); 2120 if (err) 2121 goto out; 2122 2123 err = wait_for_reset(arg->engine, rq[2], HZ / 2); 2124 if (err) 2125 goto out; 2126 2127 if (rq[0]->fence.error != -EIO) { 2128 pr_err("Cancelled inflight0 request did not report -EIO\n"); 2129 err = -EINVAL; 2130 goto out; 2131 } 2132 2133 if (rq[1]->fence.error != 0) { 2134 pr_err("Normal inflight1 request did not complete\n"); 2135 err = -EINVAL; 2136 goto out; 2137 } 2138 2139 if (rq[2]->fence.error != -EIO) { 2140 pr_err("Cancelled queued request did not report -EIO\n"); 2141 err = -EINVAL; 2142 goto out; 2143 } 2144 2145 out: 2146 i915_request_put(rq[2]); 2147 i915_request_put(rq[1]); 2148 i915_request_put(rq[0]); 2149 if (igt_live_test_end(&t)) 2150 err = -EIO; 2151 return err; 2152 } 2153 2154 static int __cancel_hostile(struct live_preempt_cancel *arg) 2155 { 2156 struct i915_request *rq; 2157 int err; 2158 2159 /* Preempt cancel non-preemptible spinner in ELSP0 */ 2160 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2161 return 0; 2162 2163 
if (!intel_has_reset_engine(arg->engine->gt)) 2164 return 0; 2165 2166 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2167 rq = spinner_create_request(&arg->a.spin, 2168 arg->a.ctx, arg->engine, 2169 MI_NOOP); /* preemption disabled */ 2170 if (IS_ERR(rq)) 2171 return PTR_ERR(rq); 2172 2173 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2174 i915_request_get(rq); 2175 i915_request_add(rq); 2176 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2177 err = -EIO; 2178 goto out; 2179 } 2180 2181 intel_context_set_banned(rq->context); 2182 err = intel_engine_pulse(arg->engine); /* force reset */ 2183 if (err) 2184 goto out; 2185 2186 err = wait_for_reset(arg->engine, rq, HZ / 2); 2187 if (err) { 2188 pr_err("Cancelled inflight0 request did not reset\n"); 2189 goto out; 2190 } 2191 2192 out: 2193 i915_request_put(rq); 2194 if (igt_flush_test(arg->engine->i915)) 2195 err = -EIO; 2196 return err; 2197 } 2198 2199 static int live_preempt_cancel(void *arg) 2200 { 2201 struct intel_gt *gt = arg; 2202 struct live_preempt_cancel data; 2203 enum intel_engine_id id; 2204 int err = -ENOMEM; 2205 2206 /* 2207 * To cancel an inflight context, we need to first remove it from the 2208 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 2209 */ 2210 2211 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2212 return 0; 2213 2214 if (preempt_client_init(gt, &data.a)) 2215 return -ENOMEM; 2216 if (preempt_client_init(gt, &data.b)) 2217 goto err_client_a; 2218 2219 for_each_engine(data.engine, gt, id) { 2220 if (!intel_engine_has_preemption(data.engine)) 2221 continue; 2222 2223 err = __cancel_active0(&data); 2224 if (err) 2225 goto err_wedged; 2226 2227 err = __cancel_active1(&data); 2228 if (err) 2229 goto err_wedged; 2230 2231 err = __cancel_queued(&data); 2232 if (err) 2233 goto err_wedged; 2234 2235 err = __cancel_hostile(&data); 2236 if (err) 2237 goto err_wedged; 2238 } 2239 2240 err = 0; 2241 err_client_b: 2242 preempt_client_fini(&data.b); 2243 err_client_a: 2244 preempt_client_fini(&data.a); 2245 return err; 2246 2247 err_wedged: 2248 GEM_TRACE_DUMP(); 2249 igt_spinner_end(&data.b.spin); 2250 igt_spinner_end(&data.a.spin); 2251 intel_gt_set_wedged(gt); 2252 goto err_client_b; 2253 } 2254 2255 static int live_suppress_self_preempt(void *arg) 2256 { 2257 struct intel_gt *gt = arg; 2258 struct intel_engine_cs *engine; 2259 struct i915_sched_attr attr = { 2260 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 2261 }; 2262 struct preempt_client a, b; 2263 enum intel_engine_id id; 2264 int err = -ENOMEM; 2265 2266 /* 2267 * Verify that if a preemption request does not cause a change in 2268 * the current execution order, the preempt-to-idle injection is 2269 * skipped and that we do not accidentally apply it after the CS 2270 * completion event. 
2271 */ 2272 2273 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2274 return 0; 2275 2276 if (intel_uc_uses_guc_submission(>->uc)) 2277 return 0; /* presume black blox */ 2278 2279 if (intel_vgpu_active(gt->i915)) 2280 return 0; /* GVT forces single port & request submission */ 2281 2282 if (preempt_client_init(gt, &a)) 2283 return -ENOMEM; 2284 if (preempt_client_init(gt, &b)) 2285 goto err_client_a; 2286 2287 for_each_engine(engine, gt, id) { 2288 struct i915_request *rq_a, *rq_b; 2289 int depth; 2290 2291 if (!intel_engine_has_preemption(engine)) 2292 continue; 2293 2294 if (igt_flush_test(gt->i915)) 2295 goto err_wedged; 2296 2297 intel_engine_pm_get(engine); 2298 engine->execlists.preempt_hang.count = 0; 2299 2300 rq_a = spinner_create_request(&a.spin, 2301 a.ctx, engine, 2302 MI_NOOP); 2303 if (IS_ERR(rq_a)) { 2304 err = PTR_ERR(rq_a); 2305 intel_engine_pm_put(engine); 2306 goto err_client_b; 2307 } 2308 2309 i915_request_add(rq_a); 2310 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2311 pr_err("First client failed to start\n"); 2312 intel_engine_pm_put(engine); 2313 goto err_wedged; 2314 } 2315 2316 /* Keep postponing the timer to avoid premature slicing */ 2317 mod_timer(&engine->execlists.timer, jiffies + HZ); 2318 for (depth = 0; depth < 8; depth++) { 2319 rq_b = spinner_create_request(&b.spin, 2320 b.ctx, engine, 2321 MI_NOOP); 2322 if (IS_ERR(rq_b)) { 2323 err = PTR_ERR(rq_b); 2324 intel_engine_pm_put(engine); 2325 goto err_client_b; 2326 } 2327 i915_request_add(rq_b); 2328 2329 GEM_BUG_ON(i915_request_completed(rq_a)); 2330 engine->schedule(rq_a, &attr); 2331 igt_spinner_end(&a.spin); 2332 2333 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2334 pr_err("Second client failed to start\n"); 2335 intel_engine_pm_put(engine); 2336 goto err_wedged; 2337 } 2338 2339 swap(a, b); 2340 rq_a = rq_b; 2341 } 2342 igt_spinner_end(&a.spin); 2343 2344 if (engine->execlists.preempt_hang.count) { 2345 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2346 engine->name, 2347 engine->execlists.preempt_hang.count, 2348 depth); 2349 intel_engine_pm_put(engine); 2350 err = -EINVAL; 2351 goto err_client_b; 2352 } 2353 2354 intel_engine_pm_put(engine); 2355 if (igt_flush_test(gt->i915)) 2356 goto err_wedged; 2357 } 2358 2359 err = 0; 2360 err_client_b: 2361 preempt_client_fini(&b); 2362 err_client_a: 2363 preempt_client_fini(&a); 2364 return err; 2365 2366 err_wedged: 2367 igt_spinner_end(&b.spin); 2368 igt_spinner_end(&a.spin); 2369 intel_gt_set_wedged(gt); 2370 err = -EIO; 2371 goto err_client_b; 2372 } 2373 2374 static int __i915_sw_fence_call 2375 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) 2376 { 2377 return NOTIFY_DONE; 2378 } 2379 2380 static struct i915_request *dummy_request(struct intel_engine_cs *engine) 2381 { 2382 struct i915_request *rq; 2383 2384 rq = kzalloc(sizeof(*rq), GFP_KERNEL); 2385 if (!rq) 2386 return NULL; 2387 2388 rq->engine = engine; 2389 2390 spin_lock_init(&rq->lock); 2391 INIT_LIST_HEAD(&rq->fence.cb_list); 2392 rq->fence.lock = &rq->lock; 2393 rq->fence.ops = &i915_fence_ops; 2394 2395 i915_sched_node_init(&rq->sched); 2396 2397 /* mark this request as permanently incomplete */ 2398 rq->fence.seqno = 1; 2399 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ 2400 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; 2401 GEM_BUG_ON(i915_request_completed(rq)); 2402 2403 i915_sw_fence_init(&rq->submit, dummy_notify); 2404 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 2405 2406 spin_lock_init(&rq->lock); 
2407 rq->fence.lock = &rq->lock; 2408 INIT_LIST_HEAD(&rq->fence.cb_list); 2409 2410 return rq; 2411 } 2412 2413 static void dummy_request_free(struct i915_request *dummy) 2414 { 2415 /* We have to fake the CS interrupt to kick the next request */ 2416 i915_sw_fence_commit(&dummy->submit); 2417 2418 i915_request_mark_complete(dummy); 2419 dma_fence_signal(&dummy->fence); 2420 2421 i915_sched_node_fini(&dummy->sched); 2422 i915_sw_fence_fini(&dummy->submit); 2423 2424 dma_fence_free(&dummy->fence); 2425 } 2426 2427 static int live_suppress_wait_preempt(void *arg) 2428 { 2429 struct intel_gt *gt = arg; 2430 struct preempt_client client[4]; 2431 struct i915_request *rq[ARRAY_SIZE(client)] = {}; 2432 struct intel_engine_cs *engine; 2433 enum intel_engine_id id; 2434 int err = -ENOMEM; 2435 int i; 2436 2437 /* 2438 * Waiters are given a little priority nudge, but not enough 2439 * to actually cause any preemption. Double check that we do 2440 * not needlessly generate preempt-to-idle cycles. 2441 */ 2442 2443 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2444 return 0; 2445 2446 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ 2447 return -ENOMEM; 2448 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ 2449 goto err_client_0; 2450 if (preempt_client_init(gt, &client[2])) /* head of queue */ 2451 goto err_client_1; 2452 if (preempt_client_init(gt, &client[3])) /* bystander */ 2453 goto err_client_2; 2454 2455 for_each_engine(engine, gt, id) { 2456 int depth; 2457 2458 if (!intel_engine_has_preemption(engine)) 2459 continue; 2460 2461 if (!engine->emit_init_breadcrumb) 2462 continue; 2463 2464 for (depth = 0; depth < ARRAY_SIZE(client); depth++) { 2465 struct i915_request *dummy; 2466 2467 engine->execlists.preempt_hang.count = 0; 2468 2469 dummy = dummy_request(engine); 2470 if (!dummy) 2471 goto err_client_3; 2472 2473 for (i = 0; i < ARRAY_SIZE(client); i++) { 2474 struct i915_request *this; 2475 2476 this = spinner_create_request(&client[i].spin, 2477 client[i].ctx, engine, 2478 MI_NOOP); 2479 if (IS_ERR(this)) { 2480 err = PTR_ERR(this); 2481 goto err_wedged; 2482 } 2483 2484 /* Disable NEWCLIENT promotion */ 2485 __i915_active_fence_set(&i915_request_timeline(this)->last_request, 2486 &dummy->fence); 2487 2488 rq[i] = i915_request_get(this); 2489 i915_request_add(this); 2490 } 2491 2492 dummy_request_free(dummy); 2493 2494 GEM_BUG_ON(i915_request_completed(rq[0])); 2495 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { 2496 pr_err("%s: First client failed to start\n", 2497 engine->name); 2498 goto err_wedged; 2499 } 2500 GEM_BUG_ON(!i915_request_started(rq[0])); 2501 2502 if (i915_request_wait(rq[depth], 2503 I915_WAIT_PRIORITY, 2504 1) != -ETIME) { 2505 pr_err("%s: Waiter depth:%d completed!\n", 2506 engine->name, depth); 2507 goto err_wedged; 2508 } 2509 2510 for (i = 0; i < ARRAY_SIZE(client); i++) { 2511 igt_spinner_end(&client[i].spin); 2512 i915_request_put(rq[i]); 2513 rq[i] = NULL; 2514 } 2515 2516 if (igt_flush_test(gt->i915)) 2517 goto err_wedged; 2518 2519 if (engine->execlists.preempt_hang.count) { 2520 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", 2521 engine->name, 2522 engine->execlists.preempt_hang.count, 2523 depth); 2524 err = -EINVAL; 2525 goto err_client_3; 2526 } 2527 } 2528 } 2529 2530 err = 0; 2531 err_client_3: 2532 preempt_client_fini(&client[3]); 2533 err_client_2: 2534 preempt_client_fini(&client[2]); 2535 err_client_1: 2536 preempt_client_fini(&client[1]); 2537 err_client_0: 2538 preempt_client_fini(&client[0]); 2539 
return err; 2540 2541 err_wedged: 2542 for (i = 0; i < ARRAY_SIZE(client); i++) { 2543 igt_spinner_end(&client[i].spin); 2544 i915_request_put(rq[i]); 2545 } 2546 intel_gt_set_wedged(gt); 2547 err = -EIO; 2548 goto err_client_3; 2549 } 2550 2551 static int live_chain_preempt(void *arg) 2552 { 2553 struct intel_gt *gt = arg; 2554 struct intel_engine_cs *engine; 2555 struct preempt_client hi, lo; 2556 enum intel_engine_id id; 2557 int err = -ENOMEM; 2558 2559 /* 2560 * Build a chain AB...BA between two contexts (A, B) and request 2561 * preemption of the last request. It should then complete before 2562 * the previously submitted spinner in B. 2563 */ 2564 2565 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2566 return 0; 2567 2568 if (preempt_client_init(gt, &hi)) 2569 return -ENOMEM; 2570 2571 if (preempt_client_init(gt, &lo)) 2572 goto err_client_hi; 2573 2574 for_each_engine(engine, gt, id) { 2575 struct i915_sched_attr attr = { 2576 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2577 }; 2578 struct igt_live_test t; 2579 struct i915_request *rq; 2580 int ring_size, count, i; 2581 2582 if (!intel_engine_has_preemption(engine)) 2583 continue; 2584 2585 rq = spinner_create_request(&lo.spin, 2586 lo.ctx, engine, 2587 MI_ARB_CHECK); 2588 if (IS_ERR(rq)) 2589 goto err_wedged; 2590 2591 i915_request_get(rq); 2592 i915_request_add(rq); 2593 2594 ring_size = rq->wa_tail - rq->head; 2595 if (ring_size < 0) 2596 ring_size += rq->ring->size; 2597 ring_size = rq->ring->size / ring_size; 2598 pr_debug("%s(%s): Using maximum of %d requests\n", 2599 __func__, engine->name, ring_size); 2600 2601 igt_spinner_end(&lo.spin); 2602 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2603 pr_err("Timed out waiting to flush %s\n", engine->name); 2604 i915_request_put(rq); 2605 goto err_wedged; 2606 } 2607 i915_request_put(rq); 2608 2609 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2610 err = -EIO; 2611 goto err_wedged; 2612 } 2613 2614 for_each_prime_number_from(count, 1, ring_size) { 2615 rq = spinner_create_request(&hi.spin, 2616 hi.ctx, engine, 2617 MI_ARB_CHECK); 2618 if (IS_ERR(rq)) 2619 goto err_wedged; 2620 i915_request_add(rq); 2621 if (!igt_wait_for_spinner(&hi.spin, rq)) 2622 goto err_wedged; 2623 2624 rq = spinner_create_request(&lo.spin, 2625 lo.ctx, engine, 2626 MI_ARB_CHECK); 2627 if (IS_ERR(rq)) 2628 goto err_wedged; 2629 i915_request_add(rq); 2630 2631 for (i = 0; i < count; i++) { 2632 rq = igt_request_alloc(lo.ctx, engine); 2633 if (IS_ERR(rq)) 2634 goto err_wedged; 2635 i915_request_add(rq); 2636 } 2637 2638 rq = igt_request_alloc(hi.ctx, engine); 2639 if (IS_ERR(rq)) 2640 goto err_wedged; 2641 2642 i915_request_get(rq); 2643 i915_request_add(rq); 2644 engine->schedule(rq, &attr); 2645 2646 igt_spinner_end(&hi.spin); 2647 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2648 struct drm_printer p = 2649 drm_info_printer(gt->i915->drm.dev); 2650 2651 pr_err("Failed to preempt over chain of %d\n", 2652 count); 2653 intel_engine_dump(engine, &p, 2654 "%s\n", engine->name); 2655 i915_request_put(rq); 2656 goto err_wedged; 2657 } 2658 igt_spinner_end(&lo.spin); 2659 i915_request_put(rq); 2660 2661 rq = igt_request_alloc(lo.ctx, engine); 2662 if (IS_ERR(rq)) 2663 goto err_wedged; 2664 2665 i915_request_get(rq); 2666 i915_request_add(rq); 2667 2668 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2669 struct drm_printer p = 2670 drm_info_printer(gt->i915->drm.dev); 2671 2672 pr_err("Failed to flush low priority chain of %d requests\n", 2673 count); 2674 intel_engine_dump(engine, &p, 2675 "%s\n", 
engine->name); 2676 2677 i915_request_put(rq); 2678 goto err_wedged; 2679 } 2680 i915_request_put(rq); 2681 } 2682 2683 if (igt_live_test_end(&t)) { 2684 err = -EIO; 2685 goto err_wedged; 2686 } 2687 } 2688 2689 err = 0; 2690 err_client_lo: 2691 preempt_client_fini(&lo); 2692 err_client_hi: 2693 preempt_client_fini(&hi); 2694 return err; 2695 2696 err_wedged: 2697 igt_spinner_end(&hi.spin); 2698 igt_spinner_end(&lo.spin); 2699 intel_gt_set_wedged(gt); 2700 err = -EIO; 2701 goto err_client_lo; 2702 } 2703 2704 static int create_gang(struct intel_engine_cs *engine, 2705 struct i915_request **prev) 2706 { 2707 struct drm_i915_gem_object *obj; 2708 struct intel_context *ce; 2709 struct i915_request *rq; 2710 struct i915_vma *vma; 2711 u32 *cs; 2712 int err; 2713 2714 ce = intel_context_create(engine); 2715 if (IS_ERR(ce)) 2716 return PTR_ERR(ce); 2717 2718 obj = i915_gem_object_create_internal(engine->i915, 4096); 2719 if (IS_ERR(obj)) { 2720 err = PTR_ERR(obj); 2721 goto err_ce; 2722 } 2723 2724 vma = i915_vma_instance(obj, ce->vm, NULL); 2725 if (IS_ERR(vma)) { 2726 err = PTR_ERR(vma); 2727 goto err_obj; 2728 } 2729 2730 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2731 if (err) 2732 goto err_obj; 2733 2734 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2735 if (IS_ERR(cs)) 2736 goto err_obj; 2737 2738 /* Semaphore target: spin until zero */ 2739 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2740 2741 *cs++ = MI_SEMAPHORE_WAIT | 2742 MI_SEMAPHORE_POLL | 2743 MI_SEMAPHORE_SAD_EQ_SDD; 2744 *cs++ = 0; 2745 *cs++ = lower_32_bits(vma->node.start); 2746 *cs++ = upper_32_bits(vma->node.start); 2747 2748 if (*prev) { 2749 u64 offset = (*prev)->batch->node.start; 2750 2751 /* Terminate the spinner in the next lower priority batch. */ 2752 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2753 *cs++ = lower_32_bits(offset); 2754 *cs++ = upper_32_bits(offset); 2755 *cs++ = 0; 2756 } 2757 2758 *cs++ = MI_BATCH_BUFFER_END; 2759 i915_gem_object_flush_map(obj); 2760 i915_gem_object_unpin_map(obj); 2761 2762 rq = intel_context_create_request(ce); 2763 if (IS_ERR(rq)) 2764 goto err_obj; 2765 2766 rq->batch = i915_vma_get(vma); 2767 i915_request_get(rq); 2768 2769 i915_vma_lock(vma); 2770 err = i915_request_await_object(rq, vma->obj, false); 2771 if (!err) 2772 err = i915_vma_move_to_active(vma, rq, 0); 2773 if (!err) 2774 err = rq->engine->emit_bb_start(rq, 2775 vma->node.start, 2776 PAGE_SIZE, 0); 2777 i915_vma_unlock(vma); 2778 i915_request_add(rq); 2779 if (err) 2780 goto err_rq; 2781 2782 i915_gem_object_put(obj); 2783 intel_context_put(ce); 2784 2785 rq->client_link.next = &(*prev)->client_link; 2786 *prev = rq; 2787 return 0; 2788 2789 err_rq: 2790 i915_vma_put(rq->batch); 2791 i915_request_put(rq); 2792 err_obj: 2793 i915_gem_object_put(obj); 2794 err_ce: 2795 intel_context_put(ce); 2796 return err; 2797 } 2798 2799 static int live_preempt_gang(void *arg) 2800 { 2801 struct intel_gt *gt = arg; 2802 struct intel_engine_cs *engine; 2803 enum intel_engine_id id; 2804 2805 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2806 return 0; 2807 2808 /* 2809 * Build as long a chain of preempters as we can, with each 2810 * request higher priority than the last. Once we are ready, we release 2811 * the last batch which then precolates down the chain, each releasing 2812 * the next oldest in turn. The intent is to simply push as hard as we 2813 * can with the number of preemptions, trying to exceed narrow HW 2814 * limits. At a minimum, we insist that we can sort all the user 2815 * high priority levels into execution order. 
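	 *
	 * Roughly, the resulting chain looks like (see create_gang() above):
	 *
	 *   CPU writes 0 -> batch[N-1] (highest prio) stops spinning,
	 *   completes -> writes 0 into batch[N-2]'s semaphore -> ...
	 *   -> batch[0] (lowest prio) completes.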
2816 */ 2817 2818 for_each_engine(engine, gt, id) { 2819 struct i915_request *rq = NULL; 2820 struct igt_live_test t; 2821 IGT_TIMEOUT(end_time); 2822 int prio = 0; 2823 int err = 0; 2824 u32 *cs; 2825 2826 if (!intel_engine_has_preemption(engine)) 2827 continue; 2828 2829 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2830 return -EIO; 2831 2832 do { 2833 struct i915_sched_attr attr = { 2834 .priority = I915_USER_PRIORITY(prio++), 2835 }; 2836 2837 err = create_gang(engine, &rq); 2838 if (err) 2839 break; 2840 2841 /* Submit each spinner at increasing priority */ 2842 engine->schedule(rq, &attr); 2843 2844 if (prio <= I915_PRIORITY_MAX) 2845 continue; 2846 2847 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) 2848 break; 2849 2850 if (__igt_timeout(end_time, NULL)) 2851 break; 2852 } while (1); 2853 pr_debug("%s: Preempt chain of %d requests\n", 2854 engine->name, prio); 2855 2856 /* 2857 * Such that the last spinner is the highest priority and 2858 * should execute first. When that spinner completes, 2859 * it will terminate the next lowest spinner until there 2860 * are no more spinners and the gang is complete. 2861 */ 2862 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2863 if (!IS_ERR(cs)) { 2864 *cs = 0; 2865 i915_gem_object_unpin_map(rq->batch->obj); 2866 } else { 2867 err = PTR_ERR(cs); 2868 intel_gt_set_wedged(gt); 2869 } 2870 2871 while (rq) { /* wait for each rq from highest to lowest prio */ 2872 struct i915_request *n = 2873 list_next_entry(rq, client_link); 2874 2875 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2876 struct drm_printer p = 2877 drm_info_printer(engine->i915->drm.dev); 2878 2879 pr_err("Failed to flush chain of %d requests, at %d\n", 2880 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2881 intel_engine_dump(engine, &p, 2882 "%s\n", engine->name); 2883 2884 err = -ETIME; 2885 } 2886 2887 i915_vma_put(rq->batch); 2888 i915_request_put(rq); 2889 rq = n; 2890 } 2891 2892 if (igt_live_test_end(&t)) 2893 err = -EIO; 2894 if (err) 2895 return err; 2896 } 2897 2898 return 0; 2899 } 2900 2901 static struct i915_vma * 2902 create_gpr_user(struct intel_engine_cs *engine, 2903 struct i915_vma *result, 2904 unsigned int offset) 2905 { 2906 struct drm_i915_gem_object *obj; 2907 struct i915_vma *vma; 2908 u32 *cs; 2909 int err; 2910 int i; 2911 2912 obj = i915_gem_object_create_internal(engine->i915, 4096); 2913 if (IS_ERR(obj)) 2914 return ERR_CAST(obj); 2915 2916 vma = i915_vma_instance(obj, result->vm, NULL); 2917 if (IS_ERR(vma)) { 2918 i915_gem_object_put(obj); 2919 return vma; 2920 } 2921 2922 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2923 if (err) { 2924 i915_vma_put(vma); 2925 return ERR_PTR(err); 2926 } 2927 2928 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2929 if (IS_ERR(cs)) { 2930 i915_vma_put(vma); 2931 return ERR_CAST(cs); 2932 } 2933 2934 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 2935 *cs++ = MI_LOAD_REGISTER_IMM(1); 2936 *cs++ = CS_GPR(engine, 0); 2937 *cs++ = 1; 2938 2939 for (i = 1; i < NUM_GPR; i++) { 2940 u64 addr; 2941 2942 /* 2943 * Perform: GPR[i]++ 2944 * 2945 * As we read and write into the context saved GPR[i], if 2946 * we restart this batch buffer from an earlier point, we 2947 * will repeat the increment and store a value > 1. 
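	 *
	 * Per GPR the batch emits, in outline:
	 *
	 *   MI_MATH:                GPR[i] += GPR[0] (GPR[0] is loaded with 1)
	 *   MI_STORE_REGISTER_MEM:  store GPR[i] to result + offset + i * sizeof(u32)
	 *   MI_SEMAPHORE_WAIT:      poll until *result >= i
	 *
	 * so forward progress is gated on the preempting requests bumping
	 * the semaphore in the result page.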
2948 */ 2949 *cs++ = MI_MATH(4); 2950 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 2951 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 2952 *cs++ = MI_MATH_ADD; 2953 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 2954 2955 addr = result->node.start + offset + i * sizeof(*cs); 2956 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 2957 *cs++ = CS_GPR(engine, 2 * i); 2958 *cs++ = lower_32_bits(addr); 2959 *cs++ = upper_32_bits(addr); 2960 2961 *cs++ = MI_SEMAPHORE_WAIT | 2962 MI_SEMAPHORE_POLL | 2963 MI_SEMAPHORE_SAD_GTE_SDD; 2964 *cs++ = i; 2965 *cs++ = lower_32_bits(result->node.start); 2966 *cs++ = upper_32_bits(result->node.start); 2967 } 2968 2969 *cs++ = MI_BATCH_BUFFER_END; 2970 i915_gem_object_flush_map(obj); 2971 i915_gem_object_unpin_map(obj); 2972 2973 return vma; 2974 } 2975 2976 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 2977 { 2978 struct drm_i915_gem_object *obj; 2979 struct i915_vma *vma; 2980 int err; 2981 2982 obj = i915_gem_object_create_internal(gt->i915, sz); 2983 if (IS_ERR(obj)) 2984 return ERR_CAST(obj); 2985 2986 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 2987 if (IS_ERR(vma)) { 2988 i915_gem_object_put(obj); 2989 return vma; 2990 } 2991 2992 err = i915_ggtt_pin(vma, 0, 0); 2993 if (err) { 2994 i915_vma_put(vma); 2995 return ERR_PTR(err); 2996 } 2997 2998 return vma; 2999 } 3000 3001 static struct i915_request * 3002 create_gpr_client(struct intel_engine_cs *engine, 3003 struct i915_vma *global, 3004 unsigned int offset) 3005 { 3006 struct i915_vma *batch, *vma; 3007 struct intel_context *ce; 3008 struct i915_request *rq; 3009 int err; 3010 3011 ce = intel_context_create(engine); 3012 if (IS_ERR(ce)) 3013 return ERR_CAST(ce); 3014 3015 vma = i915_vma_instance(global->obj, ce->vm, NULL); 3016 if (IS_ERR(vma)) { 3017 err = PTR_ERR(vma); 3018 goto out_ce; 3019 } 3020 3021 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3022 if (err) 3023 goto out_ce; 3024 3025 batch = create_gpr_user(engine, vma, offset); 3026 if (IS_ERR(batch)) { 3027 err = PTR_ERR(batch); 3028 goto out_vma; 3029 } 3030 3031 rq = intel_context_create_request(ce); 3032 if (IS_ERR(rq)) { 3033 err = PTR_ERR(rq); 3034 goto out_batch; 3035 } 3036 3037 i915_vma_lock(vma); 3038 err = i915_request_await_object(rq, vma->obj, false); 3039 if (!err) 3040 err = i915_vma_move_to_active(vma, rq, 0); 3041 i915_vma_unlock(vma); 3042 3043 i915_vma_lock(batch); 3044 if (!err) 3045 err = i915_request_await_object(rq, batch->obj, false); 3046 if (!err) 3047 err = i915_vma_move_to_active(batch, rq, 0); 3048 if (!err) 3049 err = rq->engine->emit_bb_start(rq, 3050 batch->node.start, 3051 PAGE_SIZE, 0); 3052 i915_vma_unlock(batch); 3053 i915_vma_unpin(batch); 3054 3055 if (!err) 3056 i915_request_get(rq); 3057 i915_request_add(rq); 3058 3059 out_batch: 3060 i915_vma_put(batch); 3061 out_vma: 3062 i915_vma_unpin(vma); 3063 out_ce: 3064 intel_context_put(ce); 3065 return err ? 
ERR_PTR(err) : rq; 3066 } 3067 3068 static int preempt_user(struct intel_engine_cs *engine, 3069 struct i915_vma *global, 3070 int id) 3071 { 3072 struct i915_sched_attr attr = { 3073 .priority = I915_PRIORITY_MAX 3074 }; 3075 struct i915_request *rq; 3076 int err = 0; 3077 u32 *cs; 3078 3079 rq = intel_engine_create_kernel_request(engine); 3080 if (IS_ERR(rq)) 3081 return PTR_ERR(rq); 3082 3083 cs = intel_ring_begin(rq, 4); 3084 if (IS_ERR(cs)) { 3085 i915_request_add(rq); 3086 return PTR_ERR(cs); 3087 } 3088 3089 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 3090 *cs++ = i915_ggtt_offset(global); 3091 *cs++ = 0; 3092 *cs++ = id; 3093 3094 intel_ring_advance(rq, cs); 3095 3096 i915_request_get(rq); 3097 i915_request_add(rq); 3098 3099 engine->schedule(rq, &attr); 3100 3101 if (i915_request_wait(rq, 0, HZ / 2) < 0) 3102 err = -ETIME; 3103 i915_request_put(rq); 3104 3105 return err; 3106 } 3107 3108 static int live_preempt_user(void *arg) 3109 { 3110 struct intel_gt *gt = arg; 3111 struct intel_engine_cs *engine; 3112 struct i915_vma *global; 3113 enum intel_engine_id id; 3114 u32 *result; 3115 int err = 0; 3116 3117 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3118 return 0; 3119 3120 /* 3121 * In our other tests, we look at preemption in carefully 3122 * controlled conditions in the ringbuffer. Since most of the 3123 * time is spent in user batches, most of our preemptions naturally 3124 * occur there. We want to verify that when we preempt inside a batch 3125 * we continue on from the current instruction and do not roll back 3126 * to the start, or another earlier arbitration point. 3127 * 3128 * To verify this, we create a batch which is a mixture of 3129 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3130 * a few preempting contexts thrown into the mix, we look for any 3131 * repeated instructions (which show up as incorrect values). 
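	 *
	 * Each client stores its GPRs at a separate offset in the shared
	 * result page (NUM_GPR * i * sizeof(u32)), while result[0] doubles
	 * as the semaphore that preempt_user() bumps from 1 to NUM_GPR. At
	 * the end every stored GPR must read exactly 1, i.e. each increment
	 * ran exactly once despite the preemptions.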
3132 */ 3133 3134 global = create_global(gt, 4096); 3135 if (IS_ERR(global)) 3136 return PTR_ERR(global); 3137 3138 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); 3139 if (IS_ERR(result)) { 3140 i915_vma_unpin_and_release(&global, 0); 3141 return PTR_ERR(result); 3142 } 3143 3144 for_each_engine(engine, gt, id) { 3145 struct i915_request *client[3] = {}; 3146 struct igt_live_test t; 3147 int i; 3148 3149 if (!intel_engine_has_preemption(engine)) 3150 continue; 3151 3152 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) 3153 continue; /* we need per-context GPR */ 3154 3155 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3156 err = -EIO; 3157 break; 3158 } 3159 3160 memset(result, 0, 4096); 3161 3162 for (i = 0; i < ARRAY_SIZE(client); i++) { 3163 struct i915_request *rq; 3164 3165 rq = create_gpr_client(engine, global, 3166 NUM_GPR * i * sizeof(u32)); 3167 if (IS_ERR(rq)) 3168 goto end_test; 3169 3170 client[i] = rq; 3171 } 3172 3173 /* Continuously preempt the set of 3 running contexts */ 3174 for (i = 1; i <= NUM_GPR; i++) { 3175 err = preempt_user(engine, global, i); 3176 if (err) 3177 goto end_test; 3178 } 3179 3180 if (READ_ONCE(result[0]) != NUM_GPR) { 3181 pr_err("%s: Failed to release semaphore\n", 3182 engine->name); 3183 err = -EIO; 3184 goto end_test; 3185 } 3186 3187 for (i = 0; i < ARRAY_SIZE(client); i++) { 3188 int gpr; 3189 3190 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3191 err = -ETIME; 3192 goto end_test; 3193 } 3194 3195 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3196 if (result[NUM_GPR * i + gpr] != 1) { 3197 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3198 engine->name, 3199 i, gpr, result[NUM_GPR * i + gpr]); 3200 err = -EINVAL; 3201 goto end_test; 3202 } 3203 } 3204 } 3205 3206 end_test: 3207 for (i = 0; i < ARRAY_SIZE(client); i++) { 3208 if (!client[i]) 3209 break; 3210 3211 i915_request_put(client[i]); 3212 } 3213 3214 /* Flush the semaphores on error */ 3215 smp_store_mb(result[0], -1); 3216 if (igt_live_test_end(&t)) 3217 err = -EIO; 3218 if (err) 3219 break; 3220 } 3221 3222 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3223 return err; 3224 } 3225 3226 static int live_preempt_timeout(void *arg) 3227 { 3228 struct intel_gt *gt = arg; 3229 struct i915_gem_context *ctx_hi, *ctx_lo; 3230 struct igt_spinner spin_lo; 3231 struct intel_engine_cs *engine; 3232 enum intel_engine_id id; 3233 int err = -ENOMEM; 3234 3235 /* 3236 * Check that we force preemption to occur by cancelling the previous 3237 * context if it refuses to yield the GPU. 
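	 *
	 * The low priority spinner is created with MI_NOOP, i.e. without an
	 * arbitration point, so it cannot be preempted gracefully. With
	 * preempt_timeout_ms temporarily dropped to 1, queueing the high
	 * priority request should instead trigger an engine reset that
	 * cancels the spinner and lets the new request complete.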
3238 */ 3239 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3240 return 0; 3241 3242 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3243 return 0; 3244 3245 if (!intel_has_reset_engine(gt)) 3246 return 0; 3247 3248 if (igt_spinner_init(&spin_lo, gt)) 3249 return -ENOMEM; 3250 3251 ctx_hi = kernel_context(gt->i915); 3252 if (!ctx_hi) 3253 goto err_spin_lo; 3254 ctx_hi->sched.priority = 3255 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 3256 3257 ctx_lo = kernel_context(gt->i915); 3258 if (!ctx_lo) 3259 goto err_ctx_hi; 3260 ctx_lo->sched.priority = 3261 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 3262 3263 for_each_engine(engine, gt, id) { 3264 unsigned long saved_timeout; 3265 struct i915_request *rq; 3266 3267 if (!intel_engine_has_preemption(engine)) 3268 continue; 3269 3270 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3271 MI_NOOP); /* preemption disabled */ 3272 if (IS_ERR(rq)) { 3273 err = PTR_ERR(rq); 3274 goto err_ctx_lo; 3275 } 3276 3277 i915_request_add(rq); 3278 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3279 intel_gt_set_wedged(gt); 3280 err = -EIO; 3281 goto err_ctx_lo; 3282 } 3283 3284 rq = igt_request_alloc(ctx_hi, engine); 3285 if (IS_ERR(rq)) { 3286 igt_spinner_end(&spin_lo); 3287 err = PTR_ERR(rq); 3288 goto err_ctx_lo; 3289 } 3290 3291 /* Flush the previous CS ack before changing timeouts */ 3292 while (READ_ONCE(engine->execlists.pending[0])) 3293 cpu_relax(); 3294 3295 saved_timeout = engine->props.preempt_timeout_ms; 3296 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3297 3298 i915_request_get(rq); 3299 i915_request_add(rq); 3300 3301 intel_engine_flush_submission(engine); 3302 engine->props.preempt_timeout_ms = saved_timeout; 3303 3304 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3305 intel_gt_set_wedged(gt); 3306 i915_request_put(rq); 3307 err = -ETIME; 3308 goto err_ctx_lo; 3309 } 3310 3311 igt_spinner_end(&spin_lo); 3312 i915_request_put(rq); 3313 } 3314 3315 err = 0; 3316 err_ctx_lo: 3317 kernel_context_close(ctx_lo); 3318 err_ctx_hi: 3319 kernel_context_close(ctx_hi); 3320 err_spin_lo: 3321 igt_spinner_fini(&spin_lo); 3322 return err; 3323 } 3324 3325 static int random_range(struct rnd_state *rnd, int min, int max) 3326 { 3327 return i915_prandom_u32_max_state(max - min, rnd) + min; 3328 } 3329 3330 static int random_priority(struct rnd_state *rnd) 3331 { 3332 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3333 } 3334 3335 struct preempt_smoke { 3336 struct intel_gt *gt; 3337 struct i915_gem_context **contexts; 3338 struct intel_engine_cs *engine; 3339 struct drm_i915_gem_object *batch; 3340 unsigned int ncontext; 3341 struct rnd_state prng; 3342 unsigned long count; 3343 }; 3344 3345 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3346 { 3347 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3348 &smoke->prng)]; 3349 } 3350 3351 static int smoke_submit(struct preempt_smoke *smoke, 3352 struct i915_gem_context *ctx, int prio, 3353 struct drm_i915_gem_object *batch) 3354 { 3355 struct i915_request *rq; 3356 struct i915_vma *vma = NULL; 3357 int err = 0; 3358 3359 if (batch) { 3360 struct i915_address_space *vm; 3361 3362 vm = i915_gem_context_get_vm_rcu(ctx); 3363 vma = i915_vma_instance(batch, vm, NULL); 3364 i915_vm_put(vm); 3365 if (IS_ERR(vma)) 3366 return PTR_ERR(vma); 3367 3368 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3369 if (err) 3370 return err; 3371 } 3372 3373 ctx->sched.priority = prio; 3374 3375 rq = igt_request_alloc(ctx, smoke->engine); 3376 if 
(IS_ERR(rq)) { 3377 err = PTR_ERR(rq); 3378 goto unpin; 3379 } 3380 3381 if (vma) { 3382 i915_vma_lock(vma); 3383 err = i915_request_await_object(rq, vma->obj, false); 3384 if (!err) 3385 err = i915_vma_move_to_active(vma, rq, 0); 3386 if (!err) 3387 err = rq->engine->emit_bb_start(rq, 3388 vma->node.start, 3389 PAGE_SIZE, 0); 3390 i915_vma_unlock(vma); 3391 } 3392 3393 i915_request_add(rq); 3394 3395 unpin: 3396 if (vma) 3397 i915_vma_unpin(vma); 3398 3399 return err; 3400 } 3401 3402 static int smoke_crescendo_thread(void *arg) 3403 { 3404 struct preempt_smoke *smoke = arg; 3405 IGT_TIMEOUT(end_time); 3406 unsigned long count; 3407 3408 count = 0; 3409 do { 3410 struct i915_gem_context *ctx = smoke_context(smoke); 3411 int err; 3412 3413 err = smoke_submit(smoke, 3414 ctx, count % I915_PRIORITY_MAX, 3415 smoke->batch); 3416 if (err) 3417 return err; 3418 3419 count++; 3420 } while (!__igt_timeout(end_time, NULL)); 3421 3422 smoke->count = count; 3423 return 0; 3424 } 3425 3426 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3427 #define BATCH BIT(0) 3428 { 3429 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3430 struct preempt_smoke arg[I915_NUM_ENGINES]; 3431 struct intel_engine_cs *engine; 3432 enum intel_engine_id id; 3433 unsigned long count; 3434 int err = 0; 3435 3436 for_each_engine(engine, smoke->gt, id) { 3437 arg[id] = *smoke; 3438 arg[id].engine = engine; 3439 if (!(flags & BATCH)) 3440 arg[id].batch = NULL; 3441 arg[id].count = 0; 3442 3443 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 3444 "igt/smoke:%d", id); 3445 if (IS_ERR(tsk[id])) { 3446 err = PTR_ERR(tsk[id]); 3447 break; 3448 } 3449 get_task_struct(tsk[id]); 3450 } 3451 3452 yield(); /* start all threads before we kthread_stop() */ 3453 3454 count = 0; 3455 for_each_engine(engine, smoke->gt, id) { 3456 int status; 3457 3458 if (IS_ERR_OR_NULL(tsk[id])) 3459 continue; 3460 3461 status = kthread_stop(tsk[id]); 3462 if (status && !err) 3463 err = status; 3464 3465 count += arg[id].count; 3466 3467 put_task_struct(tsk[id]); 3468 } 3469 3470 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3471 count, flags, 3472 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3473 return 0; 3474 } 3475 3476 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3477 { 3478 enum intel_engine_id id; 3479 IGT_TIMEOUT(end_time); 3480 unsigned long count; 3481 3482 count = 0; 3483 do { 3484 for_each_engine(smoke->engine, smoke->gt, id) { 3485 struct i915_gem_context *ctx = smoke_context(smoke); 3486 int err; 3487 3488 err = smoke_submit(smoke, 3489 ctx, random_priority(&smoke->prng), 3490 flags & BATCH ? 
smoke->batch : NULL); 3491 if (err) 3492 return err; 3493 3494 count++; 3495 } 3496 } while (!__igt_timeout(end_time, NULL)); 3497 3498 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3499 count, flags, 3500 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3501 return 0; 3502 } 3503 3504 static int live_preempt_smoke(void *arg) 3505 { 3506 struct preempt_smoke smoke = { 3507 .gt = arg, 3508 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3509 .ncontext = 1024, 3510 }; 3511 const unsigned int phase[] = { 0, BATCH }; 3512 struct igt_live_test t; 3513 int err = -ENOMEM; 3514 u32 *cs; 3515 int n; 3516 3517 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3518 return 0; 3519 3520 smoke.contexts = kmalloc_array(smoke.ncontext, 3521 sizeof(*smoke.contexts), 3522 GFP_KERNEL); 3523 if (!smoke.contexts) 3524 return -ENOMEM; 3525 3526 smoke.batch = 3527 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3528 if (IS_ERR(smoke.batch)) { 3529 err = PTR_ERR(smoke.batch); 3530 goto err_free; 3531 } 3532 3533 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3534 if (IS_ERR(cs)) { 3535 err = PTR_ERR(cs); 3536 goto err_batch; 3537 } 3538 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3539 cs[n] = MI_ARB_CHECK; 3540 cs[n] = MI_BATCH_BUFFER_END; 3541 i915_gem_object_flush_map(smoke.batch); 3542 i915_gem_object_unpin_map(smoke.batch); 3543 3544 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3545 err = -EIO; 3546 goto err_batch; 3547 } 3548 3549 for (n = 0; n < smoke.ncontext; n++) { 3550 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3551 if (!smoke.contexts[n]) 3552 goto err_ctx; 3553 } 3554 3555 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3556 err = smoke_crescendo(&smoke, phase[n]); 3557 if (err) 3558 goto err_ctx; 3559 3560 err = smoke_random(&smoke, phase[n]); 3561 if (err) 3562 goto err_ctx; 3563 } 3564 3565 err_ctx: 3566 if (igt_live_test_end(&t)) 3567 err = -EIO; 3568 3569 for (n = 0; n < smoke.ncontext; n++) { 3570 if (!smoke.contexts[n]) 3571 break; 3572 kernel_context_close(smoke.contexts[n]); 3573 } 3574 3575 err_batch: 3576 i915_gem_object_put(smoke.batch); 3577 err_free: 3578 kfree(smoke.contexts); 3579 3580 return err; 3581 } 3582 3583 static int nop_virtual_engine(struct intel_gt *gt, 3584 struct intel_engine_cs **siblings, 3585 unsigned int nsibling, 3586 unsigned int nctx, 3587 unsigned int flags) 3588 #define CHAIN BIT(0) 3589 { 3590 IGT_TIMEOUT(end_time); 3591 struct i915_request *request[16] = {}; 3592 struct intel_context *ve[16]; 3593 unsigned long n, prime, nc; 3594 struct igt_live_test t; 3595 ktime_t times[2] = {}; 3596 int err; 3597 3598 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3599 3600 for (n = 0; n < nctx; n++) { 3601 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3602 if (IS_ERR(ve[n])) { 3603 err = PTR_ERR(ve[n]); 3604 nctx = n; 3605 goto out; 3606 } 3607 3608 err = intel_context_pin(ve[n]); 3609 if (err) { 3610 intel_context_put(ve[n]); 3611 nctx = n; 3612 goto out; 3613 } 3614 } 3615 3616 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3617 if (err) 3618 goto out; 3619 3620 for_each_prime_number_from(prime, 1, 8192) { 3621 times[1] = ktime_get_raw(); 3622 3623 if (flags & CHAIN) { 3624 for (nc = 0; nc < nctx; nc++) { 3625 for (n = 0; n < prime; n++) { 3626 struct i915_request *rq; 3627 3628 rq = i915_request_create(ve[nc]); 3629 if (IS_ERR(rq)) { 3630 err = PTR_ERR(rq); 3631 goto out; 3632 } 3633 3634 if (request[nc]) 3635 
i915_request_put(request[nc]); 3636 request[nc] = i915_request_get(rq); 3637 i915_request_add(rq); 3638 } 3639 } 3640 } else { 3641 for (n = 0; n < prime; n++) { 3642 for (nc = 0; nc < nctx; nc++) { 3643 struct i915_request *rq; 3644 3645 rq = i915_request_create(ve[nc]); 3646 if (IS_ERR(rq)) { 3647 err = PTR_ERR(rq); 3648 goto out; 3649 } 3650 3651 if (request[nc]) 3652 i915_request_put(request[nc]); 3653 request[nc] = i915_request_get(rq); 3654 i915_request_add(rq); 3655 } 3656 } 3657 } 3658 3659 for (nc = 0; nc < nctx; nc++) { 3660 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3661 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3662 __func__, ve[0]->engine->name, 3663 request[nc]->fence.context, 3664 request[nc]->fence.seqno); 3665 3666 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3667 __func__, ve[0]->engine->name, 3668 request[nc]->fence.context, 3669 request[nc]->fence.seqno); 3670 GEM_TRACE_DUMP(); 3671 intel_gt_set_wedged(gt); 3672 break; 3673 } 3674 } 3675 3676 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3677 if (prime == 1) 3678 times[0] = times[1]; 3679 3680 for (nc = 0; nc < nctx; nc++) { 3681 i915_request_put(request[nc]); 3682 request[nc] = NULL; 3683 } 3684 3685 if (__igt_timeout(end_time, NULL)) 3686 break; 3687 } 3688 3689 err = igt_live_test_end(&t); 3690 if (err) 3691 goto out; 3692 3693 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3694 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3695 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3696 3697 out: 3698 if (igt_flush_test(gt->i915)) 3699 err = -EIO; 3700 3701 for (nc = 0; nc < nctx; nc++) { 3702 i915_request_put(request[nc]); 3703 intel_context_unpin(ve[nc]); 3704 intel_context_put(ve[nc]); 3705 } 3706 return err; 3707 } 3708 3709 static int live_virtual_engine(void *arg) 3710 { 3711 struct intel_gt *gt = arg; 3712 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3713 struct intel_engine_cs *engine; 3714 enum intel_engine_id id; 3715 unsigned int class, inst; 3716 int err; 3717 3718 if (intel_uc_uses_guc_submission(>->uc)) 3719 return 0; 3720 3721 for_each_engine(engine, gt, id) { 3722 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3723 if (err) { 3724 pr_err("Failed to wrap engine %s: err=%d\n", 3725 engine->name, err); 3726 return err; 3727 } 3728 } 3729 3730 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3731 int nsibling, n; 3732 3733 nsibling = 0; 3734 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3735 if (!gt->engine_class[class][inst]) 3736 continue; 3737 3738 siblings[nsibling++] = gt->engine_class[class][inst]; 3739 } 3740 if (nsibling < 2) 3741 continue; 3742 3743 for (n = 1; n <= nsibling + 1; n++) { 3744 err = nop_virtual_engine(gt, siblings, nsibling, 3745 n, 0); 3746 if (err) 3747 return err; 3748 } 3749 3750 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3751 if (err) 3752 return err; 3753 } 3754 3755 return 0; 3756 } 3757 3758 static int mask_virtual_engine(struct intel_gt *gt, 3759 struct intel_engine_cs **siblings, 3760 unsigned int nsibling) 3761 { 3762 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3763 struct intel_context *ve; 3764 struct igt_live_test t; 3765 unsigned int n; 3766 int err; 3767 3768 /* 3769 * Check that by setting the execution mask on a request, we can 3770 * restrict it to our desired engine within the virtual engine. 
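	 *
	 * One request is created on the virtual engine per sibling, each
	 * with its execution_mask narrowed to a single physical engine (in
	 * reverse order), and we then verify that rq->engine matches the
	 * sibling that was requested once the request has executed.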
3771 */ 3772 3773 ve = intel_execlists_create_virtual(siblings, nsibling); 3774 if (IS_ERR(ve)) { 3775 err = PTR_ERR(ve); 3776 goto out_close; 3777 } 3778 3779 err = intel_context_pin(ve); 3780 if (err) 3781 goto out_put; 3782 3783 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3784 if (err) 3785 goto out_unpin; 3786 3787 for (n = 0; n < nsibling; n++) { 3788 request[n] = i915_request_create(ve); 3789 if (IS_ERR(request[n])) { 3790 err = PTR_ERR(request[n]); 3791 nsibling = n; 3792 goto out; 3793 } 3794 3795 /* Reverse order as it's more likely to be unnatural */ 3796 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3797 3798 i915_request_get(request[n]); 3799 i915_request_add(request[n]); 3800 } 3801 3802 for (n = 0; n < nsibling; n++) { 3803 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3804 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3805 __func__, ve->engine->name, 3806 request[n]->fence.context, 3807 request[n]->fence.seqno); 3808 3809 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3810 __func__, ve->engine->name, 3811 request[n]->fence.context, 3812 request[n]->fence.seqno); 3813 GEM_TRACE_DUMP(); 3814 intel_gt_set_wedged(gt); 3815 err = -EIO; 3816 goto out; 3817 } 3818 3819 if (request[n]->engine != siblings[nsibling - n - 1]) { 3820 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3821 request[n]->engine->name, 3822 siblings[nsibling - n - 1]->name); 3823 err = -EINVAL; 3824 goto out; 3825 } 3826 } 3827 3828 err = igt_live_test_end(&t); 3829 out: 3830 if (igt_flush_test(gt->i915)) 3831 err = -EIO; 3832 3833 for (n = 0; n < nsibling; n++) 3834 i915_request_put(request[n]); 3835 3836 out_unpin: 3837 intel_context_unpin(ve); 3838 out_put: 3839 intel_context_put(ve); 3840 out_close: 3841 return err; 3842 } 3843 3844 static int live_virtual_mask(void *arg) 3845 { 3846 struct intel_gt *gt = arg; 3847 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3848 unsigned int class, inst; 3849 int err; 3850 3851 if (intel_uc_uses_guc_submission(>->uc)) 3852 return 0; 3853 3854 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3855 unsigned int nsibling; 3856 3857 nsibling = 0; 3858 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3859 if (!gt->engine_class[class][inst]) 3860 break; 3861 3862 siblings[nsibling++] = gt->engine_class[class][inst]; 3863 } 3864 if (nsibling < 2) 3865 continue; 3866 3867 err = mask_virtual_engine(gt, siblings, nsibling); 3868 if (err) 3869 return err; 3870 } 3871 3872 return 0; 3873 } 3874 3875 static int preserved_virtual_engine(struct intel_gt *gt, 3876 struct intel_engine_cs **siblings, 3877 unsigned int nsibling) 3878 { 3879 struct i915_request *last = NULL; 3880 struct intel_context *ve; 3881 struct i915_vma *scratch; 3882 struct igt_live_test t; 3883 unsigned int n; 3884 int err = 0; 3885 u32 *cs; 3886 3887 scratch = create_scratch(siblings[0]->gt); 3888 if (IS_ERR(scratch)) 3889 return PTR_ERR(scratch); 3890 3891 err = i915_vma_sync(scratch); 3892 if (err) 3893 goto out_scratch; 3894 3895 ve = intel_execlists_create_virtual(siblings, nsibling); 3896 if (IS_ERR(ve)) { 3897 err = PTR_ERR(ve); 3898 goto out_scratch; 3899 } 3900 3901 err = intel_context_pin(ve); 3902 if (err) 3903 goto out_put; 3904 3905 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3906 if (err) 3907 goto out_unpin; 3908 3909 for (n = 0; n < NUM_GPR_DW; n++) { 3910 struct intel_engine_cs *engine = siblings[n % nsibling]; 3911 struct i915_request *rq; 3912 3913 rq = i915_request_create(ve); 3914 if 
(IS_ERR(rq)) { 3915 err = PTR_ERR(rq); 3916 goto out_end; 3917 } 3918 3919 i915_request_put(last); 3920 last = i915_request_get(rq); 3921 3922 cs = intel_ring_begin(rq, 8); 3923 if (IS_ERR(cs)) { 3924 i915_request_add(rq); 3925 err = PTR_ERR(cs); 3926 goto out_end; 3927 } 3928 3929 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3930 *cs++ = CS_GPR(engine, n); 3931 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 3932 *cs++ = 0; 3933 3934 *cs++ = MI_LOAD_REGISTER_IMM(1); 3935 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 3936 *cs++ = n + 1; 3937 3938 *cs++ = MI_NOOP; 3939 intel_ring_advance(rq, cs); 3940 3941 /* Restrict this request to run on a particular engine */ 3942 rq->execution_mask = engine->mask; 3943 i915_request_add(rq); 3944 } 3945 3946 if (i915_request_wait(last, 0, HZ / 5) < 0) { 3947 err = -ETIME; 3948 goto out_end; 3949 } 3950 3951 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3952 if (IS_ERR(cs)) { 3953 err = PTR_ERR(cs); 3954 goto out_end; 3955 } 3956 3957 for (n = 0; n < NUM_GPR_DW; n++) { 3958 if (cs[n] != n) { 3959 pr_err("Incorrect value[%d] found for GPR[%d]\n", 3960 cs[n], n); 3961 err = -EINVAL; 3962 break; 3963 } 3964 } 3965 3966 i915_gem_object_unpin_map(scratch->obj); 3967 3968 out_end: 3969 if (igt_live_test_end(&t)) 3970 err = -EIO; 3971 i915_request_put(last); 3972 out_unpin: 3973 intel_context_unpin(ve); 3974 out_put: 3975 intel_context_put(ve); 3976 out_scratch: 3977 i915_vma_unpin_and_release(&scratch, 0); 3978 return err; 3979 } 3980 3981 static int live_virtual_preserved(void *arg) 3982 { 3983 struct intel_gt *gt = arg; 3984 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3985 unsigned int class, inst; 3986 3987 /* 3988 * Check that the context image retains non-privileged (user) registers 3989 * from one engine to the next. For this we check that the CS_GPR 3990 * are preserved. 3991 */ 3992 3993 if (intel_uc_uses_guc_submission(>->uc)) 3994 return 0; 3995 3996 /* As we use CS_GPR we cannot run before they existed on all engines. */ 3997 if (INTEL_GEN(gt->i915) < 9) 3998 return 0; 3999 4000 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4001 int nsibling, err; 4002 4003 nsibling = 0; 4004 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 4005 if (!gt->engine_class[class][inst]) 4006 continue; 4007 4008 siblings[nsibling++] = gt->engine_class[class][inst]; 4009 } 4010 if (nsibling < 2) 4011 continue; 4012 4013 err = preserved_virtual_engine(gt, siblings, nsibling); 4014 if (err) 4015 return err; 4016 } 4017 4018 return 0; 4019 } 4020 4021 static int bond_virtual_engine(struct intel_gt *gt, 4022 unsigned int class, 4023 struct intel_engine_cs **siblings, 4024 unsigned int nsibling, 4025 unsigned int flags) 4026 #define BOND_SCHEDULE BIT(0) 4027 { 4028 struct intel_engine_cs *master; 4029 struct i915_request *rq[16]; 4030 enum intel_engine_id id; 4031 struct igt_spinner spin; 4032 unsigned long n; 4033 int err; 4034 4035 /* 4036 * A set of bonded requests is intended to be run concurrently 4037 * across a number of engines. We use one request per-engine 4038 * and a magic fence to schedule each of the bonded requests 4039 * at the same time. A consequence of our current scheduler is that 4040 * we only move requests to the HW ready queue when the request 4041 * becomes ready, that is when all of its prerequisite fences have 4042 * been signaled. As one of those fences is the master submit fence, 4043 * there is a delay on all secondary fences as the HW may be 4044 * currently busy. 
Equally, as all the requests are independent, 4045 * they may have other fences that delay individual request 4046 * submission to HW. Ergo, we do not guarantee that all requests are 4047 * immediately submitted to HW at the same time, just that if the 4048 * rules are abided by, they are ready at the same time as the 4049 * first is submitted. Userspace can embed semaphores in its batch 4050 * to ensure parallel execution of its phases as it requires. 4051 * Though naturally it gets requested that perhaps the scheduler should 4052 * take care of parallel execution, even across preemption events on 4053 * different HW. (The proper answer is of course "lalalala".) 4054 * 4055 * With the submit-fence, we have identified three possible phases 4056 * of synchronisation depending on the master fence: queued (not 4057 * ready), executing, and signaled. The first two are quite simple 4058 * and checked below. However, the signaled master fence handling is 4059 * contentious. Currently we do not distinguish between a signaled 4060 * fence and an expired fence, as once signaled it does not convey 4061 * any information about the previous execution. It may even be freed 4062 * and hence checking later it may not exist at all. Ergo we currently 4063 * do not apply the bonding constraint for an already signaled fence, 4064 * as our expectation is that it should not constrain the secondaries 4065 * and is outside of the scope of the bonded request API (i.e. all 4066 * userspace requests are meant to be running in parallel). As 4067 * it imposes no constraint, and is effectively a no-op, we do not 4068 * check below as normal execution flows are checked extensively above. 4069 * 4070 * XXX Is the degenerate handling of signaled submit fences the 4071 * expected behaviour for userpace? 
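	 *
	 * The loop below therefore covers the queued phase (BOND_SCHEDULE:
	 * the master is held back by an onstack sw_fence so it is not yet
	 * ready when the secondaries are submitted) and the executing phase
	 * (no flag: we wait for the master's spinner to start first). In
	 * both cases each bonded secondary, coupled to the master via
	 * i915_request_await_execution() and the engine's bond_execute
	 * hook, must run on its designated sibling.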
4072 */ 4073 4074 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 4075 4076 if (igt_spinner_init(&spin, gt)) 4077 return -ENOMEM; 4078 4079 err = 0; 4080 rq[0] = ERR_PTR(-ENOMEM); 4081 for_each_engine(master, gt, id) { 4082 struct i915_sw_fence fence = {}; 4083 struct intel_context *ce; 4084 4085 if (master->class == class) 4086 continue; 4087 4088 ce = intel_context_create(master); 4089 if (IS_ERR(ce)) { 4090 err = PTR_ERR(ce); 4091 goto out; 4092 } 4093 4094 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 4095 4096 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 4097 intel_context_put(ce); 4098 if (IS_ERR(rq[0])) { 4099 err = PTR_ERR(rq[0]); 4100 goto out; 4101 } 4102 i915_request_get(rq[0]); 4103 4104 if (flags & BOND_SCHEDULE) { 4105 onstack_fence_init(&fence); 4106 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4107 &fence, 4108 GFP_KERNEL); 4109 } 4110 4111 i915_request_add(rq[0]); 4112 if (err < 0) 4113 goto out; 4114 4115 if (!(flags & BOND_SCHEDULE) && 4116 !igt_wait_for_spinner(&spin, rq[0])) { 4117 err = -EIO; 4118 goto out; 4119 } 4120 4121 for (n = 0; n < nsibling; n++) { 4122 struct intel_context *ve; 4123 4124 ve = intel_execlists_create_virtual(siblings, nsibling); 4125 if (IS_ERR(ve)) { 4126 err = PTR_ERR(ve); 4127 onstack_fence_fini(&fence); 4128 goto out; 4129 } 4130 4131 err = intel_virtual_engine_attach_bond(ve->engine, 4132 master, 4133 siblings[n]); 4134 if (err) { 4135 intel_context_put(ve); 4136 onstack_fence_fini(&fence); 4137 goto out; 4138 } 4139 4140 err = intel_context_pin(ve); 4141 intel_context_put(ve); 4142 if (err) { 4143 onstack_fence_fini(&fence); 4144 goto out; 4145 } 4146 4147 rq[n + 1] = i915_request_create(ve); 4148 intel_context_unpin(ve); 4149 if (IS_ERR(rq[n + 1])) { 4150 err = PTR_ERR(rq[n + 1]); 4151 onstack_fence_fini(&fence); 4152 goto out; 4153 } 4154 i915_request_get(rq[n + 1]); 4155 4156 err = i915_request_await_execution(rq[n + 1], 4157 &rq[0]->fence, 4158 ve->engine->bond_execute); 4159 i915_request_add(rq[n + 1]); 4160 if (err < 0) { 4161 onstack_fence_fini(&fence); 4162 goto out; 4163 } 4164 } 4165 onstack_fence_fini(&fence); 4166 intel_engine_flush_submission(master); 4167 igt_spinner_end(&spin); 4168 4169 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4170 pr_err("Master request did not execute (on %s)!\n", 4171 rq[0]->engine->name); 4172 err = -EIO; 4173 goto out; 4174 } 4175 4176 for (n = 0; n < nsibling; n++) { 4177 if (i915_request_wait(rq[n + 1], 0, 4178 MAX_SCHEDULE_TIMEOUT) < 0) { 4179 err = -EIO; 4180 goto out; 4181 } 4182 4183 if (rq[n + 1]->engine != siblings[n]) { 4184 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4185 siblings[n]->name, 4186 rq[n + 1]->engine->name, 4187 rq[0]->engine->name); 4188 err = -EINVAL; 4189 goto out; 4190 } 4191 } 4192 4193 for (n = 0; !IS_ERR(rq[n]); n++) 4194 i915_request_put(rq[n]); 4195 rq[0] = ERR_PTR(-ENOMEM); 4196 } 4197 4198 out: 4199 for (n = 0; !IS_ERR(rq[n]); n++) 4200 i915_request_put(rq[n]); 4201 if (igt_flush_test(gt->i915)) 4202 err = -EIO; 4203 4204 igt_spinner_fini(&spin); 4205 return err; 4206 } 4207 4208 static int live_virtual_bond(void *arg) 4209 { 4210 static const struct phase { 4211 const char *name; 4212 unsigned int flags; 4213 } phases[] = { 4214 { "", 0 }, 4215 { "schedule", BOND_SCHEDULE }, 4216 { }, 4217 }; 4218 struct intel_gt *gt = arg; 4219 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4220 unsigned int class, inst; 4221 int err; 4222 4223 if (intel_uc_uses_guc_submission(>->uc)) 
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		const struct phase *p;
		int nsibling;

		nsibling = 0;
		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			if (!gt->engine_class[class][inst])
				break;

			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
			siblings[nsibling++] = gt->engine_class[class][inst];
		}
		if (nsibling < 2)
			continue;

		for (p = phases; p->name; p++) {
			err = bond_virtual_engine(gt,
						  class, siblings, nsibling,
						  p->flags);
			if (err) {
				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
				       __func__, p->name, class, nsibling, err);
				return err;
			}
		}
	}

	return 0;
}

static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		engine_heartbeat_disable(siblings[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &gt->reset.flags)) {
		intel_gt_set_wedged(gt);
		err = -EBUSY;
		goto out_heartbeat;
	}
	tasklet_disable(&engine->execlists.tasklet);

	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->active.lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->active.lock);
	GEM_BUG_ON(rq->engine != ve->engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	intel_engine_reset(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
intel_gt_set_wedged(gt); 4347 err = -ETIME; 4348 } 4349 4350 out_rq: 4351 i915_request_put(rq); 4352 out_heartbeat: 4353 for (n = 0; n < nsibling; n++) 4354 engine_heartbeat_enable(siblings[n]); 4355 4356 intel_context_put(ve); 4357 out_spin: 4358 igt_spinner_fini(&spin); 4359 return err; 4360 } 4361 4362 static int live_virtual_reset(void *arg) 4363 { 4364 struct intel_gt *gt = arg; 4365 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4366 unsigned int class, inst; 4367 4368 /* 4369 * Check that we handle a reset event within a virtual engine. 4370 * Only the physical engine is reset, but we have to check the flow 4371 * of the virtual requests around the reset, and make sure it is not 4372 * forgotten. 4373 */ 4374 4375 if (intel_uc_uses_guc_submission(>->uc)) 4376 return 0; 4377 4378 if (!intel_has_reset_engine(gt)) 4379 return 0; 4380 4381 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4382 int nsibling, err; 4383 4384 nsibling = 0; 4385 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 4386 if (!gt->engine_class[class][inst]) 4387 continue; 4388 4389 siblings[nsibling++] = gt->engine_class[class][inst]; 4390 } 4391 if (nsibling < 2) 4392 continue; 4393 4394 err = reset_virtual_engine(gt, siblings, nsibling); 4395 if (err) 4396 return err; 4397 } 4398 4399 return 0; 4400 } 4401 4402 int intel_execlists_live_selftests(struct drm_i915_private *i915) 4403 { 4404 static const struct i915_subtest tests[] = { 4405 SUBTEST(live_sanitycheck), 4406 SUBTEST(live_unlite_switch), 4407 SUBTEST(live_unlite_preempt), 4408 SUBTEST(live_pin_rewind), 4409 SUBTEST(live_hold_reset), 4410 SUBTEST(live_error_interrupt), 4411 SUBTEST(live_timeslice_preempt), 4412 SUBTEST(live_timeslice_rewind), 4413 SUBTEST(live_timeslice_queue), 4414 SUBTEST(live_timeslice_nopreempt), 4415 SUBTEST(live_busywait_preempt), 4416 SUBTEST(live_preempt), 4417 SUBTEST(live_late_preempt), 4418 SUBTEST(live_nopreempt), 4419 SUBTEST(live_preempt_cancel), 4420 SUBTEST(live_suppress_self_preempt), 4421 SUBTEST(live_suppress_wait_preempt), 4422 SUBTEST(live_chain_preempt), 4423 SUBTEST(live_preempt_gang), 4424 SUBTEST(live_preempt_timeout), 4425 SUBTEST(live_preempt_user), 4426 SUBTEST(live_preempt_smoke), 4427 SUBTEST(live_virtual_engine), 4428 SUBTEST(live_virtual_mask), 4429 SUBTEST(live_virtual_preserved), 4430 SUBTEST(live_virtual_bond), 4431 SUBTEST(live_virtual_reset), 4432 }; 4433 4434 if (!HAS_EXECLISTS(i915)) 4435 return 0; 4436 4437 if (intel_gt_is_wedged(&i915->gt)) 4438 return 0; 4439 4440 return intel_gt_live_subtests(tests, &i915->gt); 4441 } 4442 4443 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 4444 { 4445 const u32 offset = 4446 i915_ggtt_offset(ce->engine->status_page.vma) + 4447 offset_in_page(slot); 4448 struct i915_request *rq; 4449 u32 *cs; 4450 4451 rq = intel_context_create_request(ce); 4452 if (IS_ERR(rq)) 4453 return PTR_ERR(rq); 4454 4455 cs = intel_ring_begin(rq, 4); 4456 if (IS_ERR(cs)) { 4457 i915_request_add(rq); 4458 return PTR_ERR(cs); 4459 } 4460 4461 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4462 *cs++ = offset; 4463 *cs++ = 0; 4464 *cs++ = 1; 4465 4466 intel_ring_advance(rq, cs); 4467 4468 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4469 i915_request_add(rq); 4470 return 0; 4471 } 4472 4473 static int context_flush(struct intel_context *ce, long timeout) 4474 { 4475 struct i915_request *rq; 4476 struct dma_fence *fence; 4477 int err = 0; 4478 4479 rq = intel_engine_create_kernel_request(ce->engine); 4480 if (IS_ERR(rq)) 4481 return 
PTR_ERR(rq); 4482 4483 fence = i915_active_fence_get(&ce->timeline->last_request); 4484 if (fence) { 4485 i915_request_await_dma_fence(rq, fence); 4486 dma_fence_put(fence); 4487 } 4488 4489 rq = i915_request_get(rq); 4490 i915_request_add(rq); 4491 if (i915_request_wait(rq, 0, timeout) < 0) 4492 err = -ETIME; 4493 i915_request_put(rq); 4494 4495 rmb(); /* We know the request is written, make sure all state is too! */ 4496 return err; 4497 } 4498 4499 static int live_lrc_layout(void *arg) 4500 { 4501 struct intel_gt *gt = arg; 4502 struct intel_engine_cs *engine; 4503 enum intel_engine_id id; 4504 u32 *lrc; 4505 int err; 4506 4507 /* 4508 * Check the registers offsets we use to create the initial reg state 4509 * match the layout saved by HW. 4510 */ 4511 4512 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4513 if (!lrc) 4514 return -ENOMEM; 4515 4516 err = 0; 4517 for_each_engine(engine, gt, id) { 4518 u32 *hw; 4519 int dw; 4520 4521 if (!engine->default_state) 4522 continue; 4523 4524 hw = shmem_pin_map(engine->default_state); 4525 if (IS_ERR(hw)) { 4526 err = PTR_ERR(hw); 4527 break; 4528 } 4529 hw += LRC_STATE_OFFSET / sizeof(*hw); 4530 4531 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4532 engine->kernel_context, 4533 engine, 4534 engine->kernel_context->ring, 4535 true); 4536 4537 dw = 0; 4538 do { 4539 u32 lri = hw[dw]; 4540 4541 if (lri == 0) { 4542 dw++; 4543 continue; 4544 } 4545 4546 if (lrc[dw] == 0) { 4547 pr_debug("%s: skipped instruction %x at dword %d\n", 4548 engine->name, lri, dw); 4549 dw++; 4550 continue; 4551 } 4552 4553 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4554 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4555 engine->name, dw, lri); 4556 err = -EINVAL; 4557 break; 4558 } 4559 4560 if (lrc[dw] != lri) { 4561 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4562 engine->name, dw, lri, lrc[dw]); 4563 err = -EINVAL; 4564 break; 4565 } 4566 4567 lri &= 0x7f; 4568 lri++; 4569 dw++; 4570 4571 while (lri) { 4572 if (hw[dw] != lrc[dw]) { 4573 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4574 engine->name, dw, hw[dw], lrc[dw]); 4575 err = -EINVAL; 4576 break; 4577 } 4578 4579 /* 4580 * Skip over the actual register value as we 4581 * expect that to differ. 4582 */ 4583 dw += 2; 4584 lri -= 2; 4585 } 4586 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4587 4588 if (err) { 4589 pr_info("%s: HW register image:\n", engine->name); 4590 igt_hexdump(hw, PAGE_SIZE); 4591 4592 pr_info("%s: SW register image:\n", engine->name); 4593 igt_hexdump(lrc, PAGE_SIZE); 4594 } 4595 4596 shmem_unpin_map(engine->default_state, hw); 4597 if (err) 4598 break; 4599 } 4600 4601 kfree(lrc); 4602 return err; 4603 } 4604 4605 static int find_offset(const u32 *lri, u32 offset) 4606 { 4607 int i; 4608 4609 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4610 if (lri[i] == offset) 4611 return i; 4612 4613 return -1; 4614 } 4615 4616 static int live_lrc_fixed(void *arg) 4617 { 4618 struct intel_gt *gt = arg; 4619 struct intel_engine_cs *engine; 4620 enum intel_engine_id id; 4621 int err = 0; 4622 4623 /* 4624 * Check the assumed register offsets match the actual locations in 4625 * the context image. 
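	 *
	 * For each engine we pin the HW default context image and search it
	 * for the mmio offset of every register in the table below
	 * (RING_START, RING_CTL, RING_HEAD, ...), then compare the dword at
	 * which it was found against the CTX_* / lrc_ring_*() index used to
	 * build our software register state.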
static int live_lrc_fixed(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the assumed register offsets match the actual locations in
	 * the context image.
	 */

	for_each_engine(engine, gt, id) {
		const struct {
			u32 reg;
			u32 offset;
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{
				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
				lrc_ring_wa_bb_per_ctx(engine),
				"RING_BB_PER_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
				lrc_ring_indirect_ptr(engine),
				"RING_INDIRECT_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
				lrc_ring_indirect_offset(engine),
				"RING_INDIRECT_CTX_OFFSET"
			},
			{
				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
				CTX_TIMESTAMP - 1,
				"RING_CTX_TIMESTAMP"
			},
			{
				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
				lrc_ring_gpr0(engine),
				"RING_CS_GPR0"
			},
			{
				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
				lrc_ring_cmd_buf_cctl(engine),
				"RING_CMD_BUF_CCTL"
			},
			{ },
		}, *t;
		u32 *hw;

		if (!engine->default_state)
			continue;

		hw = shmem_pin_map(engine->default_state);
		if (IS_ERR(hw)) {
			err = PTR_ERR(hw);
			break;
		}
		hw += LRC_STATE_OFFSET / sizeof(*hw);

		for (t = tbl; t->name; t++) {
			int dw = find_offset(hw, t->reg);

			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		shmem_unpin_map(engine->default_state, hw);
	}

	return err;
}

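/*
 * The checks below sample live register values from inside a request using
 * MI_STORE_REGISTER_MEM (SRM): the CS writes the current value of the named
 * MMIO register to a GGTT address we can later read back from the CPU. An
 * illustrative (not load-bearing) encoding of one sample:
 *
 *	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
 *	*cs++ = i915_mmio_reg_offset(reg);	// which register to sample
 *	*cs++ = ggtt_address_of_scratch_slot;	// where to store it
 *	*cs++ = 0;				// upper dword of the address
 *
 * Because the SRM executes from within the target context, the sampled
 * RING_START/RING_TAIL reflect that context's view of its ring.
 */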
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	intel_context_put(ce);
	return err;
}

static int live_lrc_state(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the live register state matches what we expect for this
	 * intel_context.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		err = __live_lrc_state(engine, scratch);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

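/*
 * The GPR tests below use a small handshake: the reader batch stalls on
 * MI_SEMAPHORE_WAIT (POLL | SAD_NEQ_SDD) against a dword in the engine's
 * status page, so it only proceeds once that slot becomes non-zero. The
 * slot is flipped either directly from the CPU (slot[0] = 1; wmb();) or by
 * emit_semaphore_signal() from the kernel context; in the preempt variant
 * that signal is submitted at barrier priority, so the blocked context is
 * also forced through a preemption save/restore cycle before it resumes.
 */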
static int gpr_make_dirty(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	return 0;
}

static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return ERR_CAST(cs);
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	i915_vma_lock(scratch);
	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(scratch);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

	return rq;
}

static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (preempt) {
		err = gpr_make_dirty(engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(engine->kernel_context, slot);
		if (err)
			goto err_rq;
	} else {
		slot[0] = 1;
		wmb();
	}

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	memset32(&slot[0], -1, 4);
	wmb();
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}

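/*
 * Note: the heartbeat is parked around each pass below, presumably so that
 * no background kernel-context requests land on the engine while the reader
 * batch is deliberately held on its semaphore; otherwise the heartbeat could
 * preempt or time out the blocked request and confuse the result.
 */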
static int live_lrc_gpr(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that GPR registers are cleared in new contexts as we need
	 * to avoid leaking any information from previous contexts.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		engine_heartbeat_disable(engine);

		err = __live_lrc_gpr(engine, scratch, false);
		if (err)
			goto err;

		err = __live_lrc_gpr(engine, scratch, true);
		if (err)
			goto err;

err:
		engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

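/*
 * CTX_TIMESTAMP is a free-running 32bit counter, so the comparisons below
 * (timestamp_advanced()) use the usual signed-difference trick to remain
 * correct across wraparound. A small worked example: start = 0xfffffff0,
 * end = 0x00000010 gives (s32)(end - start) == 0x20 > 0, so the timestamp
 * still counts as having advanced even though the raw value wrapped.
 */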
"preempt" : "simple", 5159 slot[1], timestamp); 5160 err = -EINVAL; 5161 } 5162 5163 err: 5164 memset32(slot, -1, 4); 5165 i915_request_put(rq); 5166 return err; 5167 } 5168 5169 static int live_lrc_timestamp(void *arg) 5170 { 5171 struct lrc_timestamp data = {}; 5172 struct intel_gt *gt = arg; 5173 enum intel_engine_id id; 5174 const u32 poison[] = { 5175 0, 5176 S32_MAX, 5177 (u32)S32_MAX + 1, 5178 U32_MAX, 5179 }; 5180 5181 /* 5182 * We want to verify that the timestamp is saved and restore across 5183 * context switches and is monotonic. 5184 * 5185 * So we do this with a little bit of LRC poisoning to check various 5186 * boundary conditions, and see what happens if we preempt the context 5187 * with a second request (carrying more poison into the timestamp). 5188 */ 5189 5190 for_each_engine(data.engine, gt, id) { 5191 int i, err = 0; 5192 5193 engine_heartbeat_disable(data.engine); 5194 5195 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5196 struct intel_context *tmp; 5197 5198 tmp = intel_context_create(data.engine); 5199 if (IS_ERR(tmp)) { 5200 err = PTR_ERR(tmp); 5201 goto err; 5202 } 5203 5204 err = intel_context_pin(tmp); 5205 if (err) { 5206 intel_context_put(tmp); 5207 goto err; 5208 } 5209 5210 data.ce[i] = tmp; 5211 } 5212 5213 for (i = 0; i < ARRAY_SIZE(poison); i++) { 5214 data.poison = poison[i]; 5215 5216 err = __lrc_timestamp(&data, false); 5217 if (err) 5218 break; 5219 5220 err = __lrc_timestamp(&data, true); 5221 if (err) 5222 break; 5223 } 5224 5225 err: 5226 engine_heartbeat_enable(data.engine); 5227 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5228 if (!data.ce[i]) 5229 break; 5230 5231 intel_context_unpin(data.ce[i]); 5232 intel_context_put(data.ce[i]); 5233 } 5234 5235 if (igt_flush_test(gt->i915)) 5236 err = -EIO; 5237 if (err) 5238 return err; 5239 } 5240 5241 return 0; 5242 } 5243 5244 static struct i915_vma * 5245 create_user_vma(struct i915_address_space *vm, unsigned long size) 5246 { 5247 struct drm_i915_gem_object *obj; 5248 struct i915_vma *vma; 5249 int err; 5250 5251 obj = i915_gem_object_create_internal(vm->i915, size); 5252 if (IS_ERR(obj)) 5253 return ERR_CAST(obj); 5254 5255 vma = i915_vma_instance(obj, vm, NULL); 5256 if (IS_ERR(vma)) { 5257 i915_gem_object_put(obj); 5258 return vma; 5259 } 5260 5261 err = i915_vma_pin(vma, 0, 0, PIN_USER); 5262 if (err) { 5263 i915_gem_object_put(obj); 5264 return ERR_PTR(err); 5265 } 5266 5267 return vma; 5268 } 5269 5270 static struct i915_vma * 5271 store_context(struct intel_context *ce, struct i915_vma *scratch) 5272 { 5273 struct i915_vma *batch; 5274 u32 dw, x, *cs, *hw; 5275 u32 *defaults; 5276 5277 batch = create_user_vma(ce->vm, SZ_64K); 5278 if (IS_ERR(batch)) 5279 return batch; 5280 5281 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5282 if (IS_ERR(cs)) { 5283 i915_vma_put(batch); 5284 return ERR_CAST(cs); 5285 } 5286 5287 defaults = shmem_pin_map(ce->engine->default_state); 5288 if (!defaults) { 5289 i915_gem_object_unpin_map(batch->obj); 5290 i915_vma_put(batch); 5291 return ERR_PTR(-ENOMEM); 5292 } 5293 5294 x = 0; 5295 dw = 0; 5296 hw = defaults; 5297 hw += LRC_STATE_OFFSET / sizeof(*hw); 5298 do { 5299 u32 len = hw[dw] & 0x7f; 5300 5301 if (hw[dw] == 0) { 5302 dw++; 5303 continue; 5304 } 5305 5306 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5307 dw += len + 2; 5308 continue; 5309 } 5310 5311 dw++; 5312 len = (len + 1) / 2; 5313 while (len--) { 5314 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 5315 *cs++ = hw[dw]; 5316 *cs++ = lower_32_bits(scratch->node.start + x); 5317 *cs++ = 
static struct i915_vma *
create_user_vma(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(scratch->node.start + x);
			*cs++ = upper_32_bits(scratch->node.start + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, flags);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}

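/*
 * load_context() is the mirror of store_context(): it walks the same LRI
 * list from the default context image, but instead of saving each register
 * it emits MI_LOAD_REGISTER_IMM packets that overwrite every listed
 * register with the caller's poison value.
 */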
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = poison;
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(batch->node.start);
	*cs++ = upper_32_bits(batch->node.start);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}

static bool is_moving(u32 a, u32 b)
{
	return a != b;
}

static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	u32 *defaults;
	int err = 0;

	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	lrc = i915_gem_object_pin_map(ce->state->obj,
				      i915_coherent_map_type(engine->i915));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_OFFSET / sizeof(*hw);

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		err = -ENOMEM;
		goto err_lrc;
	}

	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	shmem_unpin_map(ce->engine->default_state, defaults);
err_lrc:
	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	err = poison_registers(B, poison, sema);
	if (err) {
		WRITE_ONCE(*sema, -1);
		i915_request_put(rq);
		goto err_result1;
	}

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_result1;
	}
	i915_request_put(rq);

	err = compare_isolation(engine, ref, result, A, poison);

err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}

static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
		return true;

	return false;
}

static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};
	int err = 0;

	/*
	 * Our goal is to verify that per-context state cannot be tampered
	 * with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			int result;

			result = __lrc_isolation(engine, poison[i]);
			if (result && !err)
				err = result;

			result = __lrc_isolation(engine, ~poison[i]);
			if (result && !err)
				err = result;
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int indirect_ctx_submit_req(struct intel_context *ce)
{
	struct i915_request *rq;
	int err = 0;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;

	i915_request_put(rq);

	return err;
}

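/*
 * The indirect context batchbuffer (wa_bb) tests plant a "canary": the
 * per-context bb stores the value of RING_START into a reserved slot of the
 * context's wa_bb page. Because RING_START is restored from the context
 * image before the indirect bb executes, and differs for every context,
 * finding the matching RING_START in the canary slot shows the bb really
 * ran, ran at the expected point of the restore, and ran for the right
 * context.
 */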
#define CTX_BB_CANARY_OFFSET (3 * 1024)
#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))

static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_START(0));
	*cs++ = i915_ggtt_offset(ce->state) +
		context_wa_bb_offset(ce) +
		CTX_BB_CANARY_OFFSET;
	*cs++ = 0;

	return cs;
}

static void
indirect_ctx_bb_setup(struct intel_context *ce)
{
	u32 *cs = context_indirect_bb(ce);

	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;

	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}

static bool check_ring_start(struct intel_context *ce)
{
	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
		LRC_STATE_OFFSET + context_wa_bb_offset(ce);

	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
		return true;

	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
	       ctx_bb[CTX_BB_CANARY_INDEX],
	       ce->lrc_reg_state[CTX_RING_START]);

	return false;
}

static int indirect_ctx_bb_check(struct intel_context *ce)
{
	int err;

	err = indirect_ctx_submit_req(ce);
	if (err)
		return err;

	if (!check_ring_start(ce))
		return -EINVAL;

	return 0;
}

static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
{
	struct intel_context *a, *b;
	int err;

	a = intel_context_create(engine);
	if (IS_ERR(a))
		return PTR_ERR(a);
	err = intel_context_pin(a);
	if (err)
		goto put_a;

	b = intel_context_create(engine);
	if (IS_ERR(b)) {
		err = PTR_ERR(b);
		goto unpin_a;
	}
	err = intel_context_pin(b);
	if (err)
		goto put_b;

	/* We use the already reserved extra page in context state */
	if (!a->wa_bb_page) {
		GEM_BUG_ON(b->wa_bb_page);
		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
		goto unpin_b;
	}

	/*
	 * In order to test that our per-context bb is truly per context, and
	 * executes at the intended spot in the context restore process, make
	 * the batch store the ring start value to memory. As ring start is
	 * restored before the indirect ctx bb runs, and differs for each
	 * context, it fits this purpose.
	 */
	indirect_ctx_bb_setup(a);
	indirect_ctx_bb_setup(b);

	err = indirect_ctx_bb_check(a);
	if (err)
		goto unpin_b;

	err = indirect_ctx_bb_check(b);

unpin_b:
	intel_context_unpin(b);
put_b:
	intel_context_put(b);
unpin_a:
	intel_context_unpin(a);
put_a:
	intel_context_put(a);

	return err;
}

static int live_lrc_indirect_ctx_bb(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_lrc_indirect_ctx_bb(engine);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;

		if (err)
			break;
	}

	return err;
}

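/*
 * The "garbage" tests below fill a context's register state with random
 * bytes (prandom_bytes_state() over the lrc_reg_state) and submit it. The
 * test then bans the corrupted context and forces an engine reset, and
 * checks that the hanging request is flagged with an error and completes,
 * i.e. that we can recover from a completely corrupted context image
 * without taking down the rest of the GPU.
 */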
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	if (test_and_set_bit(bit, lock))
		return;

	tasklet_disable(&engine->execlists.tasklet);

	if (!rq->fence.error)
		intel_engine_reset(engine, NULL);

	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(bit, lock);
}

static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_OFFSET);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}

static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->runtime.num_underflow = 0;
	ce->runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->runtime.num_underflow,
		       ce->runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that cumulative context runtime as stored in the pphwsp[16]
	 * is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
		SUBTEST(live_lrc_indirect_ctx_bb),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}