// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}

/*
 * Poll until the HW has acknowledged submission of @rq (or @rq has already
 * completed), giving up after @timeout jiffies.
 */
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	/* Ignore our own attempts to suppress excess tasklets */
	tasklet_hi_schedule(&engine->execlists.tasklet);

	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

/*
 * Wait for @rq to be cancelled (-EIO) by an engine reset and then complete,
 * flushing the reset worker along the way.
 */
static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}
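
/*
 * Smoke test: on each engine, submit a simple spinning request, check that
 * it starts, then end it and flush. Any failure here means basic execlists
 * submission is broken.
 */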
static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
	 */

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Set up the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		lrc_update_regs(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}

static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_PRIORITY_MAX);
}

static int live_unlite_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct igt_spinner spin;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Set up a preemption event that will cause almost the entire ring
	 * to be unwound, potentially fooling our intel_ring_direction()
	 * into emitting a forward lite-restore instead of the rollback.
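	 *
	 * (If the CS were to run past the true RING_TAIL it would execute the
	 * 0xdeadbeef poison written into the rings below and promptly hang.)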
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq;
		struct igt_live_test t;
		int n;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			memset32(tmp->ring->vaddr,
				 0xdeadbeef, /* trigger a hang if executed */
				 tmp->ring->vma->size / sizeof(u32));

			ce[n] = tmp;
		}

		/* Create max prio spinner, followed by N low prio nops */
		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		i915_request_get(rq);
		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_ce;
		}

		/* Fill the ring, until we will cause a wrap */
		n = 0;
		while (intel_ring_direction(ce[0]->ring,
					    rq->wa_tail,
					    ce[0]->ring->tail) <= 0) {
			struct i915_request *tmp;

			tmp = intel_context_create_request(ce[0]);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				i915_request_put(rq);
				goto err_ce;
			}

			i915_request_add(tmp);
			intel_engine_flush_submission(engine);
			n++;
		}
		intel_engine_flush_submission(engine);
		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
			 engine->name, n,
			 ce[0]->ring->size,
			 ce[0]->ring->tail,
			 ce[0]->ring->emit,
			 rq->tail);
		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
						rq->tail,
						ce[0]->ring->tail) <= 0);
		i915_request_put(rq);

		/* Create a second ring to preempt the first ring after rq[0] */
		rq = intel_context_create_request(ce[1]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		err = wait_for_submit(engine, rq, HZ / 2);
		i915_request_put(rq);
		if (err) {
			pr_err("%s: preemption request was not submitted\n",
			       engine->name);
			err = -ETIME;
		}

		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
			 engine->name,
			 ce[0]->ring->tail, ce[0]->ring->emit,
			 ce[1]->ring->tail, ce[1]->ring->emit);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}
		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_pin_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * We have to be careful not to trust intel_ring too much, for example
	 * ring->head is updated upon retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
{
	tasklet_disable(&engine->execlists.tasklet);
	local_bh_disable();

	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &engine->gt->reset.flags)) {
		local_bh_enable();
		tasklet_enable(&engine->execlists.tasklet);

		intel_gt_set_wedged(engine->gt);
		return -EBUSY;
	}

	return 0;
}

static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
{
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
			      &engine->gt->reset.flags);

	local_bh_enable();
	tasklet_enable(&engine->execlists.tasklet);
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
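	 *
	 * (Simulated below by putting the guilty request on hold across an
	 * engine reset, then checking that it neither runs nor is lost.)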
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		err = engine_lock_reset_tasklet(engine);
		if (err)
			goto out;

		engine->execlists.tasklet.callback(&engine->execlists.tasklet);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		__intel_engine_reset_bh(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		engine_unlock_reset_tasklet(engine);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		st_engine_heartbeat_enable(engine);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}

static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD,  GOOD } },
		{ { BAD,  BAD } },
		{ { BAD,  GOOD } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
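	 *
	 * (Each phase below submits a pair of requests, some carrying an
	 * illegal 0xdeadbeef command, and checks the resulting fence errors.)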
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		const struct error_phase *p;
		int err = 0;

		st_engine_heartbeat_disable(engine);

		for (p = phases; p->error[0] != GOOD; p++) {
			struct i915_request *client[ARRAY_SIZE(phases->error)];
			u32 *cs;
			int i;

			memset(client, 0, sizeof(client)); /* zero every request pointer */
			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct intel_context *ce;
				struct i915_request *rq;

				ce = intel_context_create(engine);
				if (IS_ERR(ce)) {
					err = PTR_ERR(ce);
					goto out;
				}

				rq = intel_context_create_request(ce);
				intel_context_put(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out;
				}

				if (rq->engine->emit_init_breadcrumb) {
					err = rq->engine->emit_init_breadcrumb(rq);
					if (err) {
						i915_request_add(rq);
						goto out;
					}
				}

				cs = intel_ring_begin(rq, 2);
				if (IS_ERR(cs)) {
					i915_request_add(rq);
					err = PTR_ERR(cs);
					goto out;
				}

				if (p->error[i]) {
					*cs++ = 0xdeadbeef;
					*cs++ = 0xdeadbeef;
				} else {
					*cs++ = MI_NOOP;
					*cs++ = MI_NOOP;
				}

				client[i] = i915_request_get(rq);
				i915_request_add(rq);
			}

			err = wait_for_submit(engine, client[0], HZ / 2);
			if (err) {
				pr_err("%s: first request did not start within time!\n",
				       engine->name);
				err = -ETIME;
				goto out;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
					pr_debug("%s: %s request incomplete!\n",
						 engine->name,
						 error_repr(p->error[i]));

				if (!i915_request_started(client[i])) {
					pr_err("%s: %s request not started!\n",
					       engine->name,
					       error_repr(p->error[i]));
					err = -ETIME;
					goto out;
				}

				/* Kick the tasklet to process the error */
				intel_engine_flush_submission(engine);
				if (client[i]->fence.error != p->error[i]) {
					pr_err("%s: %s request (%s) with wrong error code: %d\n",
					       engine->name,
					       error_repr(p->error[i]),
					       i915_request_completed(client[i]) ? "completed" : "running",
					       client[i]->fence.error);
					err = -EINVAL;
					goto out;
				}
			}

out:
			for (i = 0; i < ARRAY_SIZE(client); i++)
				if (client[i])
					i915_request_put(client[i]);
			if (err) {
				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
				       engine->name, p - phases,
				       p->error[0], p->error[1]);
				break;
			}
		}

		st_engine_heartbeat_enable(engine);
		if (err) {
			intel_gt_set_wedged(gt);
			return err;
		}
	}

	return 0;
}

/*
 * Emit a busywait on semaphore slot @idx; once released, signal slot @idx - 1
 * so that a chain of these requests unwinds from the last slot to the first.
 */
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
	*cs++ = 0;

	if (idx > 0) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
		*cs++ = 0;
		*cs++ = 1;
	} else {
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);
	return 0;
}

static struct i915_request *
semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
{
	struct intel_context *ce;
	struct i915_request *rq;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto out_ce;

	err = 0;
	if (rq->engine->emit_init_breadcrumb)
		err = rq->engine->emit_init_breadcrumb(rq);
	if (err == 0)
		err = emit_semaphore_chain(rq, vma, idx);
	if (err == 0)
		i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		rq = ERR_PTR(err);

out_ce:
	intel_context_put(ce);
	return rq;
}

/*
 * Signal slot @idx - 1 from a kernel request queued at @prio, releasing the
 * tail of the semaphore chain.
 */
static int
release_queue(struct intel_engine_cs *engine,
	      struct i915_vma *vma,
	      int idx, int prio)
{
	struct i915_sched_attr attr = {
		.priority = prio,
	};
	struct i915_request *rq;
	u32 *cs;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	i915_request_get(rq);
	i915_request_add(rq);

	local_bh_disable();
	engine->schedule(rq, &attr);
	local_bh_enable(); /* kick tasklet */

	i915_request_put(rq);

	return 0;
}

static int
slice_semaphore_queue(struct intel_engine_cs *outer,
		      struct i915_vma *vma,
		      int count)
{
	struct intel_engine_cs *engine;
	struct i915_request *head;
	enum intel_engine_id id;
	int err, i, n = 0;

	head = semaphore_queue(outer, vma, n++);
	if (IS_ERR(head))
		return PTR_ERR(head);

	for_each_engine(engine, outer->gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		for (i = 0; i < count; i++) {
			struct i915_request *rq;

			rq = semaphore_queue(engine, vma, n++);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			i915_request_put(rq);
		}
	}

	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
	if (err)
		goto out;

	if (i915_request_wait(head, 0,
			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
		       outer->name, count, n);
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(outer->gt);
		err = -EIO;
	}

out:
	i915_request_put(head);
	return err;
}

static int live_timeslice_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		memset(vaddr, 0, PAGE_SIZE);

		st_engine_heartbeat_disable(engine);
		err = slice_semaphore_queue(engine, vma, 5);
		st_engine_heartbeat_enable(engine);
		if (err)
			goto err_pin;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto err_pin;
		}
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Emit a request that waits for *slot >= @idx, records RING_TIMESTAMP into
 * slot[@idx] and then bumps *slot to @idx + 1, releasing the next waiter.
 */
static struct i915_request *
create_rewinder(struct intel_context *ce,
		struct i915_request *wait,
		void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	if (wait) {
		err = i915_request_await_dma_fence(rq, &wait->fence);
		if (err)
			goto err;
	}

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = idx;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = idx + 1;

	intel_ring_advance(rq, cs);

	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

static int live_timeslice_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * The usual presumption on timeslice expiration is that we replace
	 * the active context with another. However, given a chain of
	 * dependencies we may end up with replacing the context with itself,
	 * but only a few of those requests, forcing us to rewind the
	 * RING_TAIL of the original request.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	for_each_engine(engine, gt, id) {
		enum { A1, A2, B1 };
		enum { X = 1, Z, Y };
		struct i915_request *rq[3] = {};
		struct intel_context *ce;
		unsigned long timeslice;
		int i, err = 0;
		u32 *slot;

		if (!intel_engine_has_timeslices(engine))
			continue;

		/*
		 * A:rq1 -- semaphore wait, timestamp X
		 * A:rq2 -- write timestamp Y
		 *
		 * B:rq1 [await A:rq1] -- write timestamp Z
		 *
		 * Force timeslice, release semaphore.
		 *
		 * Expect execution/evaluation order XZY
		 */

		st_engine_heartbeat_disable(engine);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		slot = memset32(engine->status_page.addr + 1000, 0, 4);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[A1] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[A1])) {
			err = PTR_ERR(rq[A1]);
			intel_context_put(ce);
			goto err;
		}

		rq[A2] = create_rewinder(ce, NULL, slot, Y);
		intel_context_put(ce);
		if (IS_ERR(rq[A2])) {
			err = PTR_ERR(rq[A2]);
			goto err;
		}

		err = wait_for_submit(engine, rq[A2], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit first context\n",
			       engine->name);
			goto err;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
		intel_context_put(ce);
		if (IS_ERR(rq[B1])) {
			err = PTR_ERR(rq[B1]);
			goto err;
		}

		err = wait_for_submit(engine, rq[B1], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit second context\n",
			       engine->name);
			goto err;
		}

		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
			/* Wait for the timeslice to kick in */
			del_timer(&engine->execlists.timer);
			tasklet_hi_schedule(&engine->execlists.tasklet);
			intel_engine_flush_submission(engine);
		}
		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
		GEM_BUG_ON(i915_request_is_active(rq[A2]));

		/* Release the hounds! */
		slot[0] = 1;
		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */

		for (i = 1; i <= 3; i++) {
			unsigned long timeout = jiffies + HZ / 2;

			while (!READ_ONCE(slot[i]) &&
			       time_before(jiffies, timeout))
				;

			if (!time_before(jiffies, timeout)) {
				pr_err("%s: rq[%d] timed out\n",
				       engine->name, i - 1);
				err = -ETIME;
				goto err;
			}

			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
		}

		/* XZY: XZ < XY */
		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
			       engine->name,
			       slot[Z] - slot[X],
			       slot[Y] - slot[X]);
			err = -EINVAL;
		}

err:
		memset32(&slot[0], -1, 4);
		wmb();

		engine->props.timeslice_duration_ms = timeslice;
		st_engine_heartbeat_enable(engine);
		for (i = 0; i < 3; i++)
			i915_request_put(rq[i]);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static long slice_timeout(struct intel_engine_cs *engine)
{
	long timeout;

	/* Enough time for a timeslice to kick in, and kick out */
	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));

	/* Enough time for the nop request to complete */
	timeout += HZ / 5;

	return timeout + 1;
}

static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so we must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
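	 *
	 * (The queued request is deliberately given the same priority as the
	 * semaphore in ELSP[0], so only timeslicing can move it onto the HW.)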
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
		struct i915_request *rq, *nop;

		if (!intel_engine_has_preemption(engine))
			continue;

		st_engine_heartbeat_disable(engine);
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		/* Wait until we ack the release_queue and start timeslicing */
		do {
			cond_resched();
			intel_engine_flush_submission(engine);
		} while (READ_ONCE(engine->execlists.pending[0]));

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

static int live_timeslice_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We should not timeslice into a request that is marked with
	 * I915_REQUEST_NOPREEMPT.
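	 *
	 * (The spinner below sets I915_FENCE_FLAG_NOPREEMPT, so even a
	 * maximum priority barrier queued behind it has to wait its turn.)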
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		unsigned long timeslice;

		if (!intel_engine_has_preemption(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		/* Create an unpreemptible spinner */

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_heartbeat;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
		i915_request_put(rq);

		/* Followed by a maximum priority barrier (heartbeat) */

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out_spin;
		}

		rq = intel_context_create_request(ce);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_spin;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Wait until the barrier is in ELSP, and we know timeslicing
		 * will have been activated.
		 */
		if (wait_for_submit(engine, rq, HZ / 2)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		/*
		 * Since the ELSP[0] request is unpreemptible, it should not
		 * allow the maximum priority barrier through. Wait long
		 * enough to see if it is timesliced in by mistake.
		 */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
			       engine->name);
			err = -EINVAL;
		}
		i915_request_put(rq);

out_spin:
		igt_spinner_end(&spin);
out_heartbeat:
		xchg(&engine->props.timeslice_duration_ms, timeslice);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	int err = -ENOMEM;
	u32 *map;

	/*
	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
	 * preempt the busywaits used to synchronise between rings.
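	 *
	 * (The low priority request spins on a semaphore value in memory;
	 * the high priority request simply writes the value that releases it.)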
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses an MI_STORE_DWORD_IMM to update the semaphore value,
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		i915_request_add(hi);

		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}

static struct i915_request *
spinner_create_request(struct igt_spinner *spin,
		       struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       u32 arb)
{
	struct intel_context *ce;
	struct i915_request *rq;

	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = igt_spinner_create_request(spin, ce, arb);
	intel_context_put(ce);
	return rq;
}

static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;
}

static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
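	/*
	 * ctx_hi keeps the default priority, so the boost to 1 keeps ctx_lo
	 * ahead until the explicit engine->schedule() call below.
	 */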
	ctx_lo->sched.priority = 1;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		attr.priority = I915_PRIORITY_MAX;
		engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;

err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_ctx_lo;
}

struct preempt_client {
	struct igt_spinner spin;
	struct i915_gem_context *ctx;
};

static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
	c->ctx = kernel_context(gt->i915);
	if (!c->ctx)
		return -ENOMEM;

	if (igt_spinner_init(&c->spin, gt))
		goto err_ctx;

	return 0;

err_ctx:
	kernel_context_close(c->ctx);
	return -ENOMEM;
}

static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}

static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and so must not be interrupted.
	 */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;
	b.ctx->sched.priority = I915_PRIORITY_MAX;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;

		if (!intel_engine_has_preemption(engine))
			continue;

		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			goto err_client_b;
		}

		/* Low priority client, but unpreemptable! */
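		/*
		 * With I915_FENCE_FLAG_NOPREEMPT set, the higher priority
		 * rq_b must wait for the spinner to end of its own accord.
		 */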
		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			goto err_wedged;
		}

		rq_b = spinner_create_request(&b.spin,
					      b.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_b)) {
			err = PTR_ERR(rq_b);
			goto err_client_b;
		}

		i915_request_add(rq_b);

		/* B is much more important than A! (But A is unpreemptable.) */
		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));

		/* Wait long enough for preemption and timeslicing */
		if (igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client started too early!\n");
			goto err_wedged;
		}

		igt_spinner_end(&a.spin);

		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client failed to start\n");
			goto err_wedged;
		}

		igt_spinner_end(&b.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption recorded x%d; should have been suppressed!\n",
			       engine->execlists.preempt_hang.count);
			err = -EINVAL;
			goto err_wedged;
		}

		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}

struct live_preempt_cancel {
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
};

static int __cancel_active0(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP0 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_ARB_CHECK);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_active1(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[2] = {};
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP1 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_NOOP); /* no preemption */
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = spinner_create_request(&arg->b.spin,
				       arg->b.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	intel_context_set_banned(rq[1]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	igt_spinner_end(&arg->a.spin);
	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != 0) {
		pr_err("Normal inflight0 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != -EIO) {
		pr_err("Cancelled inflight1 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_queued(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[3] = {};
	struct igt_live_test t;
	int err;

	/* Full ELSP and one in the wings */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	rq[2] = spinner_create_request(&arg->b.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[2])) {
		err = PTR_ERR(rq[2]);
		goto out;
	}

	i915_request_get(rq[2]);
	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
	i915_request_add(rq[2]);
	if (err)
		goto out;

	intel_context_set_banned(rq[2]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != 0) {
		pr_err("Normal inflight1 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[2]->fence.error != -EIO) {
		pr_err("Cancelled queued request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[2]);
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_hostile(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	int err;

	/* Preempt cancel non-preemptible spinner in ELSP0 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;
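
	/* Evicting a banned, non-preemptible spinner requires an engine reset. */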
	if (!intel_has_reset_engine(arg->engine->gt))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine); /* force reset */
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(arg->engine->i915))
		err = -EIO;
	return err;
}

static void force_reset_timeout(struct intel_engine_cs *engine)
{
	engine->reset_timeout.probability = 999;
	atomic_set(&engine->reset_timeout.times, -1);
}

static void cancel_reset_timeout(struct intel_engine_cs *engine)
{
	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
}

static int __cancel_fail(struct live_preempt_cancel *arg)
{
	struct intel_engine_cs *engine = arg->engine;
	struct i915_request *rq;
	int err;

	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	if (!intel_has_reset_engine(engine->gt))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);

	err = intel_engine_pulse(engine);
	if (err)
		goto out;

	force_reset_timeout(engine);

	/* force preempt reset [failure] */
	while (!engine->execlists.pending[0])
		intel_engine_flush_submission(engine);
	del_timer_sync(&engine->execlists.preempt);
	intel_engine_flush_submission(engine);

	cancel_reset_timeout(engine);

	/* after failure, require heartbeats to reset device */
	intel_engine_set_heartbeat(engine, 1);
	err = wait_for_reset(engine, rq, HZ / 2);
	intel_engine_set_heartbeat(engine,
				   engine->defaults.heartbeat_interval_ms);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(engine->i915))
		err = -EIO;
	return err;
}

static int live_preempt_cancel(void *arg)
{
	struct intel_gt *gt = arg;
	struct live_preempt_cancel data;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * To cancel an inflight context, we need to first remove it from the
	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
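	 *
	 * (Each __cancel_* phase below bans the victim context and uses an
	 * engine pulse to force it off the HW, then checks the fence errors.)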
 */

	if (preempt_client_init(gt, &data.a))
		return -ENOMEM;
	if (preempt_client_init(gt, &data.b))
		goto err_client_a;

	for_each_engine(data.engine, gt, id) {
		if (!intel_engine_has_preemption(data.engine))
			continue;

		err = __cancel_active0(&data);
		if (err)
			goto err_wedged;

		err = __cancel_active1(&data);
		if (err)
			goto err_wedged;

		err = __cancel_queued(&data);
		if (err)
			goto err_wedged;

		err = __cancel_hostile(&data);
		if (err)
			goto err_wedged;

		err = __cancel_fail(&data);
		if (err)
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&data.b);
err_client_a:
	preempt_client_fini(&data.a);
	return err;

err_wedged:
	GEM_TRACE_DUMP();
	igt_spinner_end(&data.b.spin);
	igt_spinner_end(&data.a.spin);
	intel_gt_set_wedged(gt);
	goto err_client_b;
}

static int live_suppress_self_preempt(void *arg)
{
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		st_engine_heartbeat_disable(engine);
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			st_engine_heartbeat_enable(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			st_engine_heartbeat_enable(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				st_engine_heartbeat_enable(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			GEM_BUG_ON(i915_request_completed(rq_a));
			engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				st_engine_heartbeat_enable(engine);
				goto err_wedged;
			}

			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
2519 st_engine_heartbeat_enable(engine); 2520 err = -EINVAL; 2521 goto err_client_b; 2522 } 2523 2524 st_engine_heartbeat_enable(engine); 2525 if (igt_flush_test(gt->i915)) 2526 goto err_wedged; 2527 } 2528 2529 err = 0; 2530 err_client_b: 2531 preempt_client_fini(&b); 2532 err_client_a: 2533 preempt_client_fini(&a); 2534 return err; 2535 2536 err_wedged: 2537 igt_spinner_end(&b.spin); 2538 igt_spinner_end(&a.spin); 2539 intel_gt_set_wedged(gt); 2540 err = -EIO; 2541 goto err_client_b; 2542 } 2543 2544 static int live_chain_preempt(void *arg) 2545 { 2546 struct intel_gt *gt = arg; 2547 struct intel_engine_cs *engine; 2548 struct preempt_client hi, lo; 2549 enum intel_engine_id id; 2550 int err = -ENOMEM; 2551 2552 /* 2553 * Build a chain AB...BA between two contexts (A, B) and request 2554 * preemption of the last request. It should then complete before 2555 * the previously submitted spinner in B. 2556 */ 2557 2558 if (preempt_client_init(gt, &hi)) 2559 return -ENOMEM; 2560 2561 if (preempt_client_init(gt, &lo)) 2562 goto err_client_hi; 2563 2564 for_each_engine(engine, gt, id) { 2565 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; 2566 struct igt_live_test t; 2567 struct i915_request *rq; 2568 int ring_size, count, i; 2569 2570 if (!intel_engine_has_preemption(engine)) 2571 continue; 2572 2573 rq = spinner_create_request(&lo.spin, 2574 lo.ctx, engine, 2575 MI_ARB_CHECK); 2576 if (IS_ERR(rq)) 2577 goto err_wedged; 2578 2579 i915_request_get(rq); 2580 i915_request_add(rq); 2581 2582 ring_size = rq->wa_tail - rq->head; 2583 if (ring_size < 0) 2584 ring_size += rq->ring->size; 2585 ring_size = rq->ring->size / ring_size; 2586 pr_debug("%s(%s): Using maximum of %d requests\n", 2587 __func__, engine->name, ring_size); 2588 2589 igt_spinner_end(&lo.spin); 2590 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2591 pr_err("Timed out waiting to flush %s\n", engine->name); 2592 i915_request_put(rq); 2593 goto err_wedged; 2594 } 2595 i915_request_put(rq); 2596 2597 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2598 err = -EIO; 2599 goto err_wedged; 2600 } 2601 2602 for_each_prime_number_from(count, 1, ring_size) { 2603 rq = spinner_create_request(&hi.spin, 2604 hi.ctx, engine, 2605 MI_ARB_CHECK); 2606 if (IS_ERR(rq)) 2607 goto err_wedged; 2608 i915_request_add(rq); 2609 if (!igt_wait_for_spinner(&hi.spin, rq)) 2610 goto err_wedged; 2611 2612 rq = spinner_create_request(&lo.spin, 2613 lo.ctx, engine, 2614 MI_ARB_CHECK); 2615 if (IS_ERR(rq)) 2616 goto err_wedged; 2617 i915_request_add(rq); 2618 2619 for (i = 0; i < count; i++) { 2620 rq = igt_request_alloc(lo.ctx, engine); 2621 if (IS_ERR(rq)) 2622 goto err_wedged; 2623 i915_request_add(rq); 2624 } 2625 2626 rq = igt_request_alloc(hi.ctx, engine); 2627 if (IS_ERR(rq)) 2628 goto err_wedged; 2629 2630 i915_request_get(rq); 2631 i915_request_add(rq); 2632 engine->schedule(rq, &attr); 2633 2634 igt_spinner_end(&hi.spin); 2635 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2636 struct drm_printer p = 2637 drm_info_printer(gt->i915->drm.dev); 2638 2639 pr_err("Failed to preempt over chain of %d\n", 2640 count); 2641 intel_engine_dump(engine, &p, 2642 "%s\n", engine->name); 2643 i915_request_put(rq); 2644 goto err_wedged; 2645 } 2646 igt_spinner_end(&lo.spin); 2647 i915_request_put(rq); 2648 2649 rq = igt_request_alloc(lo.ctx, engine); 2650 if (IS_ERR(rq)) 2651 goto err_wedged; 2652 2653 i915_request_get(rq); 2654 i915_request_add(rq); 2655 2656 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2657 struct drm_printer p = 2658 
drm_info_printer(gt->i915->drm.dev); 2659 2660 pr_err("Failed to flush low priority chain of %d requests\n", 2661 count); 2662 intel_engine_dump(engine, &p, 2663 "%s\n", engine->name); 2664 2665 i915_request_put(rq); 2666 goto err_wedged; 2667 } 2668 i915_request_put(rq); 2669 } 2670 2671 if (igt_live_test_end(&t)) { 2672 err = -EIO; 2673 goto err_wedged; 2674 } 2675 } 2676 2677 err = 0; 2678 err_client_lo: 2679 preempt_client_fini(&lo); 2680 err_client_hi: 2681 preempt_client_fini(&hi); 2682 return err; 2683 2684 err_wedged: 2685 igt_spinner_end(&hi.spin); 2686 igt_spinner_end(&lo.spin); 2687 intel_gt_set_wedged(gt); 2688 err = -EIO; 2689 goto err_client_lo; 2690 } 2691 2692 static int create_gang(struct intel_engine_cs *engine, 2693 struct i915_request **prev) 2694 { 2695 struct drm_i915_gem_object *obj; 2696 struct intel_context *ce; 2697 struct i915_request *rq; 2698 struct i915_vma *vma; 2699 u32 *cs; 2700 int err; 2701 2702 ce = intel_context_create(engine); 2703 if (IS_ERR(ce)) 2704 return PTR_ERR(ce); 2705 2706 obj = i915_gem_object_create_internal(engine->i915, 4096); 2707 if (IS_ERR(obj)) { 2708 err = PTR_ERR(obj); 2709 goto err_ce; 2710 } 2711 2712 vma = i915_vma_instance(obj, ce->vm, NULL); 2713 if (IS_ERR(vma)) { 2714 err = PTR_ERR(vma); 2715 goto err_obj; 2716 } 2717 2718 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2719 if (err) 2720 goto err_obj; 2721 2722 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 2723 if (IS_ERR(cs)) { 2724 err = PTR_ERR(cs); 2725 goto err_obj; 2726 } 2727 2728 /* Semaphore target: spin until zero */ 2729 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2730 2731 *cs++ = MI_SEMAPHORE_WAIT | 2732 MI_SEMAPHORE_POLL | 2733 MI_SEMAPHORE_SAD_EQ_SDD; 2734 *cs++ = 0; 2735 *cs++ = lower_32_bits(vma->node.start); 2736 *cs++ = upper_32_bits(vma->node.start); 2737 2738 if (*prev) { 2739 u64 offset = (*prev)->batch->node.start; 2740 2741 /* Terminate the spinner in the next lower priority batch. 
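		 * (We do so by writing zero over the semaphore dword it is
		 * polling at the start of its buffer.)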
*/ 2742 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2743 *cs++ = lower_32_bits(offset); 2744 *cs++ = upper_32_bits(offset); 2745 *cs++ = 0; 2746 } 2747 2748 *cs++ = MI_BATCH_BUFFER_END; 2749 i915_gem_object_flush_map(obj); 2750 i915_gem_object_unpin_map(obj); 2751 2752 rq = intel_context_create_request(ce); 2753 if (IS_ERR(rq)) { 2754 err = PTR_ERR(rq); 2755 goto err_obj; 2756 } 2757 2758 rq->batch = i915_vma_get(vma); 2759 i915_request_get(rq); 2760 2761 i915_vma_lock(vma); 2762 err = i915_request_await_object(rq, vma->obj, false); 2763 if (!err) 2764 err = i915_vma_move_to_active(vma, rq, 0); 2765 if (!err) 2766 err = rq->engine->emit_bb_start(rq, 2767 vma->node.start, 2768 PAGE_SIZE, 0); 2769 i915_vma_unlock(vma); 2770 i915_request_add(rq); 2771 if (err) 2772 goto err_rq; 2773 2774 i915_gem_object_put(obj); 2775 intel_context_put(ce); 2776 2777 rq->mock.link.next = &(*prev)->mock.link; 2778 *prev = rq; 2779 return 0; 2780 2781 err_rq: 2782 i915_vma_put(rq->batch); 2783 i915_request_put(rq); 2784 err_obj: 2785 i915_gem_object_put(obj); 2786 err_ce: 2787 intel_context_put(ce); 2788 return err; 2789 } 2790 2791 static int __live_preempt_ring(struct intel_engine_cs *engine, 2792 struct igt_spinner *spin, 2793 int queue_sz, int ring_sz) 2794 { 2795 struct intel_context *ce[2] = {}; 2796 struct i915_request *rq; 2797 struct igt_live_test t; 2798 int err = 0; 2799 int n; 2800 2801 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) 2802 return -EIO; 2803 2804 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2805 struct intel_context *tmp; 2806 2807 tmp = intel_context_create(engine); 2808 if (IS_ERR(tmp)) { 2809 err = PTR_ERR(tmp); 2810 goto err_ce; 2811 } 2812 2813 tmp->ring = __intel_context_ring_size(ring_sz); 2814 2815 err = intel_context_pin(tmp); 2816 if (err) { 2817 intel_context_put(tmp); 2818 goto err_ce; 2819 } 2820 2821 memset32(tmp->ring->vaddr, 2822 0xdeadbeef, /* trigger a hang if executed */ 2823 tmp->ring->vma->size / sizeof(u32)); 2824 2825 ce[n] = tmp; 2826 } 2827 2828 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); 2829 if (IS_ERR(rq)) { 2830 err = PTR_ERR(rq); 2831 goto err_ce; 2832 } 2833 2834 i915_request_get(rq); 2835 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2836 i915_request_add(rq); 2837 2838 if (!igt_wait_for_spinner(spin, rq)) { 2839 intel_gt_set_wedged(engine->gt); 2840 i915_request_put(rq); 2841 err = -ETIME; 2842 goto err_ce; 2843 } 2844 2845 /* Fill the ring, until we will cause a wrap */ 2846 n = 0; 2847 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { 2848 struct i915_request *tmp; 2849 2850 tmp = intel_context_create_request(ce[0]); 2851 if (IS_ERR(tmp)) { 2852 err = PTR_ERR(tmp); 2853 i915_request_put(rq); 2854 goto err_ce; 2855 } 2856 2857 i915_request_add(tmp); 2858 intel_engine_flush_submission(engine); 2859 n++; 2860 } 2861 intel_engine_flush_submission(engine); 2862 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 2863 engine->name, queue_sz, n, 2864 ce[0]->ring->size, 2865 ce[0]->ring->tail, 2866 ce[0]->ring->emit, 2867 rq->tail); 2868 i915_request_put(rq); 2869 2870 /* Create a second request to preempt the first ring */ 2871 rq = intel_context_create_request(ce[1]); 2872 if (IS_ERR(rq)) { 2873 err = PTR_ERR(rq); 2874 goto err_ce; 2875 } 2876 2877 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2878 i915_request_get(rq); 2879 i915_request_add(rq); 2880 2881 err = wait_for_submit(engine, rq, HZ / 2); 2882 i915_request_put(rq); 2883 if (err) { 2884 pr_err("%s: preemption request was not 
submitted\n", 2885 engine->name); 2886 err = -ETIME; 2887 } 2888 2889 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 2890 engine->name, 2891 ce[0]->ring->tail, ce[0]->ring->emit, 2892 ce[1]->ring->tail, ce[1]->ring->emit); 2893 2894 err_ce: 2895 intel_engine_flush_submission(engine); 2896 igt_spinner_end(spin); 2897 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2898 if (IS_ERR_OR_NULL(ce[n])) 2899 break; 2900 2901 intel_context_unpin(ce[n]); 2902 intel_context_put(ce[n]); 2903 } 2904 if (igt_live_test_end(&t)) 2905 err = -EIO; 2906 return err; 2907 } 2908 2909 static int live_preempt_ring(void *arg) 2910 { 2911 struct intel_gt *gt = arg; 2912 struct intel_engine_cs *engine; 2913 struct igt_spinner spin; 2914 enum intel_engine_id id; 2915 int err = 0; 2916 2917 /* 2918 * Check that we rollback large chunks of a ring in order to do a 2919 * preemption event. Similar to live_unlite_ring, but looking at 2920 * ring size rather than the impact of intel_ring_direction(). 2921 */ 2922 2923 if (igt_spinner_init(&spin, gt)) 2924 return -ENOMEM; 2925 2926 for_each_engine(engine, gt, id) { 2927 int n; 2928 2929 if (!intel_engine_has_preemption(engine)) 2930 continue; 2931 2932 if (!intel_engine_can_store_dword(engine)) 2933 continue; 2934 2935 st_engine_heartbeat_disable(engine); 2936 2937 for (n = 0; n <= 3; n++) { 2938 err = __live_preempt_ring(engine, &spin, 2939 n * SZ_4K / 4, SZ_4K); 2940 if (err) 2941 break; 2942 } 2943 2944 st_engine_heartbeat_enable(engine); 2945 if (err) 2946 break; 2947 } 2948 2949 igt_spinner_fini(&spin); 2950 return err; 2951 } 2952 2953 static int live_preempt_gang(void *arg) 2954 { 2955 struct intel_gt *gt = arg; 2956 struct intel_engine_cs *engine; 2957 enum intel_engine_id id; 2958 2959 /* 2960 * Build as long a chain of preempters as we can, with each 2961 * request higher priority than the last. Once we are ready, we release 2962 * the last batch which then precolates down the chain, each releasing 2963 * the next oldest in turn. The intent is to simply push as hard as we 2964 * can with the number of preemptions, trying to exceed narrow HW 2965 * limits. At a minimum, we insist that we can sort all the user 2966 * high priority levels into execution order. 2967 */ 2968 2969 for_each_engine(engine, gt, id) { 2970 struct i915_request *rq = NULL; 2971 struct igt_live_test t; 2972 IGT_TIMEOUT(end_time); 2973 int prio = 0; 2974 int err = 0; 2975 u32 *cs; 2976 2977 if (!intel_engine_has_preemption(engine)) 2978 continue; 2979 2980 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2981 return -EIO; 2982 2983 do { 2984 struct i915_sched_attr attr = { .priority = prio++ }; 2985 2986 err = create_gang(engine, &rq); 2987 if (err) 2988 break; 2989 2990 /* Submit each spinner at increasing priority */ 2991 engine->schedule(rq, &attr); 2992 } while (prio <= I915_PRIORITY_MAX && 2993 !__igt_timeout(end_time, NULL)); 2994 pr_debug("%s: Preempt chain of %d requests\n", 2995 engine->name, prio); 2996 2997 /* 2998 * Such that the last spinner is the highest priority and 2999 * should execute first. When that spinner completes, 3000 * it will terminate the next lowest spinner until there 3001 * are no more spinners and the gang is complete. 
3002 */ 3003 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC); 3004 if (!IS_ERR(cs)) { 3005 *cs = 0; 3006 i915_gem_object_unpin_map(rq->batch->obj); 3007 } else { 3008 err = PTR_ERR(cs); 3009 intel_gt_set_wedged(gt); 3010 } 3011 3012 while (rq) { /* wait for each rq from highest to lowest prio */ 3013 struct i915_request *n = list_next_entry(rq, mock.link); 3014 3015 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 3016 struct drm_printer p = 3017 drm_info_printer(engine->i915->drm.dev); 3018 3019 pr_err("Failed to flush chain of %d requests, at %d\n", 3020 prio, rq_prio(rq)); 3021 intel_engine_dump(engine, &p, 3022 "%s\n", engine->name); 3023 3024 err = -ETIME; 3025 } 3026 3027 i915_vma_put(rq->batch); 3028 i915_request_put(rq); 3029 rq = n; 3030 } 3031 3032 if (igt_live_test_end(&t)) 3033 err = -EIO; 3034 if (err) 3035 return err; 3036 } 3037 3038 return 0; 3039 } 3040 3041 static struct i915_vma * 3042 create_gpr_user(struct intel_engine_cs *engine, 3043 struct i915_vma *result, 3044 unsigned int offset) 3045 { 3046 struct drm_i915_gem_object *obj; 3047 struct i915_vma *vma; 3048 u32 *cs; 3049 int err; 3050 int i; 3051 3052 obj = i915_gem_object_create_internal(engine->i915, 4096); 3053 if (IS_ERR(obj)) 3054 return ERR_CAST(obj); 3055 3056 vma = i915_vma_instance(obj, result->vm, NULL); 3057 if (IS_ERR(vma)) { 3058 i915_gem_object_put(obj); 3059 return vma; 3060 } 3061 3062 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3063 if (err) { 3064 i915_vma_put(vma); 3065 return ERR_PTR(err); 3066 } 3067 3068 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 3069 if (IS_ERR(cs)) { 3070 i915_vma_put(vma); 3071 return ERR_CAST(cs); 3072 } 3073 3074 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 3075 *cs++ = MI_LOAD_REGISTER_IMM(1); 3076 *cs++ = CS_GPR(engine, 0); 3077 *cs++ = 1; 3078 3079 for (i = 1; i < NUM_GPR; i++) { 3080 u64 addr; 3081 3082 /* 3083 * Perform: GPR[i]++ 3084 * 3085 * As we read and write into the context saved GPR[i], if 3086 * we restart this batch buffer from an earlier point, we 3087 * will repeat the increment and store a value > 1. 
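		 * A value greater than 1 in the result buffer therefore
		 * means the batch was replayed from an earlier point.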
3088 */ 3089 *cs++ = MI_MATH(4); 3090 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 3091 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 3092 *cs++ = MI_MATH_ADD; 3093 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 3094 3095 addr = result->node.start + offset + i * sizeof(*cs); 3096 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 3097 *cs++ = CS_GPR(engine, 2 * i); 3098 *cs++ = lower_32_bits(addr); 3099 *cs++ = upper_32_bits(addr); 3100 3101 *cs++ = MI_SEMAPHORE_WAIT | 3102 MI_SEMAPHORE_POLL | 3103 MI_SEMAPHORE_SAD_GTE_SDD; 3104 *cs++ = i; 3105 *cs++ = lower_32_bits(result->node.start); 3106 *cs++ = upper_32_bits(result->node.start); 3107 } 3108 3109 *cs++ = MI_BATCH_BUFFER_END; 3110 i915_gem_object_flush_map(obj); 3111 i915_gem_object_unpin_map(obj); 3112 3113 return vma; 3114 } 3115 3116 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 3117 { 3118 struct drm_i915_gem_object *obj; 3119 struct i915_vma *vma; 3120 int err; 3121 3122 obj = i915_gem_object_create_internal(gt->i915, sz); 3123 if (IS_ERR(obj)) 3124 return ERR_CAST(obj); 3125 3126 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 3127 if (IS_ERR(vma)) { 3128 i915_gem_object_put(obj); 3129 return vma; 3130 } 3131 3132 err = i915_ggtt_pin(vma, NULL, 0, 0); 3133 if (err) { 3134 i915_vma_put(vma); 3135 return ERR_PTR(err); 3136 } 3137 3138 return vma; 3139 } 3140 3141 static struct i915_request * 3142 create_gpr_client(struct intel_engine_cs *engine, 3143 struct i915_vma *global, 3144 unsigned int offset) 3145 { 3146 struct i915_vma *batch, *vma; 3147 struct intel_context *ce; 3148 struct i915_request *rq; 3149 int err; 3150 3151 ce = intel_context_create(engine); 3152 if (IS_ERR(ce)) 3153 return ERR_CAST(ce); 3154 3155 vma = i915_vma_instance(global->obj, ce->vm, NULL); 3156 if (IS_ERR(vma)) { 3157 err = PTR_ERR(vma); 3158 goto out_ce; 3159 } 3160 3161 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3162 if (err) 3163 goto out_ce; 3164 3165 batch = create_gpr_user(engine, vma, offset); 3166 if (IS_ERR(batch)) { 3167 err = PTR_ERR(batch); 3168 goto out_vma; 3169 } 3170 3171 rq = intel_context_create_request(ce); 3172 if (IS_ERR(rq)) { 3173 err = PTR_ERR(rq); 3174 goto out_batch; 3175 } 3176 3177 i915_vma_lock(vma); 3178 err = i915_request_await_object(rq, vma->obj, false); 3179 if (!err) 3180 err = i915_vma_move_to_active(vma, rq, 0); 3181 i915_vma_unlock(vma); 3182 3183 i915_vma_lock(batch); 3184 if (!err) 3185 err = i915_request_await_object(rq, batch->obj, false); 3186 if (!err) 3187 err = i915_vma_move_to_active(batch, rq, 0); 3188 if (!err) 3189 err = rq->engine->emit_bb_start(rq, 3190 batch->node.start, 3191 PAGE_SIZE, 0); 3192 i915_vma_unlock(batch); 3193 i915_vma_unpin(batch); 3194 3195 if (!err) 3196 i915_request_get(rq); 3197 i915_request_add(rq); 3198 3199 out_batch: 3200 i915_vma_put(batch); 3201 out_vma: 3202 i915_vma_unpin(vma); 3203 out_ce: 3204 intel_context_put(ce); 3205 return err ? 
ERR_PTR(err) : rq; 3206 } 3207 3208 static int preempt_user(struct intel_engine_cs *engine, 3209 struct i915_vma *global, 3210 int id) 3211 { 3212 struct i915_sched_attr attr = { 3213 .priority = I915_PRIORITY_MAX 3214 }; 3215 struct i915_request *rq; 3216 int err = 0; 3217 u32 *cs; 3218 3219 rq = intel_engine_create_kernel_request(engine); 3220 if (IS_ERR(rq)) 3221 return PTR_ERR(rq); 3222 3223 cs = intel_ring_begin(rq, 4); 3224 if (IS_ERR(cs)) { 3225 i915_request_add(rq); 3226 return PTR_ERR(cs); 3227 } 3228 3229 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 3230 *cs++ = i915_ggtt_offset(global); 3231 *cs++ = 0; 3232 *cs++ = id; 3233 3234 intel_ring_advance(rq, cs); 3235 3236 i915_request_get(rq); 3237 i915_request_add(rq); 3238 3239 engine->schedule(rq, &attr); 3240 3241 if (i915_request_wait(rq, 0, HZ / 2) < 0) 3242 err = -ETIME; 3243 i915_request_put(rq); 3244 3245 return err; 3246 } 3247 3248 static int live_preempt_user(void *arg) 3249 { 3250 struct intel_gt *gt = arg; 3251 struct intel_engine_cs *engine; 3252 struct i915_vma *global; 3253 enum intel_engine_id id; 3254 u32 *result; 3255 int err = 0; 3256 3257 /* 3258 * In our other tests, we look at preemption in carefully 3259 * controlled conditions in the ringbuffer. Since most of the 3260 * time is spent in user batches, most of our preemptions naturally 3261 * occur there. We want to verify that when we preempt inside a batch 3262 * we continue on from the current instruction and do not roll back 3263 * to the start, or another earlier arbitration point. 3264 * 3265 * To verify this, we create a batch which is a mixture of 3266 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3267 * a few preempting contexts thrown into the mix, we look for any 3268 * repeated instructions (which show up as incorrect values). 
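	 * The preempt_user() kernel requests release the client batches by
	 * bumping the shared semaphore at the start of the global buffer.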
3269 */ 3270 3271 global = create_global(gt, 4096); 3272 if (IS_ERR(global)) 3273 return PTR_ERR(global); 3274 3275 result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC); 3276 if (IS_ERR(result)) { 3277 i915_vma_unpin_and_release(&global, 0); 3278 return PTR_ERR(result); 3279 } 3280 3281 for_each_engine(engine, gt, id) { 3282 struct i915_request *client[3] = {}; 3283 struct igt_live_test t; 3284 int i; 3285 3286 if (!intel_engine_has_preemption(engine)) 3287 continue; 3288 3289 if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS) 3290 continue; /* we need per-context GPR */ 3291 3292 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3293 err = -EIO; 3294 break; 3295 } 3296 3297 memset(result, 0, 4096); 3298 3299 for (i = 0; i < ARRAY_SIZE(client); i++) { 3300 struct i915_request *rq; 3301 3302 rq = create_gpr_client(engine, global, 3303 NUM_GPR * i * sizeof(u32)); 3304 if (IS_ERR(rq)) { 3305 err = PTR_ERR(rq); 3306 goto end_test; 3307 } 3308 3309 client[i] = rq; 3310 } 3311 3312 /* Continuously preempt the set of 3 running contexts */ 3313 for (i = 1; i <= NUM_GPR; i++) { 3314 err = preempt_user(engine, global, i); 3315 if (err) 3316 goto end_test; 3317 } 3318 3319 if (READ_ONCE(result[0]) != NUM_GPR) { 3320 pr_err("%s: Failed to release semaphore\n", 3321 engine->name); 3322 err = -EIO; 3323 goto end_test; 3324 } 3325 3326 for (i = 0; i < ARRAY_SIZE(client); i++) { 3327 int gpr; 3328 3329 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3330 err = -ETIME; 3331 goto end_test; 3332 } 3333 3334 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3335 if (result[NUM_GPR * i + gpr] != 1) { 3336 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3337 engine->name, 3338 i, gpr, result[NUM_GPR * i + gpr]); 3339 err = -EINVAL; 3340 goto end_test; 3341 } 3342 } 3343 } 3344 3345 end_test: 3346 for (i = 0; i < ARRAY_SIZE(client); i++) { 3347 if (!client[i]) 3348 break; 3349 3350 i915_request_put(client[i]); 3351 } 3352 3353 /* Flush the semaphores on error */ 3354 smp_store_mb(result[0], -1); 3355 if (igt_live_test_end(&t)) 3356 err = -EIO; 3357 if (err) 3358 break; 3359 } 3360 3361 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3362 return err; 3363 } 3364 3365 static int live_preempt_timeout(void *arg) 3366 { 3367 struct intel_gt *gt = arg; 3368 struct i915_gem_context *ctx_hi, *ctx_lo; 3369 struct igt_spinner spin_lo; 3370 struct intel_engine_cs *engine; 3371 enum intel_engine_id id; 3372 int err = -ENOMEM; 3373 3374 /* 3375 * Check that we force preemption to occur by cancelling the previous 3376 * context if it refuses to yield the GPU. 
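	 * We submit a non-preemptible spinner and drop the preempt timeout
	 * to a single jiffie, so the forced reset should fire almost at once.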
3377 */ 3378 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3379 return 0; 3380 3381 if (!intel_has_reset_engine(gt)) 3382 return 0; 3383 3384 if (igt_spinner_init(&spin_lo, gt)) 3385 return -ENOMEM; 3386 3387 ctx_hi = kernel_context(gt->i915); 3388 if (!ctx_hi) 3389 goto err_spin_lo; 3390 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; 3391 3392 ctx_lo = kernel_context(gt->i915); 3393 if (!ctx_lo) 3394 goto err_ctx_hi; 3395 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; 3396 3397 for_each_engine(engine, gt, id) { 3398 unsigned long saved_timeout; 3399 struct i915_request *rq; 3400 3401 if (!intel_engine_has_preemption(engine)) 3402 continue; 3403 3404 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3405 MI_NOOP); /* preemption disabled */ 3406 if (IS_ERR(rq)) { 3407 err = PTR_ERR(rq); 3408 goto err_ctx_lo; 3409 } 3410 3411 i915_request_add(rq); 3412 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3413 intel_gt_set_wedged(gt); 3414 err = -EIO; 3415 goto err_ctx_lo; 3416 } 3417 3418 rq = igt_request_alloc(ctx_hi, engine); 3419 if (IS_ERR(rq)) { 3420 igt_spinner_end(&spin_lo); 3421 err = PTR_ERR(rq); 3422 goto err_ctx_lo; 3423 } 3424 3425 /* Flush the previous CS ack before changing timeouts */ 3426 while (READ_ONCE(engine->execlists.pending[0])) 3427 cpu_relax(); 3428 3429 saved_timeout = engine->props.preempt_timeout_ms; 3430 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3431 3432 i915_request_get(rq); 3433 i915_request_add(rq); 3434 3435 intel_engine_flush_submission(engine); 3436 engine->props.preempt_timeout_ms = saved_timeout; 3437 3438 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3439 intel_gt_set_wedged(gt); 3440 i915_request_put(rq); 3441 err = -ETIME; 3442 goto err_ctx_lo; 3443 } 3444 3445 igt_spinner_end(&spin_lo); 3446 i915_request_put(rq); 3447 } 3448 3449 err = 0; 3450 err_ctx_lo: 3451 kernel_context_close(ctx_lo); 3452 err_ctx_hi: 3453 kernel_context_close(ctx_hi); 3454 err_spin_lo: 3455 igt_spinner_fini(&spin_lo); 3456 return err; 3457 } 3458 3459 static int random_range(struct rnd_state *rnd, int min, int max) 3460 { 3461 return i915_prandom_u32_max_state(max - min, rnd) + min; 3462 } 3463 3464 static int random_priority(struct rnd_state *rnd) 3465 { 3466 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3467 } 3468 3469 struct preempt_smoke { 3470 struct intel_gt *gt; 3471 struct i915_gem_context **contexts; 3472 struct intel_engine_cs *engine; 3473 struct drm_i915_gem_object *batch; 3474 unsigned int ncontext; 3475 struct rnd_state prng; 3476 unsigned long count; 3477 }; 3478 3479 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3480 { 3481 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3482 &smoke->prng)]; 3483 } 3484 3485 static int smoke_submit(struct preempt_smoke *smoke, 3486 struct i915_gem_context *ctx, int prio, 3487 struct drm_i915_gem_object *batch) 3488 { 3489 struct i915_request *rq; 3490 struct i915_vma *vma = NULL; 3491 int err = 0; 3492 3493 if (batch) { 3494 struct i915_address_space *vm; 3495 3496 vm = i915_gem_context_get_vm_rcu(ctx); 3497 vma = i915_vma_instance(batch, vm, NULL); 3498 i915_vm_put(vm); 3499 if (IS_ERR(vma)) 3500 return PTR_ERR(vma); 3501 3502 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3503 if (err) 3504 return err; 3505 } 3506 3507 ctx->sched.priority = prio; 3508 3509 rq = igt_request_alloc(ctx, smoke->engine); 3510 if (IS_ERR(rq)) { 3511 err = PTR_ERR(rq); 3512 goto unpin; 3513 } 3514 3515 if (vma) { 3516 i915_vma_lock(vma); 3517 err = 
i915_request_await_object(rq, vma->obj, false); 3518 if (!err) 3519 err = i915_vma_move_to_active(vma, rq, 0); 3520 if (!err) 3521 err = rq->engine->emit_bb_start(rq, 3522 vma->node.start, 3523 PAGE_SIZE, 0); 3524 i915_vma_unlock(vma); 3525 } 3526 3527 i915_request_add(rq); 3528 3529 unpin: 3530 if (vma) 3531 i915_vma_unpin(vma); 3532 3533 return err; 3534 } 3535 3536 static int smoke_crescendo_thread(void *arg) 3537 { 3538 struct preempt_smoke *smoke = arg; 3539 IGT_TIMEOUT(end_time); 3540 unsigned long count; 3541 3542 count = 0; 3543 do { 3544 struct i915_gem_context *ctx = smoke_context(smoke); 3545 int err; 3546 3547 err = smoke_submit(smoke, 3548 ctx, count % I915_PRIORITY_MAX, 3549 smoke->batch); 3550 if (err) 3551 return err; 3552 3553 count++; 3554 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3555 3556 smoke->count = count; 3557 return 0; 3558 } 3559 3560 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3561 #define BATCH BIT(0) 3562 { 3563 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3564 struct preempt_smoke arg[I915_NUM_ENGINES]; 3565 struct intel_engine_cs *engine; 3566 enum intel_engine_id id; 3567 unsigned long count; 3568 int err = 0; 3569 3570 for_each_engine(engine, smoke->gt, id) { 3571 arg[id] = *smoke; 3572 arg[id].engine = engine; 3573 if (!(flags & BATCH)) 3574 arg[id].batch = NULL; 3575 arg[id].count = 0; 3576 3577 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 3578 "igt/smoke:%d", id); 3579 if (IS_ERR(tsk[id])) { 3580 err = PTR_ERR(tsk[id]); 3581 break; 3582 } 3583 get_task_struct(tsk[id]); 3584 } 3585 3586 yield(); /* start all threads before we kthread_stop() */ 3587 3588 count = 0; 3589 for_each_engine(engine, smoke->gt, id) { 3590 int status; 3591 3592 if (IS_ERR_OR_NULL(tsk[id])) 3593 continue; 3594 3595 status = kthread_stop(tsk[id]); 3596 if (status && !err) 3597 err = status; 3598 3599 count += arg[id].count; 3600 3601 put_task_struct(tsk[id]); 3602 } 3603 3604 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3605 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3606 return 0; 3607 } 3608 3609 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3610 { 3611 enum intel_engine_id id; 3612 IGT_TIMEOUT(end_time); 3613 unsigned long count; 3614 3615 count = 0; 3616 do { 3617 for_each_engine(smoke->engine, smoke->gt, id) { 3618 struct i915_gem_context *ctx = smoke_context(smoke); 3619 int err; 3620 3621 err = smoke_submit(smoke, 3622 ctx, random_priority(&smoke->prng), 3623 flags & BATCH ? 
smoke->batch : NULL); 3624 if (err) 3625 return err; 3626 3627 count++; 3628 } 3629 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3630 3631 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3632 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3633 return 0; 3634 } 3635 3636 static int live_preempt_smoke(void *arg) 3637 { 3638 struct preempt_smoke smoke = { 3639 .gt = arg, 3640 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3641 .ncontext = 256, 3642 }; 3643 const unsigned int phase[] = { 0, BATCH }; 3644 struct igt_live_test t; 3645 int err = -ENOMEM; 3646 u32 *cs; 3647 int n; 3648 3649 smoke.contexts = kmalloc_array(smoke.ncontext, 3650 sizeof(*smoke.contexts), 3651 GFP_KERNEL); 3652 if (!smoke.contexts) 3653 return -ENOMEM; 3654 3655 smoke.batch = 3656 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3657 if (IS_ERR(smoke.batch)) { 3658 err = PTR_ERR(smoke.batch); 3659 goto err_free; 3660 } 3661 3662 cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB); 3663 if (IS_ERR(cs)) { 3664 err = PTR_ERR(cs); 3665 goto err_batch; 3666 } 3667 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3668 cs[n] = MI_ARB_CHECK; 3669 cs[n] = MI_BATCH_BUFFER_END; 3670 i915_gem_object_flush_map(smoke.batch); 3671 i915_gem_object_unpin_map(smoke.batch); 3672 3673 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3674 err = -EIO; 3675 goto err_batch; 3676 } 3677 3678 for (n = 0; n < smoke.ncontext; n++) { 3679 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3680 if (!smoke.contexts[n]) 3681 goto err_ctx; 3682 } 3683 3684 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3685 err = smoke_crescendo(&smoke, phase[n]); 3686 if (err) 3687 goto err_ctx; 3688 3689 err = smoke_random(&smoke, phase[n]); 3690 if (err) 3691 goto err_ctx; 3692 } 3693 3694 err_ctx: 3695 if (igt_live_test_end(&t)) 3696 err = -EIO; 3697 3698 for (n = 0; n < smoke.ncontext; n++) { 3699 if (!smoke.contexts[n]) 3700 break; 3701 kernel_context_close(smoke.contexts[n]); 3702 } 3703 3704 err_batch: 3705 i915_gem_object_put(smoke.batch); 3706 err_free: 3707 kfree(smoke.contexts); 3708 3709 return err; 3710 } 3711 3712 static int nop_virtual_engine(struct intel_gt *gt, 3713 struct intel_engine_cs **siblings, 3714 unsigned int nsibling, 3715 unsigned int nctx, 3716 unsigned int flags) 3717 #define CHAIN BIT(0) 3718 { 3719 IGT_TIMEOUT(end_time); 3720 struct i915_request *request[16] = {}; 3721 struct intel_context *ve[16]; 3722 unsigned long n, prime, nc; 3723 struct igt_live_test t; 3724 ktime_t times[2] = {}; 3725 int err; 3726 3727 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3728 3729 for (n = 0; n < nctx; n++) { 3730 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3731 if (IS_ERR(ve[n])) { 3732 err = PTR_ERR(ve[n]); 3733 nctx = n; 3734 goto out; 3735 } 3736 3737 err = intel_context_pin(ve[n]); 3738 if (err) { 3739 intel_context_put(ve[n]); 3740 nctx = n; 3741 goto out; 3742 } 3743 } 3744 3745 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3746 if (err) 3747 goto out; 3748 3749 for_each_prime_number_from(prime, 1, 8192) { 3750 times[1] = ktime_get_raw(); 3751 3752 if (flags & CHAIN) { 3753 for (nc = 0; nc < nctx; nc++) { 3754 for (n = 0; n < prime; n++) { 3755 struct i915_request *rq; 3756 3757 rq = i915_request_create(ve[nc]); 3758 if (IS_ERR(rq)) { 3759 err = PTR_ERR(rq); 3760 goto out; 3761 } 3762 3763 if (request[nc]) 3764 i915_request_put(request[nc]); 3765 request[nc] = i915_request_get(rq); 3766 
i915_request_add(rq); 3767 } 3768 } 3769 } else { 3770 for (n = 0; n < prime; n++) { 3771 for (nc = 0; nc < nctx; nc++) { 3772 struct i915_request *rq; 3773 3774 rq = i915_request_create(ve[nc]); 3775 if (IS_ERR(rq)) { 3776 err = PTR_ERR(rq); 3777 goto out; 3778 } 3779 3780 if (request[nc]) 3781 i915_request_put(request[nc]); 3782 request[nc] = i915_request_get(rq); 3783 i915_request_add(rq); 3784 } 3785 } 3786 } 3787 3788 for (nc = 0; nc < nctx; nc++) { 3789 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3790 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3791 __func__, ve[0]->engine->name, 3792 request[nc]->fence.context, 3793 request[nc]->fence.seqno); 3794 3795 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3796 __func__, ve[0]->engine->name, 3797 request[nc]->fence.context, 3798 request[nc]->fence.seqno); 3799 GEM_TRACE_DUMP(); 3800 intel_gt_set_wedged(gt); 3801 break; 3802 } 3803 } 3804 3805 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3806 if (prime == 1) 3807 times[0] = times[1]; 3808 3809 for (nc = 0; nc < nctx; nc++) { 3810 i915_request_put(request[nc]); 3811 request[nc] = NULL; 3812 } 3813 3814 if (__igt_timeout(end_time, NULL)) 3815 break; 3816 } 3817 3818 err = igt_live_test_end(&t); 3819 if (err) 3820 goto out; 3821 3822 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3823 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3824 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3825 3826 out: 3827 if (igt_flush_test(gt->i915)) 3828 err = -EIO; 3829 3830 for (nc = 0; nc < nctx; nc++) { 3831 i915_request_put(request[nc]); 3832 intel_context_unpin(ve[nc]); 3833 intel_context_put(ve[nc]); 3834 } 3835 return err; 3836 } 3837 3838 static unsigned int 3839 __select_siblings(struct intel_gt *gt, 3840 unsigned int class, 3841 struct intel_engine_cs **siblings, 3842 bool (*filter)(const struct intel_engine_cs *)) 3843 { 3844 unsigned int n = 0; 3845 unsigned int inst; 3846 3847 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3848 if (!gt->engine_class[class][inst]) 3849 continue; 3850 3851 if (filter && !filter(gt->engine_class[class][inst])) 3852 continue; 3853 3854 siblings[n++] = gt->engine_class[class][inst]; 3855 } 3856 3857 return n; 3858 } 3859 3860 static unsigned int 3861 select_siblings(struct intel_gt *gt, 3862 unsigned int class, 3863 struct intel_engine_cs **siblings) 3864 { 3865 return __select_siblings(gt, class, siblings, NULL); 3866 } 3867 3868 static int live_virtual_engine(void *arg) 3869 { 3870 struct intel_gt *gt = arg; 3871 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3872 struct intel_engine_cs *engine; 3873 enum intel_engine_id id; 3874 unsigned int class; 3875 int err; 3876 3877 if (intel_uc_uses_guc_submission(>->uc)) 3878 return 0; 3879 3880 for_each_engine(engine, gt, id) { 3881 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3882 if (err) { 3883 pr_err("Failed to wrap engine %s: err=%d\n", 3884 engine->name, err); 3885 return err; 3886 } 3887 } 3888 3889 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3890 int nsibling, n; 3891 3892 nsibling = select_siblings(gt, class, siblings); 3893 if (nsibling < 2) 3894 continue; 3895 3896 for (n = 1; n <= nsibling + 1; n++) { 3897 err = nop_virtual_engine(gt, siblings, nsibling, 3898 n, 0); 3899 if (err) 3900 return err; 3901 } 3902 3903 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3904 if (err) 3905 return err; 3906 } 3907 3908 return 0; 3909 } 3910 3911 static int mask_virtual_engine(struct intel_gt *gt, 3912 struct intel_engine_cs **siblings, 
3913 unsigned int nsibling) 3914 { 3915 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3916 struct intel_context *ve; 3917 struct igt_live_test t; 3918 unsigned int n; 3919 int err; 3920 3921 /* 3922 * Check that by setting the execution mask on a request, we can 3923 * restrict it to our desired engine within the virtual engine. 3924 */ 3925 3926 ve = intel_execlists_create_virtual(siblings, nsibling); 3927 if (IS_ERR(ve)) { 3928 err = PTR_ERR(ve); 3929 goto out_close; 3930 } 3931 3932 err = intel_context_pin(ve); 3933 if (err) 3934 goto out_put; 3935 3936 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3937 if (err) 3938 goto out_unpin; 3939 3940 for (n = 0; n < nsibling; n++) { 3941 request[n] = i915_request_create(ve); 3942 if (IS_ERR(request[n])) { 3943 err = PTR_ERR(request[n]); 3944 nsibling = n; 3945 goto out; 3946 } 3947 3948 /* Reverse order as it's more likely to be unnatural */ 3949 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3950 3951 i915_request_get(request[n]); 3952 i915_request_add(request[n]); 3953 } 3954 3955 for (n = 0; n < nsibling; n++) { 3956 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3957 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3958 __func__, ve->engine->name, 3959 request[n]->fence.context, 3960 request[n]->fence.seqno); 3961 3962 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3963 __func__, ve->engine->name, 3964 request[n]->fence.context, 3965 request[n]->fence.seqno); 3966 GEM_TRACE_DUMP(); 3967 intel_gt_set_wedged(gt); 3968 err = -EIO; 3969 goto out; 3970 } 3971 3972 if (request[n]->engine != siblings[nsibling - n - 1]) { 3973 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3974 request[n]->engine->name, 3975 siblings[nsibling - n - 1]->name); 3976 err = -EINVAL; 3977 goto out; 3978 } 3979 } 3980 3981 err = igt_live_test_end(&t); 3982 out: 3983 if (igt_flush_test(gt->i915)) 3984 err = -EIO; 3985 3986 for (n = 0; n < nsibling; n++) 3987 i915_request_put(request[n]); 3988 3989 out_unpin: 3990 intel_context_unpin(ve); 3991 out_put: 3992 intel_context_put(ve); 3993 out_close: 3994 return err; 3995 } 3996 3997 static int live_virtual_mask(void *arg) 3998 { 3999 struct intel_gt *gt = arg; 4000 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4001 unsigned int class; 4002 int err; 4003 4004 if (intel_uc_uses_guc_submission(>->uc)) 4005 return 0; 4006 4007 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4008 unsigned int nsibling; 4009 4010 nsibling = select_siblings(gt, class, siblings); 4011 if (nsibling < 2) 4012 continue; 4013 4014 err = mask_virtual_engine(gt, siblings, nsibling); 4015 if (err) 4016 return err; 4017 } 4018 4019 return 0; 4020 } 4021 4022 static int slicein_virtual_engine(struct intel_gt *gt, 4023 struct intel_engine_cs **siblings, 4024 unsigned int nsibling) 4025 { 4026 const long timeout = slice_timeout(siblings[0]); 4027 struct intel_context *ce; 4028 struct i915_request *rq; 4029 struct igt_spinner spin; 4030 unsigned int n; 4031 int err = 0; 4032 4033 /* 4034 * Virtual requests must take part in timeslicing on the target engines. 
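	 * With a spinner already running on every sibling, a later virtual
	 * request must still be granted a timeslice and complete.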
4035 */ 4036 4037 if (igt_spinner_init(&spin, gt)) 4038 return -ENOMEM; 4039 4040 for (n = 0; n < nsibling; n++) { 4041 ce = intel_context_create(siblings[n]); 4042 if (IS_ERR(ce)) { 4043 err = PTR_ERR(ce); 4044 goto out; 4045 } 4046 4047 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4048 intel_context_put(ce); 4049 if (IS_ERR(rq)) { 4050 err = PTR_ERR(rq); 4051 goto out; 4052 } 4053 4054 i915_request_add(rq); 4055 } 4056 4057 ce = intel_execlists_create_virtual(siblings, nsibling); 4058 if (IS_ERR(ce)) { 4059 err = PTR_ERR(ce); 4060 goto out; 4061 } 4062 4063 rq = intel_context_create_request(ce); 4064 intel_context_put(ce); 4065 if (IS_ERR(rq)) { 4066 err = PTR_ERR(rq); 4067 goto out; 4068 } 4069 4070 i915_request_get(rq); 4071 i915_request_add(rq); 4072 if (i915_request_wait(rq, 0, timeout) < 0) { 4073 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", 4074 __func__, rq->engine->name); 4075 GEM_TRACE_DUMP(); 4076 intel_gt_set_wedged(gt); 4077 err = -EIO; 4078 } 4079 i915_request_put(rq); 4080 4081 out: 4082 igt_spinner_end(&spin); 4083 if (igt_flush_test(gt->i915)) 4084 err = -EIO; 4085 igt_spinner_fini(&spin); 4086 return err; 4087 } 4088 4089 static int sliceout_virtual_engine(struct intel_gt *gt, 4090 struct intel_engine_cs **siblings, 4091 unsigned int nsibling) 4092 { 4093 const long timeout = slice_timeout(siblings[0]); 4094 struct intel_context *ce; 4095 struct i915_request *rq; 4096 struct igt_spinner spin; 4097 unsigned int n; 4098 int err = 0; 4099 4100 /* 4101 * Virtual requests must allow others a fair timeslice. 4102 */ 4103 4104 if (igt_spinner_init(&spin, gt)) 4105 return -ENOMEM; 4106 4107 /* XXX We do not handle oversubscription and fairness with normal rq */ 4108 for (n = 0; n < nsibling; n++) { 4109 ce = intel_execlists_create_virtual(siblings, nsibling); 4110 if (IS_ERR(ce)) { 4111 err = PTR_ERR(ce); 4112 goto out; 4113 } 4114 4115 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4116 intel_context_put(ce); 4117 if (IS_ERR(rq)) { 4118 err = PTR_ERR(rq); 4119 goto out; 4120 } 4121 4122 i915_request_add(rq); 4123 } 4124 4125 for (n = 0; !err && n < nsibling; n++) { 4126 ce = intel_context_create(siblings[n]); 4127 if (IS_ERR(ce)) { 4128 err = PTR_ERR(ce); 4129 goto out; 4130 } 4131 4132 rq = intel_context_create_request(ce); 4133 intel_context_put(ce); 4134 if (IS_ERR(rq)) { 4135 err = PTR_ERR(rq); 4136 goto out; 4137 } 4138 4139 i915_request_get(rq); 4140 i915_request_add(rq); 4141 if (i915_request_wait(rq, 0, timeout) < 0) { 4142 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", 4143 __func__, siblings[n]->name); 4144 GEM_TRACE_DUMP(); 4145 intel_gt_set_wedged(gt); 4146 err = -EIO; 4147 } 4148 i915_request_put(rq); 4149 } 4150 4151 out: 4152 igt_spinner_end(&spin); 4153 if (igt_flush_test(gt->i915)) 4154 err = -EIO; 4155 igt_spinner_fini(&spin); 4156 return err; 4157 } 4158 4159 static int live_virtual_slice(void *arg) 4160 { 4161 struct intel_gt *gt = arg; 4162 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4163 unsigned int class; 4164 int err; 4165 4166 if (intel_uc_uses_guc_submission(>->uc)) 4167 return 0; 4168 4169 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4170 unsigned int nsibling; 4171 4172 nsibling = __select_siblings(gt, class, siblings, 4173 intel_engine_has_timeslices); 4174 if (nsibling < 2) 4175 continue; 4176 4177 err = slicein_virtual_engine(gt, siblings, nsibling); 4178 if (err) 4179 return err; 4180 4181 err = sliceout_virtual_engine(gt, siblings, nsibling); 4182 if (err) 4183 
return err; 4184 } 4185 4186 return 0; 4187 } 4188 4189 static int preserved_virtual_engine(struct intel_gt *gt, 4190 struct intel_engine_cs **siblings, 4191 unsigned int nsibling) 4192 { 4193 struct i915_request *last = NULL; 4194 struct intel_context *ve; 4195 struct i915_vma *scratch; 4196 struct igt_live_test t; 4197 unsigned int n; 4198 int err = 0; 4199 u32 *cs; 4200 4201 scratch = 4202 __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm, 4203 PAGE_SIZE); 4204 if (IS_ERR(scratch)) 4205 return PTR_ERR(scratch); 4206 4207 err = i915_vma_sync(scratch); 4208 if (err) 4209 goto out_scratch; 4210 4211 ve = intel_execlists_create_virtual(siblings, nsibling); 4212 if (IS_ERR(ve)) { 4213 err = PTR_ERR(ve); 4214 goto out_scratch; 4215 } 4216 4217 err = intel_context_pin(ve); 4218 if (err) 4219 goto out_put; 4220 4221 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 4222 if (err) 4223 goto out_unpin; 4224 4225 for (n = 0; n < NUM_GPR_DW; n++) { 4226 struct intel_engine_cs *engine = siblings[n % nsibling]; 4227 struct i915_request *rq; 4228 4229 rq = i915_request_create(ve); 4230 if (IS_ERR(rq)) { 4231 err = PTR_ERR(rq); 4232 goto out_end; 4233 } 4234 4235 i915_request_put(last); 4236 last = i915_request_get(rq); 4237 4238 cs = intel_ring_begin(rq, 8); 4239 if (IS_ERR(cs)) { 4240 i915_request_add(rq); 4241 err = PTR_ERR(cs); 4242 goto out_end; 4243 } 4244 4245 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4246 *cs++ = CS_GPR(engine, n); 4247 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4248 *cs++ = 0; 4249 4250 *cs++ = MI_LOAD_REGISTER_IMM(1); 4251 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 4252 *cs++ = n + 1; 4253 4254 *cs++ = MI_NOOP; 4255 intel_ring_advance(rq, cs); 4256 4257 /* Restrict this request to run on a particular engine */ 4258 rq->execution_mask = engine->mask; 4259 i915_request_add(rq); 4260 } 4261 4262 if (i915_request_wait(last, 0, HZ / 5) < 0) { 4263 err = -ETIME; 4264 goto out_end; 4265 } 4266 4267 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); 4268 if (IS_ERR(cs)) { 4269 err = PTR_ERR(cs); 4270 goto out_end; 4271 } 4272 4273 for (n = 0; n < NUM_GPR_DW; n++) { 4274 if (cs[n] != n) { 4275 pr_err("Incorrect value[%d] found for GPR[%d]\n", 4276 cs[n], n); 4277 err = -EINVAL; 4278 break; 4279 } 4280 } 4281 4282 i915_gem_object_unpin_map(scratch->obj); 4283 4284 out_end: 4285 if (igt_live_test_end(&t)) 4286 err = -EIO; 4287 i915_request_put(last); 4288 out_unpin: 4289 intel_context_unpin(ve); 4290 out_put: 4291 intel_context_put(ve); 4292 out_scratch: 4293 i915_vma_unpin_and_release(&scratch, 0); 4294 return err; 4295 } 4296 4297 static int live_virtual_preserved(void *arg) 4298 { 4299 struct intel_gt *gt = arg; 4300 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4301 unsigned int class; 4302 4303 /* 4304 * Check that the context image retains non-privileged (user) registers 4305 * from one engine to the next. For this we check that the CS_GPR 4306 * are preserved. 4307 */ 4308 4309 if (intel_uc_uses_guc_submission(>->uc)) 4310 return 0; 4311 4312 /* As we use CS_GPR we cannot run before they existed on all engines. 
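	 * (hence the GRAPHICS_VER(gt->i915) < 9 bail-out below)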
 */
	if (GRAPHICS_VER(gt->i915) < 9)
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = select_siblings(gt, class, siblings);
		if (nsibling < 2)
			continue;

		err = preserved_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it gets requested that perhaps the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
4382 */ 4383 4384 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 4385 4386 if (igt_spinner_init(&spin, gt)) 4387 return -ENOMEM; 4388 4389 err = 0; 4390 rq[0] = ERR_PTR(-ENOMEM); 4391 for_each_engine(master, gt, id) { 4392 struct i915_sw_fence fence = {}; 4393 struct intel_context *ce; 4394 4395 if (master->class == class) 4396 continue; 4397 4398 ce = intel_context_create(master); 4399 if (IS_ERR(ce)) { 4400 err = PTR_ERR(ce); 4401 goto out; 4402 } 4403 4404 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 4405 4406 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 4407 intel_context_put(ce); 4408 if (IS_ERR(rq[0])) { 4409 err = PTR_ERR(rq[0]); 4410 goto out; 4411 } 4412 i915_request_get(rq[0]); 4413 4414 if (flags & BOND_SCHEDULE) { 4415 onstack_fence_init(&fence); 4416 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4417 &fence, 4418 GFP_KERNEL); 4419 } 4420 4421 i915_request_add(rq[0]); 4422 if (err < 0) 4423 goto out; 4424 4425 if (!(flags & BOND_SCHEDULE) && 4426 !igt_wait_for_spinner(&spin, rq[0])) { 4427 err = -EIO; 4428 goto out; 4429 } 4430 4431 for (n = 0; n < nsibling; n++) { 4432 struct intel_context *ve; 4433 4434 ve = intel_execlists_create_virtual(siblings, nsibling); 4435 if (IS_ERR(ve)) { 4436 err = PTR_ERR(ve); 4437 onstack_fence_fini(&fence); 4438 goto out; 4439 } 4440 4441 err = intel_virtual_engine_attach_bond(ve->engine, 4442 master, 4443 siblings[n]); 4444 if (err) { 4445 intel_context_put(ve); 4446 onstack_fence_fini(&fence); 4447 goto out; 4448 } 4449 4450 err = intel_context_pin(ve); 4451 intel_context_put(ve); 4452 if (err) { 4453 onstack_fence_fini(&fence); 4454 goto out; 4455 } 4456 4457 rq[n + 1] = i915_request_create(ve); 4458 intel_context_unpin(ve); 4459 if (IS_ERR(rq[n + 1])) { 4460 err = PTR_ERR(rq[n + 1]); 4461 onstack_fence_fini(&fence); 4462 goto out; 4463 } 4464 i915_request_get(rq[n + 1]); 4465 4466 err = i915_request_await_execution(rq[n + 1], 4467 &rq[0]->fence, 4468 ve->engine->bond_execute); 4469 i915_request_add(rq[n + 1]); 4470 if (err < 0) { 4471 onstack_fence_fini(&fence); 4472 goto out; 4473 } 4474 } 4475 onstack_fence_fini(&fence); 4476 intel_engine_flush_submission(master); 4477 igt_spinner_end(&spin); 4478 4479 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4480 pr_err("Master request did not execute (on %s)!\n", 4481 rq[0]->engine->name); 4482 err = -EIO; 4483 goto out; 4484 } 4485 4486 for (n = 0; n < nsibling; n++) { 4487 if (i915_request_wait(rq[n + 1], 0, 4488 MAX_SCHEDULE_TIMEOUT) < 0) { 4489 err = -EIO; 4490 goto out; 4491 } 4492 4493 if (rq[n + 1]->engine != siblings[n]) { 4494 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4495 siblings[n]->name, 4496 rq[n + 1]->engine->name, 4497 rq[0]->engine->name); 4498 err = -EINVAL; 4499 goto out; 4500 } 4501 } 4502 4503 for (n = 0; !IS_ERR(rq[n]); n++) 4504 i915_request_put(rq[n]); 4505 rq[0] = ERR_PTR(-ENOMEM); 4506 } 4507 4508 out: 4509 for (n = 0; !IS_ERR(rq[n]); n++) 4510 i915_request_put(rq[n]); 4511 if (igt_flush_test(gt->i915)) 4512 err = -EIO; 4513 4514 igt_spinner_fini(&spin); 4515 return err; 4516 } 4517 4518 static int live_virtual_bond(void *arg) 4519 { 4520 static const struct phase { 4521 const char *name; 4522 unsigned int flags; 4523 } phases[] = { 4524 { "", 0 }, 4525 { "schedule", BOND_SCHEDULE }, 4526 { }, 4527 }; 4528 struct intel_gt *gt = arg; 4529 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4530 unsigned int class; 4531 int err; 4532 4533 if (intel_uc_uses_guc_submission(>->uc)) 4534 
return 0; 4535 4536 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4537 const struct phase *p; 4538 int nsibling; 4539 4540 nsibling = select_siblings(gt, class, siblings); 4541 if (nsibling < 2) 4542 continue; 4543 4544 for (p = phases; p->name; p++) { 4545 err = bond_virtual_engine(gt, 4546 class, siblings, nsibling, 4547 p->flags); 4548 if (err) { 4549 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 4550 __func__, p->name, class, nsibling, err); 4551 return err; 4552 } 4553 } 4554 } 4555 4556 return 0; 4557 } 4558 4559 static int reset_virtual_engine(struct intel_gt *gt, 4560 struct intel_engine_cs **siblings, 4561 unsigned int nsibling) 4562 { 4563 struct intel_engine_cs *engine; 4564 struct intel_context *ve; 4565 struct igt_spinner spin; 4566 struct i915_request *rq; 4567 unsigned int n; 4568 int err = 0; 4569 4570 /* 4571 * In order to support offline error capture for fast preempt reset, 4572 * we need to decouple the guilty request and ensure that it and its 4573 * descendents are not executed while the capture is in progress. 4574 */ 4575 4576 if (igt_spinner_init(&spin, gt)) 4577 return -ENOMEM; 4578 4579 ve = intel_execlists_create_virtual(siblings, nsibling); 4580 if (IS_ERR(ve)) { 4581 err = PTR_ERR(ve); 4582 goto out_spin; 4583 } 4584 4585 for (n = 0; n < nsibling; n++) 4586 st_engine_heartbeat_disable(siblings[n]); 4587 4588 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4589 if (IS_ERR(rq)) { 4590 err = PTR_ERR(rq); 4591 goto out_heartbeat; 4592 } 4593 i915_request_add(rq); 4594 4595 if (!igt_wait_for_spinner(&spin, rq)) { 4596 intel_gt_set_wedged(gt); 4597 err = -ETIME; 4598 goto out_heartbeat; 4599 } 4600 4601 engine = rq->engine; 4602 GEM_BUG_ON(engine == ve->engine); 4603 4604 /* Take ownership of the reset and tasklet */ 4605 err = engine_lock_reset_tasklet(engine); 4606 if (err) 4607 goto out_heartbeat; 4608 4609 engine->execlists.tasklet.callback(&engine->execlists.tasklet); 4610 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 4611 4612 /* Fake a preemption event; failed of course */ 4613 spin_lock_irq(&engine->active.lock); 4614 __unwind_incomplete_requests(engine); 4615 spin_unlock_irq(&engine->active.lock); 4616 GEM_BUG_ON(rq->engine != engine); 4617 4618 /* Reset the engine while keeping our active request on hold */ 4619 execlists_hold(engine, rq); 4620 GEM_BUG_ON(!i915_request_on_hold(rq)); 4621 4622 __intel_engine_reset_bh(engine, NULL); 4623 GEM_BUG_ON(rq->fence.error != -EIO); 4624 4625 /* Release our grasp on the engine, letting CS flow again */ 4626 engine_unlock_reset_tasklet(engine); 4627 4628 /* Check that we do not resubmit the held request */ 4629 i915_request_get(rq); 4630 if (!i915_request_wait(rq, 0, HZ / 5)) { 4631 pr_err("%s: on hold request completed!\n", 4632 engine->name); 4633 intel_gt_set_wedged(gt); 4634 err = -EIO; 4635 goto out_rq; 4636 } 4637 GEM_BUG_ON(!i915_request_on_hold(rq)); 4638 4639 /* But is resubmitted on release */ 4640 execlists_unhold(engine, rq); 4641 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4642 pr_err("%s: held request did not complete!\n", 4643 engine->name); 4644 intel_gt_set_wedged(gt); 4645 err = -ETIME; 4646 } 4647 4648 out_rq: 4649 i915_request_put(rq); 4650 out_heartbeat: 4651 for (n = 0; n < nsibling; n++) 4652 st_engine_heartbeat_enable(siblings[n]); 4653 4654 intel_context_put(ve); 4655 out_spin: 4656 igt_spinner_fini(&spin); 4657 return err; 4658 } 4659 4660 static int live_virtual_reset(void *arg) 4661 { 4662 struct intel_gt *gt = arg; 4663 struct intel_engine_cs 
*siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class;

	/*
	 * Check that we handle a reset event within a virtual engine.
	 * Only the physical engine is reset, but we have to check the flow
	 * of the virtual requests around the reset, and make sure it is not
	 * forgotten.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = select_siblings(gt, class, siblings);
		if (nsibling < 2)
			continue;

		err = reset_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_unlite_switch),
		SUBTEST(live_unlite_preempt),
		SUBTEST(live_unlite_ring),
		SUBTEST(live_pin_rewind),
		SUBTEST(live_hold_reset),
		SUBTEST(live_error_interrupt),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_timeslice_rewind),
		SUBTEST(live_timeslice_queue),
		SUBTEST(live_timeslice_nopreempt),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
		SUBTEST(live_nopreempt),
		SUBTEST(live_preempt_cancel),
		SUBTEST(live_suppress_self_preempt),
		SUBTEST(live_chain_preempt),
		SUBTEST(live_preempt_ring),
		SUBTEST(live_preempt_gang),
		SUBTEST(live_preempt_timeout),
		SUBTEST(live_preempt_user),
		SUBTEST(live_preempt_smoke),
		SUBTEST(live_virtual_engine),
		SUBTEST(live_virtual_mask),
		SUBTEST(live_virtual_preserved),
		SUBTEST(live_virtual_slice),
		SUBTEST(live_virtual_bond),
		SUBTEST(live_virtual_reset),
	};

	if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}