// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}

static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	/* Ignore our own attempts to suppress excess tasklets */
	tasklet_hi_schedule(&engine->execlists.tasklet);

	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}

static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
	 */

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Setup the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		lrc_update_regs(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			i915_request_put(rq[0]);
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}

static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_PRIORITY_MAX);
}

static int live_unlite_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct igt_spinner spin;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Setup a preemption event that will cause almost the entire ring
	 * to be unwound, potentially fooling our intel_ring_direction()
	 * into emitting a forward lite-restore instead of the rollback.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq;
		struct igt_live_test t;
		int n;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			memset32(tmp->ring->vaddr,
				 0xdeadbeef, /* trigger a hang if executed */
				 tmp->ring->vma->size / sizeof(u32));

			ce[n] = tmp;
		}

		/* Create max prio spinner, followed by N low prio nops */
		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		i915_request_get(rq);
		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_ce;
		}

		/* Fill the ring, until we will cause a wrap */
		n = 0;
		while (intel_ring_direction(ce[0]->ring,
					    rq->wa_tail,
					    ce[0]->ring->tail) <= 0) {
			struct i915_request *tmp;

			tmp = intel_context_create_request(ce[0]);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				i915_request_put(rq);
				goto err_ce;
			}

			i915_request_add(tmp);
			intel_engine_flush_submission(engine);
			n++;
		}
		intel_engine_flush_submission(engine);
		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
			 engine->name, n,
			 ce[0]->ring->size,
			 ce[0]->ring->tail,
			 ce[0]->ring->emit,
			 rq->tail);
		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
						rq->tail,
						ce[0]->ring->tail) <= 0);
		i915_request_put(rq);

		/* Create a second ring to preempt the first ring after rq[0] */
		rq = intel_context_create_request(ce[1]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ce;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		err = wait_for_submit(engine, rq, HZ / 2);
		i915_request_put(rq);
		if (err) {
			pr_err("%s: preemption request was not submitted\n",
			       engine->name);
			err = -ETIME;
		}

		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
			 engine->name,
			 ce[0]->ring->tail, ce[0]->ring->emit,
			 ce[1]->ring->tail, ce[1]->ring->emit);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}
		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_pin_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * We have to be careful not to trust intel_ring too much, for example
	 * ring->head is updated upon retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		local_bh_disable();
		if (test_and_set_bit(I915_RESET_ENGINE + id,
				     &gt->reset.flags)) {
			local_bh_enable();
			intel_gt_set_wedged(gt);
			err = -EBUSY;
			goto out;
		}
		tasklet_disable(&engine->execlists.tasklet);

		engine->execlists.tasklet.callback(&engine->execlists.tasklet);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		__intel_engine_reset_bh(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		tasklet_enable(&engine->execlists.tasklet);
		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
				      &gt->reset.flags);
		local_bh_enable();

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		st_engine_heartbeat_enable(engine);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}

static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD,  GOOD } },
		{ { BAD,  BAD } },
		{ { BAD,  GOOD } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		const struct error_phase *p;
		int err = 0;

		st_engine_heartbeat_disable(engine);

		for (p = phases; p->error[0] != GOOD; p++) {
			struct i915_request *client[ARRAY_SIZE(phases->error)];
			u32 *cs;
			int i;

			memset(client, 0, sizeof(*client));
			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct intel_context *ce;
				struct i915_request *rq;

				ce = intel_context_create(engine);
				if (IS_ERR(ce)) {
					err = PTR_ERR(ce);
					goto out;
				}

				rq = intel_context_create_request(ce);
				intel_context_put(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out;
				}

				if (rq->engine->emit_init_breadcrumb) {
					err = rq->engine->emit_init_breadcrumb(rq);
					if (err) {
						i915_request_add(rq);
						goto out;
					}
				}

				cs = intel_ring_begin(rq, 2);
				if (IS_ERR(cs)) {
					i915_request_add(rq);
					err = PTR_ERR(cs);
					goto out;
				}

				if (p->error[i]) {
					*cs++ = 0xdeadbeef;
					*cs++ = 0xdeadbeef;
				} else {
					*cs++ = MI_NOOP;
					*cs++ = MI_NOOP;
				}

				client[i] = i915_request_get(rq);
				i915_request_add(rq);
			}

			err = wait_for_submit(engine, client[0], HZ / 2);
			if (err) {
				pr_err("%s: first request did not start within time!\n",
				       engine->name);
				err = -ETIME;
				goto out;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
					pr_debug("%s: %s request incomplete!\n",
						 engine->name,
						 error_repr(p->error[i]));

				if (!i915_request_started(client[i])) {
					pr_err("%s: %s request not started!\n",
					       engine->name,
					       error_repr(p->error[i]));
					err = -ETIME;
					goto out;
				}

				/* Kick the tasklet to process the error */
				intel_engine_flush_submission(engine);
				if (client[i]->fence.error != p->error[i]) {
					pr_err("%s: %s request (%s) with wrong error code: %d\n",
					       engine->name,
					       error_repr(p->error[i]),
					       i915_request_completed(client[i]) ?
"completed" : "running", 775 client[i]->fence.error); 776 err = -EINVAL; 777 goto out; 778 } 779 } 780 781 out: 782 for (i = 0; i < ARRAY_SIZE(client); i++) 783 if (client[i]) 784 i915_request_put(client[i]); 785 if (err) { 786 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 787 engine->name, p - phases, 788 p->error[0], p->error[1]); 789 break; 790 } 791 } 792 793 st_engine_heartbeat_enable(engine); 794 if (err) { 795 intel_gt_set_wedged(gt); 796 return err; 797 } 798 } 799 800 return 0; 801 } 802 803 static int 804 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 805 { 806 u32 *cs; 807 808 cs = intel_ring_begin(rq, 10); 809 if (IS_ERR(cs)) 810 return PTR_ERR(cs); 811 812 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 813 814 *cs++ = MI_SEMAPHORE_WAIT | 815 MI_SEMAPHORE_GLOBAL_GTT | 816 MI_SEMAPHORE_POLL | 817 MI_SEMAPHORE_SAD_NEQ_SDD; 818 *cs++ = 0; 819 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 820 *cs++ = 0; 821 822 if (idx > 0) { 823 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 824 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 825 *cs++ = 0; 826 *cs++ = 1; 827 } else { 828 *cs++ = MI_NOOP; 829 *cs++ = MI_NOOP; 830 *cs++ = MI_NOOP; 831 *cs++ = MI_NOOP; 832 } 833 834 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 835 836 intel_ring_advance(rq, cs); 837 return 0; 838 } 839 840 static struct i915_request * 841 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 842 { 843 struct intel_context *ce; 844 struct i915_request *rq; 845 int err; 846 847 ce = intel_context_create(engine); 848 if (IS_ERR(ce)) 849 return ERR_CAST(ce); 850 851 rq = intel_context_create_request(ce); 852 if (IS_ERR(rq)) 853 goto out_ce; 854 855 err = 0; 856 if (rq->engine->emit_init_breadcrumb) 857 err = rq->engine->emit_init_breadcrumb(rq); 858 if (err == 0) 859 err = emit_semaphore_chain(rq, vma, idx); 860 if (err == 0) 861 i915_request_get(rq); 862 i915_request_add(rq); 863 if (err) 864 rq = ERR_PTR(err); 865 866 out_ce: 867 intel_context_put(ce); 868 return rq; 869 } 870 871 static int 872 release_queue(struct intel_engine_cs *engine, 873 struct i915_vma *vma, 874 int idx, int prio) 875 { 876 struct i915_sched_attr attr = { 877 .priority = prio, 878 }; 879 struct i915_request *rq; 880 u32 *cs; 881 882 rq = intel_engine_create_kernel_request(engine); 883 if (IS_ERR(rq)) 884 return PTR_ERR(rq); 885 886 cs = intel_ring_begin(rq, 4); 887 if (IS_ERR(cs)) { 888 i915_request_add(rq); 889 return PTR_ERR(cs); 890 } 891 892 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 893 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 894 *cs++ = 0; 895 *cs++ = 1; 896 897 intel_ring_advance(rq, cs); 898 899 i915_request_get(rq); 900 i915_request_add(rq); 901 902 local_bh_disable(); 903 engine->schedule(rq, &attr); 904 local_bh_enable(); /* kick tasklet */ 905 906 i915_request_put(rq); 907 908 return 0; 909 } 910 911 static int 912 slice_semaphore_queue(struct intel_engine_cs *outer, 913 struct i915_vma *vma, 914 int count) 915 { 916 struct intel_engine_cs *engine; 917 struct i915_request *head; 918 enum intel_engine_id id; 919 int err, i, n = 0; 920 921 head = semaphore_queue(outer, vma, n++); 922 if (IS_ERR(head)) 923 return PTR_ERR(head); 924 925 for_each_engine(engine, outer->gt, id) { 926 if (!intel_engine_has_preemption(engine)) 927 continue; 928 929 for (i = 0; i < count; i++) { 930 struct i915_request *rq; 931 932 rq = semaphore_queue(engine, vma, n++); 933 if (IS_ERR(rq)) { 934 err = PTR_ERR(rq); 935 goto out; 936 } 937 938 i915_request_put(rq); 939 } 940 } 941 942 err = release_queue(outer, 
	if (err)
		goto out;

	if (i915_request_wait(head, 0,
			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
		       outer->name, count, n);
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(outer->gt);
		err = -EIO;
	}

out:
	i915_request_put(head);
	return err;
}

static int live_timeslice_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		memset(vaddr, 0, PAGE_SIZE);

		st_engine_heartbeat_disable(engine);
		err = slice_semaphore_queue(engine, vma, 5);
		st_engine_heartbeat_enable(engine);
		if (err)
			goto err_pin;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto err_pin;
		}
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

static struct i915_request *
create_rewinder(struct intel_context *ce,
		struct i915_request *wait,
		void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	if (wait) {
		err = i915_request_await_dma_fence(rq, &wait->fence);
		if (err)
			goto err;
	}

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = idx;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = idx + 1;

	intel_ring_advance(rq, cs);

	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
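		/*
		 * The request was already added above, so it will be
		 * flushed and retired normally; drop our reference and
		 * report the error to the caller.
		 */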
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

static int live_timeslice_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * The usual presumption on timeslice expiration is that we replace
	 * the active context with another. However, given a chain of
	 * dependencies we may end up with replacing the context with itself,
	 * but only a few of those requests, forcing us to rewind the
	 * RING_TAIL of the original request.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	for_each_engine(engine, gt, id) {
		enum { A1, A2, B1 };
		enum { X = 1, Z, Y };
		struct i915_request *rq[3] = {};
		struct intel_context *ce;
		unsigned long timeslice;
		int i, err = 0;
		u32 *slot;

		if (!intel_engine_has_timeslices(engine))
			continue;

		/*
		 * A:rq1 -- semaphore wait, timestamp X
		 * A:rq2 -- write timestamp Y
		 *
		 * B:rq1 [await A:rq1] -- write timestamp Z
		 *
		 * Force timeslice, release semaphore.
		 *
		 * Expect execution/evaluation order XZY
		 */

		st_engine_heartbeat_disable(engine);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		slot = memset32(engine->status_page.addr + 1000, 0, 4);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[A1] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[A1])) {
			intel_context_put(ce);
			goto err;
		}

		rq[A2] = create_rewinder(ce, NULL, slot, Y);
		intel_context_put(ce);
		if (IS_ERR(rq[A2]))
			goto err;

		err = wait_for_submit(engine, rq[A2], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit first context\n",
			       engine->name);
			goto err;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
		intel_context_put(ce);
		if (IS_ERR(rq[2]))
			goto err;

		err = wait_for_submit(engine, rq[B1], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit second context\n",
			       engine->name);
			goto err;
		}

		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
			/* Wait for the timeslice to kick in */
			del_timer(&engine->execlists.timer);
			tasklet_hi_schedule(&engine->execlists.tasklet);
			intel_engine_flush_submission(engine);
		}
		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
		GEM_BUG_ON(i915_request_is_active(rq[A2]));

		/* Release the hounds! */
		slot[0] = 1;
		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */

		for (i = 1; i <= 3; i++) {
			unsigned long timeout = jiffies + HZ / 2;

			while (!READ_ONCE(slot[i]) &&
			       time_before(jiffies, timeout))
				;

			if (!time_before(jiffies, timeout)) {
				pr_err("%s: rq[%d] timed out\n",
				       engine->name, i - 1);
				err = -ETIME;
				goto err;
			}

			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
		}

		/* XZY: XZ < XY */
		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
			       engine->name,
			       slot[Z] - slot[X],
			       slot[Y] - slot[X]);
			err = -EINVAL;
		}

err:
		memset32(&slot[0], -1, 4);
		wmb();

		engine->props.timeslice_duration_ms = timeslice;
		st_engine_heartbeat_enable(engine);
		for (i = 0; i < 3; i++)
			i915_request_put(rq[i]);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static long slice_timeout(struct intel_engine_cs *engine)
{
	long timeout;

	/* Enough time for a timeslice to kick in, and kick out */
	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));

	/* Enough time for the nop request to complete */
	timeout += HZ / 5;

	return timeout + 1;
}

static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
		struct i915_request *rq, *nop;

		if (!intel_engine_has_preemption(engine))
			continue;

		st_engine_heartbeat_disable(engine);
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		/* Wait until we ack the release_queue and start timeslicing */
		do {
			cond_resched();
			intel_engine_flush_submission(engine);
		} while (READ_ONCE(engine->execlists.pending[0]));

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}

static int live_timeslice_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We should not timeslice into a request that is marked with
	 * I915_REQUEST_NOPREEMPT.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		unsigned long timeslice;

		if (!intel_engine_has_preemption(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		/* Create an unpreemptible spinner */

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_heartbeat;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
		i915_request_put(rq);

		/* Followed by a maximum priority barrier (heartbeat) */

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out_spin;
		}

		rq = intel_context_create_request(ce);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_spin;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Wait until the barrier is in ELSP, and we know timeslicing
		 * will have been activated.
		 */
		if (wait_for_submit(engine, rq, HZ / 2)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		/*
		 * Since the ELSP[0] request is unpreemptible, it should not
		 * allow the maximum priority barrier through. Wait long
		 * enough to see if it is timesliced in by mistake.
		 */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
			       engine->name);
			err = -EINVAL;
		}
		i915_request_put(rq);

out_spin:
		igt_spinner_end(&spin);
out_heartbeat:
		xchg(&engine->props.timeslice_duration_ms, timeslice);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	int err = -ENOMEM;
	u32 *map;

	/*
	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
	 * preempt the busywaits used to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		i915_request_add(hi);

		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}

static struct i915_request *
spinner_create_request(struct igt_spinner *spin,
		       struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       u32 arb)
{
	struct intel_context *ce;
	struct i915_request *rq;

	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = igt_spinner_create_request(spin, ce, arb);
	intel_context_put(ce);
	return rq;
}

static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;
}

static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = 1;

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		attr.priority = I915_PRIORITY_MAX;
		engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;

err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_ctx_lo;
}

struct preempt_client {
	struct igt_spinner spin;
	struct i915_gem_context *ctx;
};

static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
	c->ctx = kernel_context(gt->i915);
	if (!c->ctx)
		return -ENOMEM;

	if (igt_spinner_init(&c->spin, gt))
		goto err_ctx;

	return 0;

err_ctx:
	kernel_context_close(c->ctx);
	return -ENOMEM;
}

static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}

static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and not want to be interrupted.
	 */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;
	b.ctx->sched.priority = I915_PRIORITY_MAX;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;

		if (!intel_engine_has_preemption(engine))
			continue;

		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			goto err_client_b;
		}

		/* Low priority client, but unpreemptable! */
		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			goto err_wedged;
		}

		rq_b = spinner_create_request(&b.spin,
					      b.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_b)) {
			err = PTR_ERR(rq_b);
			goto err_client_b;
		}

		i915_request_add(rq_b);

		/* B is much more important than A! (But A is unpreemptable.) */
		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));

		/* Wait long enough for preemption and timeslicing */
		if (igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client started too early!\n");
			goto err_wedged;
		}

		igt_spinner_end(&a.spin);

		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client failed to start\n");
			goto err_wedged;
		}

		igt_spinner_end(&b.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption recorded x%d; should have been suppressed!\n",
			       engine->execlists.preempt_hang.count);
			err = -EINVAL;
			goto err_wedged;
		}

		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}

struct live_preempt_cancel {
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
};

static int __cancel_active0(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP0 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_ARB_CHECK);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_active1(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[2] = {};
	struct igt_live_test t;
	int err;

	/* Preempt cancel of ELSP1 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_NOOP); /* no preemption */
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = spinner_create_request(&arg->b.spin,
				       arg->b.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	intel_context_set_banned(rq[1]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	igt_spinner_end(&arg->a.spin);
	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != 0) {
		pr_err("Normal inflight0 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != -EIO) {
		pr_err("Cancelled inflight1 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_queued(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[3] = {};
	struct igt_live_test t;
	int err;

	/* Full ELSP and one in the wings */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	rq[2] = spinner_create_request(&arg->b.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[2])) {
		err = PTR_ERR(rq[2]);
		goto out;
	}

	i915_request_get(rq[2]);
	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
	i915_request_add(rq[2]);
	if (err)
		goto out;

	intel_context_set_banned(rq[2]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != 0) {
		pr_err("Normal inflight1 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[2]->fence.error != -EIO) {
		pr_err("Cancelled queued request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq[2]);
	i915_request_put(rq[1]);
	i915_request_put(rq[0]);
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int __cancel_hostile(struct live_preempt_cancel *arg)
{
	struct i915_request *rq;
	int err;

	/* Preempt cancel non-preemptible spinner in ELSP0 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

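	/* We rely on a full engine reset to cancel the non-preemptible spinner */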
	if (!intel_has_reset_engine(arg->engine->gt))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine); /* force reset */
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(arg->engine->i915))
		err = -EIO;
	return err;
}

static void force_reset_timeout(struct intel_engine_cs *engine)
{
	engine->reset_timeout.probability = 999;
	atomic_set(&engine->reset_timeout.times, -1);
}

static void cancel_reset_timeout(struct intel_engine_cs *engine)
{
	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
}

static int __cancel_fail(struct live_preempt_cancel *arg)
{
	struct intel_engine_cs *engine = arg->engine;
	struct i915_request *rq;
	int err;

	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	if (!intel_has_reset_engine(engine->gt))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);

	err = intel_engine_pulse(engine);
	if (err)
		goto out;

	force_reset_timeout(engine);

	/* force preempt reset [failure] */
	while (!engine->execlists.pending[0])
		intel_engine_flush_submission(engine);
	del_timer_sync(&engine->execlists.preempt);
	intel_engine_flush_submission(engine);

	cancel_reset_timeout(engine);

	/* after failure, require heartbeats to reset device */
	intel_engine_set_heartbeat(engine, 1);
	err = wait_for_reset(engine, rq, HZ / 2);
	intel_engine_set_heartbeat(engine,
				   engine->defaults.heartbeat_interval_ms);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(engine->i915))
		err = -EIO;
	return err;
}

static int live_preempt_cancel(void *arg)
{
	struct intel_gt *gt = arg;
	struct live_preempt_cancel data;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * To cancel an inflight context, we need to first remove it from the
	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2366 */ 2367 2368 if (preempt_client_init(gt, &data.a)) 2369 return -ENOMEM; 2370 if (preempt_client_init(gt, &data.b)) 2371 goto err_client_a; 2372 2373 for_each_engine(data.engine, gt, id) { 2374 if (!intel_engine_has_preemption(data.engine)) 2375 continue; 2376 2377 err = __cancel_active0(&data); 2378 if (err) 2379 goto err_wedged; 2380 2381 err = __cancel_active1(&data); 2382 if (err) 2383 goto err_wedged; 2384 2385 err = __cancel_queued(&data); 2386 if (err) 2387 goto err_wedged; 2388 2389 err = __cancel_hostile(&data); 2390 if (err) 2391 goto err_wedged; 2392 2393 err = __cancel_fail(&data); 2394 if (err) 2395 goto err_wedged; 2396 } 2397 2398 err = 0; 2399 err_client_b: 2400 preempt_client_fini(&data.b); 2401 err_client_a: 2402 preempt_client_fini(&data.a); 2403 return err; 2404 2405 err_wedged: 2406 GEM_TRACE_DUMP(); 2407 igt_spinner_end(&data.b.spin); 2408 igt_spinner_end(&data.a.spin); 2409 intel_gt_set_wedged(gt); 2410 goto err_client_b; 2411 } 2412 2413 static int live_suppress_self_preempt(void *arg) 2414 { 2415 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; 2416 struct intel_gt *gt = arg; 2417 struct intel_engine_cs *engine; 2418 struct preempt_client a, b; 2419 enum intel_engine_id id; 2420 int err = -ENOMEM; 2421 2422 /* 2423 * Verify that if a preemption request does not cause a change in 2424 * the current execution order, the preempt-to-idle injection is 2425 * skipped and that we do not accidentally apply it after the CS 2426 * completion event. 2427 */ 2428 2429 if (intel_uc_uses_guc_submission(>->uc)) 2430 return 0; /* presume black blox */ 2431 2432 if (intel_vgpu_active(gt->i915)) 2433 return 0; /* GVT forces single port & request submission */ 2434 2435 if (preempt_client_init(gt, &a)) 2436 return -ENOMEM; 2437 if (preempt_client_init(gt, &b)) 2438 goto err_client_a; 2439 2440 for_each_engine(engine, gt, id) { 2441 struct i915_request *rq_a, *rq_b; 2442 int depth; 2443 2444 if (!intel_engine_has_preemption(engine)) 2445 continue; 2446 2447 if (igt_flush_test(gt->i915)) 2448 goto err_wedged; 2449 2450 st_engine_heartbeat_disable(engine); 2451 engine->execlists.preempt_hang.count = 0; 2452 2453 rq_a = spinner_create_request(&a.spin, 2454 a.ctx, engine, 2455 MI_NOOP); 2456 if (IS_ERR(rq_a)) { 2457 err = PTR_ERR(rq_a); 2458 st_engine_heartbeat_enable(engine); 2459 goto err_client_b; 2460 } 2461 2462 i915_request_add(rq_a); 2463 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2464 pr_err("First client failed to start\n"); 2465 st_engine_heartbeat_enable(engine); 2466 goto err_wedged; 2467 } 2468 2469 /* Keep postponing the timer to avoid premature slicing */ 2470 mod_timer(&engine->execlists.timer, jiffies + HZ); 2471 for (depth = 0; depth < 8; depth++) { 2472 rq_b = spinner_create_request(&b.spin, 2473 b.ctx, engine, 2474 MI_NOOP); 2475 if (IS_ERR(rq_b)) { 2476 err = PTR_ERR(rq_b); 2477 st_engine_heartbeat_enable(engine); 2478 goto err_client_b; 2479 } 2480 i915_request_add(rq_b); 2481 2482 GEM_BUG_ON(i915_request_completed(rq_a)); 2483 engine->schedule(rq_a, &attr); 2484 igt_spinner_end(&a.spin); 2485 2486 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2487 pr_err("Second client failed to start\n"); 2488 st_engine_heartbeat_enable(engine); 2489 goto err_wedged; 2490 } 2491 2492 swap(a, b); 2493 rq_a = rq_b; 2494 } 2495 igt_spinner_end(&a.spin); 2496 2497 if (engine->execlists.preempt_hang.count) { 2498 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2499 engine->name, 2500 engine->execlists.preempt_hang.count, 2501 depth); 
2502 st_engine_heartbeat_enable(engine); 2503 err = -EINVAL; 2504 goto err_client_b; 2505 } 2506 2507 st_engine_heartbeat_enable(engine); 2508 if (igt_flush_test(gt->i915)) 2509 goto err_wedged; 2510 } 2511 2512 err = 0; 2513 err_client_b: 2514 preempt_client_fini(&b); 2515 err_client_a: 2516 preempt_client_fini(&a); 2517 return err; 2518 2519 err_wedged: 2520 igt_spinner_end(&b.spin); 2521 igt_spinner_end(&a.spin); 2522 intel_gt_set_wedged(gt); 2523 err = -EIO; 2524 goto err_client_b; 2525 } 2526 2527 static int live_chain_preempt(void *arg) 2528 { 2529 struct intel_gt *gt = arg; 2530 struct intel_engine_cs *engine; 2531 struct preempt_client hi, lo; 2532 enum intel_engine_id id; 2533 int err = -ENOMEM; 2534 2535 /* 2536 * Build a chain AB...BA between two contexts (A, B) and request 2537 * preemption of the last request. It should then complete before 2538 * the previously submitted spinner in B. 2539 */ 2540 2541 if (preempt_client_init(gt, &hi)) 2542 return -ENOMEM; 2543 2544 if (preempt_client_init(gt, &lo)) 2545 goto err_client_hi; 2546 2547 for_each_engine(engine, gt, id) { 2548 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; 2549 struct igt_live_test t; 2550 struct i915_request *rq; 2551 int ring_size, count, i; 2552 2553 if (!intel_engine_has_preemption(engine)) 2554 continue; 2555 2556 rq = spinner_create_request(&lo.spin, 2557 lo.ctx, engine, 2558 MI_ARB_CHECK); 2559 if (IS_ERR(rq)) 2560 goto err_wedged; 2561 2562 i915_request_get(rq); 2563 i915_request_add(rq); 2564 2565 ring_size = rq->wa_tail - rq->head; 2566 if (ring_size < 0) 2567 ring_size += rq->ring->size; 2568 ring_size = rq->ring->size / ring_size; 2569 pr_debug("%s(%s): Using maximum of %d requests\n", 2570 __func__, engine->name, ring_size); 2571 2572 igt_spinner_end(&lo.spin); 2573 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2574 pr_err("Timed out waiting to flush %s\n", engine->name); 2575 i915_request_put(rq); 2576 goto err_wedged; 2577 } 2578 i915_request_put(rq); 2579 2580 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2581 err = -EIO; 2582 goto err_wedged; 2583 } 2584 2585 for_each_prime_number_from(count, 1, ring_size) { 2586 rq = spinner_create_request(&hi.spin, 2587 hi.ctx, engine, 2588 MI_ARB_CHECK); 2589 if (IS_ERR(rq)) 2590 goto err_wedged; 2591 i915_request_add(rq); 2592 if (!igt_wait_for_spinner(&hi.spin, rq)) 2593 goto err_wedged; 2594 2595 rq = spinner_create_request(&lo.spin, 2596 lo.ctx, engine, 2597 MI_ARB_CHECK); 2598 if (IS_ERR(rq)) 2599 goto err_wedged; 2600 i915_request_add(rq); 2601 2602 for (i = 0; i < count; i++) { 2603 rq = igt_request_alloc(lo.ctx, engine); 2604 if (IS_ERR(rq)) 2605 goto err_wedged; 2606 i915_request_add(rq); 2607 } 2608 2609 rq = igt_request_alloc(hi.ctx, engine); 2610 if (IS_ERR(rq)) 2611 goto err_wedged; 2612 2613 i915_request_get(rq); 2614 i915_request_add(rq); 2615 engine->schedule(rq, &attr); 2616 2617 igt_spinner_end(&hi.spin); 2618 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2619 struct drm_printer p = 2620 drm_info_printer(gt->i915->drm.dev); 2621 2622 pr_err("Failed to preempt over chain of %d\n", 2623 count); 2624 intel_engine_dump(engine, &p, 2625 "%s\n", engine->name); 2626 i915_request_put(rq); 2627 goto err_wedged; 2628 } 2629 igt_spinner_end(&lo.spin); 2630 i915_request_put(rq); 2631 2632 rq = igt_request_alloc(lo.ctx, engine); 2633 if (IS_ERR(rq)) 2634 goto err_wedged; 2635 2636 i915_request_get(rq); 2637 i915_request_add(rq); 2638 2639 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2640 struct drm_printer p = 2641 
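 *
 * Each batch begins by polling the first dword of its own buffer for
 * zero with the MI_SEMAPHORE_WAIT above; once released, this store
 * writes zero into the previous (lower priority) batch, so the
 * release cascades from the highest priority request downwards.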
drm_info_printer(gt->i915->drm.dev); 2642 2643 pr_err("Failed to flush low priority chain of %d requests\n", 2644 count); 2645 intel_engine_dump(engine, &p, 2646 "%s\n", engine->name); 2647 2648 i915_request_put(rq); 2649 goto err_wedged; 2650 } 2651 i915_request_put(rq); 2652 } 2653 2654 if (igt_live_test_end(&t)) { 2655 err = -EIO; 2656 goto err_wedged; 2657 } 2658 } 2659 2660 err = 0; 2661 err_client_lo: 2662 preempt_client_fini(&lo); 2663 err_client_hi: 2664 preempt_client_fini(&hi); 2665 return err; 2666 2667 err_wedged: 2668 igt_spinner_end(&hi.spin); 2669 igt_spinner_end(&lo.spin); 2670 intel_gt_set_wedged(gt); 2671 err = -EIO; 2672 goto err_client_lo; 2673 } 2674 2675 static int create_gang(struct intel_engine_cs *engine, 2676 struct i915_request **prev) 2677 { 2678 struct drm_i915_gem_object *obj; 2679 struct intel_context *ce; 2680 struct i915_request *rq; 2681 struct i915_vma *vma; 2682 u32 *cs; 2683 int err; 2684 2685 ce = intel_context_create(engine); 2686 if (IS_ERR(ce)) 2687 return PTR_ERR(ce); 2688 2689 obj = i915_gem_object_create_internal(engine->i915, 4096); 2690 if (IS_ERR(obj)) { 2691 err = PTR_ERR(obj); 2692 goto err_ce; 2693 } 2694 2695 vma = i915_vma_instance(obj, ce->vm, NULL); 2696 if (IS_ERR(vma)) { 2697 err = PTR_ERR(vma); 2698 goto err_obj; 2699 } 2700 2701 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2702 if (err) 2703 goto err_obj; 2704 2705 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 2706 if (IS_ERR(cs)) { 2707 err = PTR_ERR(cs); 2708 goto err_obj; 2709 } 2710 2711 /* Semaphore target: spin until zero */ 2712 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2713 2714 *cs++ = MI_SEMAPHORE_WAIT | 2715 MI_SEMAPHORE_POLL | 2716 MI_SEMAPHORE_SAD_EQ_SDD; 2717 *cs++ = 0; 2718 *cs++ = lower_32_bits(vma->node.start); 2719 *cs++ = upper_32_bits(vma->node.start); 2720 2721 if (*prev) { 2722 u64 offset = (*prev)->batch->node.start; 2723 2724 /* Terminate the spinner in the next lower priority batch. 
*/ 2725 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2726 *cs++ = lower_32_bits(offset); 2727 *cs++ = upper_32_bits(offset); 2728 *cs++ = 0; 2729 } 2730 2731 *cs++ = MI_BATCH_BUFFER_END; 2732 i915_gem_object_flush_map(obj); 2733 i915_gem_object_unpin_map(obj); 2734 2735 rq = intel_context_create_request(ce); 2736 if (IS_ERR(rq)) { 2737 err = PTR_ERR(rq); 2738 goto err_obj; 2739 } 2740 2741 rq->batch = i915_vma_get(vma); 2742 i915_request_get(rq); 2743 2744 i915_vma_lock(vma); 2745 err = i915_request_await_object(rq, vma->obj, false); 2746 if (!err) 2747 err = i915_vma_move_to_active(vma, rq, 0); 2748 if (!err) 2749 err = rq->engine->emit_bb_start(rq, 2750 vma->node.start, 2751 PAGE_SIZE, 0); 2752 i915_vma_unlock(vma); 2753 i915_request_add(rq); 2754 if (err) 2755 goto err_rq; 2756 2757 i915_gem_object_put(obj); 2758 intel_context_put(ce); 2759 2760 rq->mock.link.next = &(*prev)->mock.link; 2761 *prev = rq; 2762 return 0; 2763 2764 err_rq: 2765 i915_vma_put(rq->batch); 2766 i915_request_put(rq); 2767 err_obj: 2768 i915_gem_object_put(obj); 2769 err_ce: 2770 intel_context_put(ce); 2771 return err; 2772 } 2773 2774 static int __live_preempt_ring(struct intel_engine_cs *engine, 2775 struct igt_spinner *spin, 2776 int queue_sz, int ring_sz) 2777 { 2778 struct intel_context *ce[2] = {}; 2779 struct i915_request *rq; 2780 struct igt_live_test t; 2781 int err = 0; 2782 int n; 2783 2784 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) 2785 return -EIO; 2786 2787 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2788 struct intel_context *tmp; 2789 2790 tmp = intel_context_create(engine); 2791 if (IS_ERR(tmp)) { 2792 err = PTR_ERR(tmp); 2793 goto err_ce; 2794 } 2795 2796 tmp->ring = __intel_context_ring_size(ring_sz); 2797 2798 err = intel_context_pin(tmp); 2799 if (err) { 2800 intel_context_put(tmp); 2801 goto err_ce; 2802 } 2803 2804 memset32(tmp->ring->vaddr, 2805 0xdeadbeef, /* trigger a hang if executed */ 2806 tmp->ring->vma->size / sizeof(u32)); 2807 2808 ce[n] = tmp; 2809 } 2810 2811 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); 2812 if (IS_ERR(rq)) { 2813 err = PTR_ERR(rq); 2814 goto err_ce; 2815 } 2816 2817 i915_request_get(rq); 2818 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2819 i915_request_add(rq); 2820 2821 if (!igt_wait_for_spinner(spin, rq)) { 2822 intel_gt_set_wedged(engine->gt); 2823 i915_request_put(rq); 2824 err = -ETIME; 2825 goto err_ce; 2826 } 2827 2828 /* Fill the ring, until we will cause a wrap */ 2829 n = 0; 2830 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { 2831 struct i915_request *tmp; 2832 2833 tmp = intel_context_create_request(ce[0]); 2834 if (IS_ERR(tmp)) { 2835 err = PTR_ERR(tmp); 2836 i915_request_put(rq); 2837 goto err_ce; 2838 } 2839 2840 i915_request_add(tmp); 2841 intel_engine_flush_submission(engine); 2842 n++; 2843 } 2844 intel_engine_flush_submission(engine); 2845 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 2846 engine->name, queue_sz, n, 2847 ce[0]->ring->size, 2848 ce[0]->ring->tail, 2849 ce[0]->ring->emit, 2850 rq->tail); 2851 i915_request_put(rq); 2852 2853 /* Create a second request to preempt the first ring */ 2854 rq = intel_context_create_request(ce[1]); 2855 if (IS_ERR(rq)) { 2856 err = PTR_ERR(rq); 2857 goto err_ce; 2858 } 2859 2860 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2861 i915_request_get(rq); 2862 i915_request_add(rq); 2863 2864 err = wait_for_submit(engine, rq, HZ / 2); 2865 i915_request_put(rq); 2866 if (err) { 2867 pr_err("%s: preemption request was not 
submitted\n", 2868 engine->name); 2869 err = -ETIME; 2870 } 2871 2872 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 2873 engine->name, 2874 ce[0]->ring->tail, ce[0]->ring->emit, 2875 ce[1]->ring->tail, ce[1]->ring->emit); 2876 2877 err_ce: 2878 intel_engine_flush_submission(engine); 2879 igt_spinner_end(spin); 2880 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2881 if (IS_ERR_OR_NULL(ce[n])) 2882 break; 2883 2884 intel_context_unpin(ce[n]); 2885 intel_context_put(ce[n]); 2886 } 2887 if (igt_live_test_end(&t)) 2888 err = -EIO; 2889 return err; 2890 } 2891 2892 static int live_preempt_ring(void *arg) 2893 { 2894 struct intel_gt *gt = arg; 2895 struct intel_engine_cs *engine; 2896 struct igt_spinner spin; 2897 enum intel_engine_id id; 2898 int err = 0; 2899 2900 /* 2901 * Check that we rollback large chunks of a ring in order to do a 2902 * preemption event. Similar to live_unlite_ring, but looking at 2903 * ring size rather than the impact of intel_ring_direction(). 2904 */ 2905 2906 if (igt_spinner_init(&spin, gt)) 2907 return -ENOMEM; 2908 2909 for_each_engine(engine, gt, id) { 2910 int n; 2911 2912 if (!intel_engine_has_preemption(engine)) 2913 continue; 2914 2915 if (!intel_engine_can_store_dword(engine)) 2916 continue; 2917 2918 st_engine_heartbeat_disable(engine); 2919 2920 for (n = 0; n <= 3; n++) { 2921 err = __live_preempt_ring(engine, &spin, 2922 n * SZ_4K / 4, SZ_4K); 2923 if (err) 2924 break; 2925 } 2926 2927 st_engine_heartbeat_enable(engine); 2928 if (err) 2929 break; 2930 } 2931 2932 igt_spinner_fini(&spin); 2933 return err; 2934 } 2935 2936 static int live_preempt_gang(void *arg) 2937 { 2938 struct intel_gt *gt = arg; 2939 struct intel_engine_cs *engine; 2940 enum intel_engine_id id; 2941 2942 /* 2943 * Build as long a chain of preempters as we can, with each 2944 * request higher priority than the last. Once we are ready, we release 2945 * the last batch which then precolates down the chain, each releasing 2946 * the next oldest in turn. The intent is to simply push as hard as we 2947 * can with the number of preemptions, trying to exceed narrow HW 2948 * limits. At a minimum, we insist that we can sort all the user 2949 * high priority levels into execution order. 2950 */ 2951 2952 for_each_engine(engine, gt, id) { 2953 struct i915_request *rq = NULL; 2954 struct igt_live_test t; 2955 IGT_TIMEOUT(end_time); 2956 int prio = 0; 2957 int err = 0; 2958 u32 *cs; 2959 2960 if (!intel_engine_has_preemption(engine)) 2961 continue; 2962 2963 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2964 return -EIO; 2965 2966 do { 2967 struct i915_sched_attr attr = { .priority = prio++ }; 2968 2969 err = create_gang(engine, &rq); 2970 if (err) 2971 break; 2972 2973 /* Submit each spinner at increasing priority */ 2974 engine->schedule(rq, &attr); 2975 } while (prio <= I915_PRIORITY_MAX && 2976 !__igt_timeout(end_time, NULL)); 2977 pr_debug("%s: Preempt chain of %d requests\n", 2978 engine->name, prio); 2979 2980 /* 2981 * Such that the last spinner is the highest priority and 2982 * should execute first. When that spinner completes, 2983 * it will terminate the next lowest spinner until there 2984 * are no more spinners and the gang is complete. 
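 *
 * The cascade is started from the CPU below by clearing the first
 * dword of the most recently submitted (highest priority) batch,
 * which is the semaphore that spinner is busy polling.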
2985 */ 2986 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC); 2987 if (!IS_ERR(cs)) { 2988 *cs = 0; 2989 i915_gem_object_unpin_map(rq->batch->obj); 2990 } else { 2991 err = PTR_ERR(cs); 2992 intel_gt_set_wedged(gt); 2993 } 2994 2995 while (rq) { /* wait for each rq from highest to lowest prio */ 2996 struct i915_request *n = list_next_entry(rq, mock.link); 2997 2998 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2999 struct drm_printer p = 3000 drm_info_printer(engine->i915->drm.dev); 3001 3002 pr_err("Failed to flush chain of %d requests, at %d\n", 3003 prio, rq_prio(rq)); 3004 intel_engine_dump(engine, &p, 3005 "%s\n", engine->name); 3006 3007 err = -ETIME; 3008 } 3009 3010 i915_vma_put(rq->batch); 3011 i915_request_put(rq); 3012 rq = n; 3013 } 3014 3015 if (igt_live_test_end(&t)) 3016 err = -EIO; 3017 if (err) 3018 return err; 3019 } 3020 3021 return 0; 3022 } 3023 3024 static struct i915_vma * 3025 create_gpr_user(struct intel_engine_cs *engine, 3026 struct i915_vma *result, 3027 unsigned int offset) 3028 { 3029 struct drm_i915_gem_object *obj; 3030 struct i915_vma *vma; 3031 u32 *cs; 3032 int err; 3033 int i; 3034 3035 obj = i915_gem_object_create_internal(engine->i915, 4096); 3036 if (IS_ERR(obj)) 3037 return ERR_CAST(obj); 3038 3039 vma = i915_vma_instance(obj, result->vm, NULL); 3040 if (IS_ERR(vma)) { 3041 i915_gem_object_put(obj); 3042 return vma; 3043 } 3044 3045 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3046 if (err) { 3047 i915_vma_put(vma); 3048 return ERR_PTR(err); 3049 } 3050 3051 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 3052 if (IS_ERR(cs)) { 3053 i915_vma_put(vma); 3054 return ERR_CAST(cs); 3055 } 3056 3057 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 3058 *cs++ = MI_LOAD_REGISTER_IMM(1); 3059 *cs++ = CS_GPR(engine, 0); 3060 *cs++ = 1; 3061 3062 for (i = 1; i < NUM_GPR; i++) { 3063 u64 addr; 3064 3065 /* 3066 * Perform: GPR[i]++ 3067 * 3068 * As we read and write into the context saved GPR[i], if 3069 * we restart this batch buffer from an earlier point, we 3070 * will repeat the increment and store a value > 1. 
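 *
 * Roughly, for each GPR the batch emits:
 *   MI_MATH:               GPR[i] += GPR[0] (a constant 1)
 *   MI_STORE_REGISTER_MEM: write GPR[i] into this client's slot of
 *                          the result buffer
 *   MI_SEMAPHORE_WAIT:     poll until result[0] >= i
 * so forward progress is gated on the writes to result[0] issued by
 * the preempting requests (see preempt_user()).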
3071 */ 3072 *cs++ = MI_MATH(4); 3073 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 3074 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 3075 *cs++ = MI_MATH_ADD; 3076 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 3077 3078 addr = result->node.start + offset + i * sizeof(*cs); 3079 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 3080 *cs++ = CS_GPR(engine, 2 * i); 3081 *cs++ = lower_32_bits(addr); 3082 *cs++ = upper_32_bits(addr); 3083 3084 *cs++ = MI_SEMAPHORE_WAIT | 3085 MI_SEMAPHORE_POLL | 3086 MI_SEMAPHORE_SAD_GTE_SDD; 3087 *cs++ = i; 3088 *cs++ = lower_32_bits(result->node.start); 3089 *cs++ = upper_32_bits(result->node.start); 3090 } 3091 3092 *cs++ = MI_BATCH_BUFFER_END; 3093 i915_gem_object_flush_map(obj); 3094 i915_gem_object_unpin_map(obj); 3095 3096 return vma; 3097 } 3098 3099 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 3100 { 3101 struct drm_i915_gem_object *obj; 3102 struct i915_vma *vma; 3103 int err; 3104 3105 obj = i915_gem_object_create_internal(gt->i915, sz); 3106 if (IS_ERR(obj)) 3107 return ERR_CAST(obj); 3108 3109 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 3110 if (IS_ERR(vma)) { 3111 i915_gem_object_put(obj); 3112 return vma; 3113 } 3114 3115 err = i915_ggtt_pin(vma, NULL, 0, 0); 3116 if (err) { 3117 i915_vma_put(vma); 3118 return ERR_PTR(err); 3119 } 3120 3121 return vma; 3122 } 3123 3124 static struct i915_request * 3125 create_gpr_client(struct intel_engine_cs *engine, 3126 struct i915_vma *global, 3127 unsigned int offset) 3128 { 3129 struct i915_vma *batch, *vma; 3130 struct intel_context *ce; 3131 struct i915_request *rq; 3132 int err; 3133 3134 ce = intel_context_create(engine); 3135 if (IS_ERR(ce)) 3136 return ERR_CAST(ce); 3137 3138 vma = i915_vma_instance(global->obj, ce->vm, NULL); 3139 if (IS_ERR(vma)) { 3140 err = PTR_ERR(vma); 3141 goto out_ce; 3142 } 3143 3144 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3145 if (err) 3146 goto out_ce; 3147 3148 batch = create_gpr_user(engine, vma, offset); 3149 if (IS_ERR(batch)) { 3150 err = PTR_ERR(batch); 3151 goto out_vma; 3152 } 3153 3154 rq = intel_context_create_request(ce); 3155 if (IS_ERR(rq)) { 3156 err = PTR_ERR(rq); 3157 goto out_batch; 3158 } 3159 3160 i915_vma_lock(vma); 3161 err = i915_request_await_object(rq, vma->obj, false); 3162 if (!err) 3163 err = i915_vma_move_to_active(vma, rq, 0); 3164 i915_vma_unlock(vma); 3165 3166 i915_vma_lock(batch); 3167 if (!err) 3168 err = i915_request_await_object(rq, batch->obj, false); 3169 if (!err) 3170 err = i915_vma_move_to_active(batch, rq, 0); 3171 if (!err) 3172 err = rq->engine->emit_bb_start(rq, 3173 batch->node.start, 3174 PAGE_SIZE, 0); 3175 i915_vma_unlock(batch); 3176 i915_vma_unpin(batch); 3177 3178 if (!err) 3179 i915_request_get(rq); 3180 i915_request_add(rq); 3181 3182 out_batch: 3183 i915_vma_put(batch); 3184 out_vma: 3185 i915_vma_unpin(vma); 3186 out_ce: 3187 intel_context_put(ce); 3188 return err ? 
ERR_PTR(err) : rq; 3189 } 3190 3191 static int preempt_user(struct intel_engine_cs *engine, 3192 struct i915_vma *global, 3193 int id) 3194 { 3195 struct i915_sched_attr attr = { 3196 .priority = I915_PRIORITY_MAX 3197 }; 3198 struct i915_request *rq; 3199 int err = 0; 3200 u32 *cs; 3201 3202 rq = intel_engine_create_kernel_request(engine); 3203 if (IS_ERR(rq)) 3204 return PTR_ERR(rq); 3205 3206 cs = intel_ring_begin(rq, 4); 3207 if (IS_ERR(cs)) { 3208 i915_request_add(rq); 3209 return PTR_ERR(cs); 3210 } 3211 3212 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 3213 *cs++ = i915_ggtt_offset(global); 3214 *cs++ = 0; 3215 *cs++ = id; 3216 3217 intel_ring_advance(rq, cs); 3218 3219 i915_request_get(rq); 3220 i915_request_add(rq); 3221 3222 engine->schedule(rq, &attr); 3223 3224 if (i915_request_wait(rq, 0, HZ / 2) < 0) 3225 err = -ETIME; 3226 i915_request_put(rq); 3227 3228 return err; 3229 } 3230 3231 static int live_preempt_user(void *arg) 3232 { 3233 struct intel_gt *gt = arg; 3234 struct intel_engine_cs *engine; 3235 struct i915_vma *global; 3236 enum intel_engine_id id; 3237 u32 *result; 3238 int err = 0; 3239 3240 /* 3241 * In our other tests, we look at preemption in carefully 3242 * controlled conditions in the ringbuffer. Since most of the 3243 * time is spent in user batches, most of our preemptions naturally 3244 * occur there. We want to verify that when we preempt inside a batch 3245 * we continue on from the current instruction and do not roll back 3246 * to the start, or another earlier arbitration point. 3247 * 3248 * To verify this, we create a batch which is a mixture of 3249 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3250 * a few preempting contexts thrown into the mix, we look for any 3251 * repeated instructions (which show up as incorrect values). 
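 *
 * Each client dumps its GPRs into its own NUM_GPR-dword slot of the
 * shared result buffer, while result[0] doubles as the semaphore that
 * each preempting request advances; any value other than 1 in a
 * client's slot means an increment was replayed after preemption.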
3252 */ 3253 3254 global = create_global(gt, 4096); 3255 if (IS_ERR(global)) 3256 return PTR_ERR(global); 3257 3258 result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC); 3259 if (IS_ERR(result)) { 3260 i915_vma_unpin_and_release(&global, 0); 3261 return PTR_ERR(result); 3262 } 3263 3264 for_each_engine(engine, gt, id) { 3265 struct i915_request *client[3] = {}; 3266 struct igt_live_test t; 3267 int i; 3268 3269 if (!intel_engine_has_preemption(engine)) 3270 continue; 3271 3272 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) 3273 continue; /* we need per-context GPR */ 3274 3275 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3276 err = -EIO; 3277 break; 3278 } 3279 3280 memset(result, 0, 4096); 3281 3282 for (i = 0; i < ARRAY_SIZE(client); i++) { 3283 struct i915_request *rq; 3284 3285 rq = create_gpr_client(engine, global, 3286 NUM_GPR * i * sizeof(u32)); 3287 if (IS_ERR(rq)) { 3288 err = PTR_ERR(rq); 3289 goto end_test; 3290 } 3291 3292 client[i] = rq; 3293 } 3294 3295 /* Continuously preempt the set of 3 running contexts */ 3296 for (i = 1; i <= NUM_GPR; i++) { 3297 err = preempt_user(engine, global, i); 3298 if (err) 3299 goto end_test; 3300 } 3301 3302 if (READ_ONCE(result[0]) != NUM_GPR) { 3303 pr_err("%s: Failed to release semaphore\n", 3304 engine->name); 3305 err = -EIO; 3306 goto end_test; 3307 } 3308 3309 for (i = 0; i < ARRAY_SIZE(client); i++) { 3310 int gpr; 3311 3312 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3313 err = -ETIME; 3314 goto end_test; 3315 } 3316 3317 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3318 if (result[NUM_GPR * i + gpr] != 1) { 3319 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3320 engine->name, 3321 i, gpr, result[NUM_GPR * i + gpr]); 3322 err = -EINVAL; 3323 goto end_test; 3324 } 3325 } 3326 } 3327 3328 end_test: 3329 for (i = 0; i < ARRAY_SIZE(client); i++) { 3330 if (!client[i]) 3331 break; 3332 3333 i915_request_put(client[i]); 3334 } 3335 3336 /* Flush the semaphores on error */ 3337 smp_store_mb(result[0], -1); 3338 if (igt_live_test_end(&t)) 3339 err = -EIO; 3340 if (err) 3341 break; 3342 } 3343 3344 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3345 return err; 3346 } 3347 3348 static int live_preempt_timeout(void *arg) 3349 { 3350 struct intel_gt *gt = arg; 3351 struct i915_gem_context *ctx_hi, *ctx_lo; 3352 struct igt_spinner spin_lo; 3353 struct intel_engine_cs *engine; 3354 enum intel_engine_id id; 3355 int err = -ENOMEM; 3356 3357 /* 3358 * Check that we force preemption to occur by cancelling the previous 3359 * context if it refuses to yield the GPU. 
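 *
 * The low priority spinner is submitted with arbitration disabled
 * (MI_NOOP), so the only way the high priority request can run is for
 * the preempt timeout, temporarily dropped to 1ms below, to expire
 * and reset the hog.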
3360 */ 3361 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3362 return 0; 3363 3364 if (!intel_has_reset_engine(gt)) 3365 return 0; 3366 3367 if (igt_spinner_init(&spin_lo, gt)) 3368 return -ENOMEM; 3369 3370 ctx_hi = kernel_context(gt->i915); 3371 if (!ctx_hi) 3372 goto err_spin_lo; 3373 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; 3374 3375 ctx_lo = kernel_context(gt->i915); 3376 if (!ctx_lo) 3377 goto err_ctx_hi; 3378 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; 3379 3380 for_each_engine(engine, gt, id) { 3381 unsigned long saved_timeout; 3382 struct i915_request *rq; 3383 3384 if (!intel_engine_has_preemption(engine)) 3385 continue; 3386 3387 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3388 MI_NOOP); /* preemption disabled */ 3389 if (IS_ERR(rq)) { 3390 err = PTR_ERR(rq); 3391 goto err_ctx_lo; 3392 } 3393 3394 i915_request_add(rq); 3395 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3396 intel_gt_set_wedged(gt); 3397 err = -EIO; 3398 goto err_ctx_lo; 3399 } 3400 3401 rq = igt_request_alloc(ctx_hi, engine); 3402 if (IS_ERR(rq)) { 3403 igt_spinner_end(&spin_lo); 3404 err = PTR_ERR(rq); 3405 goto err_ctx_lo; 3406 } 3407 3408 /* Flush the previous CS ack before changing timeouts */ 3409 while (READ_ONCE(engine->execlists.pending[0])) 3410 cpu_relax(); 3411 3412 saved_timeout = engine->props.preempt_timeout_ms; 3413 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3414 3415 i915_request_get(rq); 3416 i915_request_add(rq); 3417 3418 intel_engine_flush_submission(engine); 3419 engine->props.preempt_timeout_ms = saved_timeout; 3420 3421 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3422 intel_gt_set_wedged(gt); 3423 i915_request_put(rq); 3424 err = -ETIME; 3425 goto err_ctx_lo; 3426 } 3427 3428 igt_spinner_end(&spin_lo); 3429 i915_request_put(rq); 3430 } 3431 3432 err = 0; 3433 err_ctx_lo: 3434 kernel_context_close(ctx_lo); 3435 err_ctx_hi: 3436 kernel_context_close(ctx_hi); 3437 err_spin_lo: 3438 igt_spinner_fini(&spin_lo); 3439 return err; 3440 } 3441 3442 static int random_range(struct rnd_state *rnd, int min, int max) 3443 { 3444 return i915_prandom_u32_max_state(max - min, rnd) + min; 3445 } 3446 3447 static int random_priority(struct rnd_state *rnd) 3448 { 3449 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3450 } 3451 3452 struct preempt_smoke { 3453 struct intel_gt *gt; 3454 struct i915_gem_context **contexts; 3455 struct intel_engine_cs *engine; 3456 struct drm_i915_gem_object *batch; 3457 unsigned int ncontext; 3458 struct rnd_state prng; 3459 unsigned long count; 3460 }; 3461 3462 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3463 { 3464 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3465 &smoke->prng)]; 3466 } 3467 3468 static int smoke_submit(struct preempt_smoke *smoke, 3469 struct i915_gem_context *ctx, int prio, 3470 struct drm_i915_gem_object *batch) 3471 { 3472 struct i915_request *rq; 3473 struct i915_vma *vma = NULL; 3474 int err = 0; 3475 3476 if (batch) { 3477 struct i915_address_space *vm; 3478 3479 vm = i915_gem_context_get_vm_rcu(ctx); 3480 vma = i915_vma_instance(batch, vm, NULL); 3481 i915_vm_put(vm); 3482 if (IS_ERR(vma)) 3483 return PTR_ERR(vma); 3484 3485 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3486 if (err) 3487 return err; 3488 } 3489 3490 ctx->sched.priority = prio; 3491 3492 rq = igt_request_alloc(ctx, smoke->engine); 3493 if (IS_ERR(rq)) { 3494 err = PTR_ERR(rq); 3495 goto unpin; 3496 } 3497 3498 if (vma) { 3499 i915_vma_lock(vma); 3500 err = 
i915_request_await_object(rq, vma->obj, false); 3501 if (!err) 3502 err = i915_vma_move_to_active(vma, rq, 0); 3503 if (!err) 3504 err = rq->engine->emit_bb_start(rq, 3505 vma->node.start, 3506 PAGE_SIZE, 0); 3507 i915_vma_unlock(vma); 3508 } 3509 3510 i915_request_add(rq); 3511 3512 unpin: 3513 if (vma) 3514 i915_vma_unpin(vma); 3515 3516 return err; 3517 } 3518 3519 static int smoke_crescendo_thread(void *arg) 3520 { 3521 struct preempt_smoke *smoke = arg; 3522 IGT_TIMEOUT(end_time); 3523 unsigned long count; 3524 3525 count = 0; 3526 do { 3527 struct i915_gem_context *ctx = smoke_context(smoke); 3528 int err; 3529 3530 err = smoke_submit(smoke, 3531 ctx, count % I915_PRIORITY_MAX, 3532 smoke->batch); 3533 if (err) 3534 return err; 3535 3536 count++; 3537 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3538 3539 smoke->count = count; 3540 return 0; 3541 } 3542 3543 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3544 #define BATCH BIT(0) 3545 { 3546 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3547 struct preempt_smoke arg[I915_NUM_ENGINES]; 3548 struct intel_engine_cs *engine; 3549 enum intel_engine_id id; 3550 unsigned long count; 3551 int err = 0; 3552 3553 for_each_engine(engine, smoke->gt, id) { 3554 arg[id] = *smoke; 3555 arg[id].engine = engine; 3556 if (!(flags & BATCH)) 3557 arg[id].batch = NULL; 3558 arg[id].count = 0; 3559 3560 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id], 3561 "igt/smoke:%d", id); 3562 if (IS_ERR(tsk[id])) { 3563 err = PTR_ERR(tsk[id]); 3564 break; 3565 } 3566 get_task_struct(tsk[id]); 3567 } 3568 3569 yield(); /* start all threads before we kthread_stop() */ 3570 3571 count = 0; 3572 for_each_engine(engine, smoke->gt, id) { 3573 int status; 3574 3575 if (IS_ERR_OR_NULL(tsk[id])) 3576 continue; 3577 3578 status = kthread_stop(tsk[id]); 3579 if (status && !err) 3580 err = status; 3581 3582 count += arg[id].count; 3583 3584 put_task_struct(tsk[id]); 3585 } 3586 3587 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3588 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3589 return err; 3590 } 3591 3592 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3593 { 3594 enum intel_engine_id id; 3595 IGT_TIMEOUT(end_time); 3596 unsigned long count; 3597 3598 count = 0; 3599 do { 3600 for_each_engine(smoke->engine, smoke->gt, id) { 3601 struct i915_gem_context *ctx = smoke_context(smoke); 3602 int err; 3603 3604 err = smoke_submit(smoke, 3605 ctx, random_priority(&smoke->prng), 3606 flags & BATCH ?
smoke->batch : NULL); 3607 if (err) 3608 return err; 3609 3610 count++; 3611 } 3612 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3613 3614 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3615 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3616 return 0; 3617 } 3618 3619 static int live_preempt_smoke(void *arg) 3620 { 3621 struct preempt_smoke smoke = { 3622 .gt = arg, 3623 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3624 .ncontext = 256, 3625 }; 3626 const unsigned int phase[] = { 0, BATCH }; 3627 struct igt_live_test t; 3628 int err = -ENOMEM; 3629 u32 *cs; 3630 int n; 3631 3632 smoke.contexts = kmalloc_array(smoke.ncontext, 3633 sizeof(*smoke.contexts), 3634 GFP_KERNEL); 3635 if (!smoke.contexts) 3636 return -ENOMEM; 3637 3638 smoke.batch = 3639 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3640 if (IS_ERR(smoke.batch)) { 3641 err = PTR_ERR(smoke.batch); 3642 goto err_free; 3643 } 3644 3645 cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB); 3646 if (IS_ERR(cs)) { 3647 err = PTR_ERR(cs); 3648 goto err_batch; 3649 } 3650 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3651 cs[n] = MI_ARB_CHECK; 3652 cs[n] = MI_BATCH_BUFFER_END; 3653 i915_gem_object_flush_map(smoke.batch); 3654 i915_gem_object_unpin_map(smoke.batch); 3655 3656 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3657 err = -EIO; 3658 goto err_batch; 3659 } 3660 3661 for (n = 0; n < smoke.ncontext; n++) { 3662 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3663 if (!smoke.contexts[n]) 3664 goto err_ctx; 3665 } 3666 3667 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3668 err = smoke_crescendo(&smoke, phase[n]); 3669 if (err) 3670 goto err_ctx; 3671 3672 err = smoke_random(&smoke, phase[n]); 3673 if (err) 3674 goto err_ctx; 3675 } 3676 3677 err_ctx: 3678 if (igt_live_test_end(&t)) 3679 err = -EIO; 3680 3681 for (n = 0; n < smoke.ncontext; n++) { 3682 if (!smoke.contexts[n]) 3683 break; 3684 kernel_context_close(smoke.contexts[n]); 3685 } 3686 3687 err_batch: 3688 i915_gem_object_put(smoke.batch); 3689 err_free: 3690 kfree(smoke.contexts); 3691 3692 return err; 3693 } 3694 3695 static int nop_virtual_engine(struct intel_gt *gt, 3696 struct intel_engine_cs **siblings, 3697 unsigned int nsibling, 3698 unsigned int nctx, 3699 unsigned int flags) 3700 #define CHAIN BIT(0) 3701 { 3702 IGT_TIMEOUT(end_time); 3703 struct i915_request *request[16] = {}; 3704 struct intel_context *ve[16]; 3705 unsigned long n, prime, nc; 3706 struct igt_live_test t; 3707 ktime_t times[2] = {}; 3708 int err; 3709 3710 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3711 3712 for (n = 0; n < nctx; n++) { 3713 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3714 if (IS_ERR(ve[n])) { 3715 err = PTR_ERR(ve[n]); 3716 nctx = n; 3717 goto out; 3718 } 3719 3720 err = intel_context_pin(ve[n]); 3721 if (err) { 3722 intel_context_put(ve[n]); 3723 nctx = n; 3724 goto out; 3725 } 3726 } 3727 3728 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3729 if (err) 3730 goto out; 3731 3732 for_each_prime_number_from(prime, 1, 8192) { 3733 times[1] = ktime_get_raw(); 3734 3735 if (flags & CHAIN) { 3736 for (nc = 0; nc < nctx; nc++) { 3737 for (n = 0; n < prime; n++) { 3738 struct i915_request *rq; 3739 3740 rq = i915_request_create(ve[nc]); 3741 if (IS_ERR(rq)) { 3742 err = PTR_ERR(rq); 3743 goto out; 3744 } 3745 3746 if (request[nc]) 3747 i915_request_put(request[nc]); 3748 request[nc] = i915_request_get(rq); 3749 
i915_request_add(rq); 3750 } 3751 } 3752 } else { 3753 for (n = 0; n < prime; n++) { 3754 for (nc = 0; nc < nctx; nc++) { 3755 struct i915_request *rq; 3756 3757 rq = i915_request_create(ve[nc]); 3758 if (IS_ERR(rq)) { 3759 err = PTR_ERR(rq); 3760 goto out; 3761 } 3762 3763 if (request[nc]) 3764 i915_request_put(request[nc]); 3765 request[nc] = i915_request_get(rq); 3766 i915_request_add(rq); 3767 } 3768 } 3769 } 3770 3771 for (nc = 0; nc < nctx; nc++) { 3772 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3773 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3774 __func__, ve[0]->engine->name, 3775 request[nc]->fence.context, 3776 request[nc]->fence.seqno); 3777 3778 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3779 __func__, ve[0]->engine->name, 3780 request[nc]->fence.context, 3781 request[nc]->fence.seqno); 3782 GEM_TRACE_DUMP(); 3783 intel_gt_set_wedged(gt); 3784 break; 3785 } 3786 } 3787 3788 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3789 if (prime == 1) 3790 times[0] = times[1]; 3791 3792 for (nc = 0; nc < nctx; nc++) { 3793 i915_request_put(request[nc]); 3794 request[nc] = NULL; 3795 } 3796 3797 if (__igt_timeout(end_time, NULL)) 3798 break; 3799 } 3800 3801 err = igt_live_test_end(&t); 3802 if (err) 3803 goto out; 3804 3805 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3806 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3807 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3808 3809 out: 3810 if (igt_flush_test(gt->i915)) 3811 err = -EIO; 3812 3813 for (nc = 0; nc < nctx; nc++) { 3814 i915_request_put(request[nc]); 3815 intel_context_unpin(ve[nc]); 3816 intel_context_put(ve[nc]); 3817 } 3818 return err; 3819 } 3820 3821 static unsigned int 3822 __select_siblings(struct intel_gt *gt, 3823 unsigned int class, 3824 struct intel_engine_cs **siblings, 3825 bool (*filter)(const struct intel_engine_cs *)) 3826 { 3827 unsigned int n = 0; 3828 unsigned int inst; 3829 3830 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3831 if (!gt->engine_class[class][inst]) 3832 continue; 3833 3834 if (filter && !filter(gt->engine_class[class][inst])) 3835 continue; 3836 3837 siblings[n++] = gt->engine_class[class][inst]; 3838 } 3839 3840 return n; 3841 } 3842 3843 static unsigned int 3844 select_siblings(struct intel_gt *gt, 3845 unsigned int class, 3846 struct intel_engine_cs **siblings) 3847 { 3848 return __select_siblings(gt, class, siblings, NULL); 3849 } 3850 3851 static int live_virtual_engine(void *arg) 3852 { 3853 struct intel_gt *gt = arg; 3854 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3855 struct intel_engine_cs *engine; 3856 enum intel_engine_id id; 3857 unsigned int class; 3858 int err; 3859 3860 if (intel_uc_uses_guc_submission(>->uc)) 3861 return 0; 3862 3863 for_each_engine(engine, gt, id) { 3864 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3865 if (err) { 3866 pr_err("Failed to wrap engine %s: err=%d\n", 3867 engine->name, err); 3868 return err; 3869 } 3870 } 3871 3872 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3873 int nsibling, n; 3874 3875 nsibling = select_siblings(gt, class, siblings); 3876 if (nsibling < 2) 3877 continue; 3878 3879 for (n = 1; n <= nsibling + 1; n++) { 3880 err = nop_virtual_engine(gt, siblings, nsibling, 3881 n, 0); 3882 if (err) 3883 return err; 3884 } 3885 3886 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3887 if (err) 3888 return err; 3889 } 3890 3891 return 0; 3892 } 3893 3894 static int mask_virtual_engine(struct intel_gt *gt, 3895 struct intel_engine_cs **siblings, 
3896 unsigned int nsibling) 3897 { 3898 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3899 struct intel_context *ve; 3900 struct igt_live_test t; 3901 unsigned int n; 3902 int err; 3903 3904 /* 3905 * Check that by setting the execution mask on a request, we can 3906 * restrict it to our desired engine within the virtual engine. 3907 */ 3908 3909 ve = intel_execlists_create_virtual(siblings, nsibling); 3910 if (IS_ERR(ve)) { 3911 err = PTR_ERR(ve); 3912 goto out_close; 3913 } 3914 3915 err = intel_context_pin(ve); 3916 if (err) 3917 goto out_put; 3918 3919 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3920 if (err) 3921 goto out_unpin; 3922 3923 for (n = 0; n < nsibling; n++) { 3924 request[n] = i915_request_create(ve); 3925 if (IS_ERR(request[n])) { 3926 err = PTR_ERR(request[n]); 3927 nsibling = n; 3928 goto out; 3929 } 3930 3931 /* Reverse order as it's more likely to be unnatural */ 3932 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3933 3934 i915_request_get(request[n]); 3935 i915_request_add(request[n]); 3936 } 3937 3938 for (n = 0; n < nsibling; n++) { 3939 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3940 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3941 __func__, ve->engine->name, 3942 request[n]->fence.context, 3943 request[n]->fence.seqno); 3944 3945 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3946 __func__, ve->engine->name, 3947 request[n]->fence.context, 3948 request[n]->fence.seqno); 3949 GEM_TRACE_DUMP(); 3950 intel_gt_set_wedged(gt); 3951 err = -EIO; 3952 goto out; 3953 } 3954 3955 if (request[n]->engine != siblings[nsibling - n - 1]) { 3956 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3957 request[n]->engine->name, 3958 siblings[nsibling - n - 1]->name); 3959 err = -EINVAL; 3960 goto out; 3961 } 3962 } 3963 3964 err = igt_live_test_end(&t); 3965 out: 3966 if (igt_flush_test(gt->i915)) 3967 err = -EIO; 3968 3969 for (n = 0; n < nsibling; n++) 3970 i915_request_put(request[n]); 3971 3972 out_unpin: 3973 intel_context_unpin(ve); 3974 out_put: 3975 intel_context_put(ve); 3976 out_close: 3977 return err; 3978 } 3979 3980 static int live_virtual_mask(void *arg) 3981 { 3982 struct intel_gt *gt = arg; 3983 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3984 unsigned int class; 3985 int err; 3986 3987 if (intel_uc_uses_guc_submission(>->uc)) 3988 return 0; 3989 3990 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3991 unsigned int nsibling; 3992 3993 nsibling = select_siblings(gt, class, siblings); 3994 if (nsibling < 2) 3995 continue; 3996 3997 err = mask_virtual_engine(gt, siblings, nsibling); 3998 if (err) 3999 return err; 4000 } 4001 4002 return 0; 4003 } 4004 4005 static int slicein_virtual_engine(struct intel_gt *gt, 4006 struct intel_engine_cs **siblings, 4007 unsigned int nsibling) 4008 { 4009 const long timeout = slice_timeout(siblings[0]); 4010 struct intel_context *ce; 4011 struct i915_request *rq; 4012 struct igt_spinner spin; 4013 unsigned int n; 4014 int err = 0; 4015 4016 /* 4017 * Virtual requests must take part in timeslicing on the target engines. 
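 *
 * Each sibling is first kept busy with its own spinner; a request
 * then submitted on the virtual engine must still be granted a
 * timeslice and complete within slice_timeout() of a sibling.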
4018 */ 4019 4020 if (igt_spinner_init(&spin, gt)) 4021 return -ENOMEM; 4022 4023 for (n = 0; n < nsibling; n++) { 4024 ce = intel_context_create(siblings[n]); 4025 if (IS_ERR(ce)) { 4026 err = PTR_ERR(ce); 4027 goto out; 4028 } 4029 4030 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4031 intel_context_put(ce); 4032 if (IS_ERR(rq)) { 4033 err = PTR_ERR(rq); 4034 goto out; 4035 } 4036 4037 i915_request_add(rq); 4038 } 4039 4040 ce = intel_execlists_create_virtual(siblings, nsibling); 4041 if (IS_ERR(ce)) { 4042 err = PTR_ERR(ce); 4043 goto out; 4044 } 4045 4046 rq = intel_context_create_request(ce); 4047 intel_context_put(ce); 4048 if (IS_ERR(rq)) { 4049 err = PTR_ERR(rq); 4050 goto out; 4051 } 4052 4053 i915_request_get(rq); 4054 i915_request_add(rq); 4055 if (i915_request_wait(rq, 0, timeout) < 0) { 4056 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", 4057 __func__, rq->engine->name); 4058 GEM_TRACE_DUMP(); 4059 intel_gt_set_wedged(gt); 4060 err = -EIO; 4061 } 4062 i915_request_put(rq); 4063 4064 out: 4065 igt_spinner_end(&spin); 4066 if (igt_flush_test(gt->i915)) 4067 err = -EIO; 4068 igt_spinner_fini(&spin); 4069 return err; 4070 } 4071 4072 static int sliceout_virtual_engine(struct intel_gt *gt, 4073 struct intel_engine_cs **siblings, 4074 unsigned int nsibling) 4075 { 4076 const long timeout = slice_timeout(siblings[0]); 4077 struct intel_context *ce; 4078 struct i915_request *rq; 4079 struct igt_spinner spin; 4080 unsigned int n; 4081 int err = 0; 4082 4083 /* 4084 * Virtual requests must allow others a fair timeslice. 4085 */ 4086 4087 if (igt_spinner_init(&spin, gt)) 4088 return -ENOMEM; 4089 4090 /* XXX We do not handle oversubscription and fairness with normal rq */ 4091 for (n = 0; n < nsibling; n++) { 4092 ce = intel_execlists_create_virtual(siblings, nsibling); 4093 if (IS_ERR(ce)) { 4094 err = PTR_ERR(ce); 4095 goto out; 4096 } 4097 4098 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4099 intel_context_put(ce); 4100 if (IS_ERR(rq)) { 4101 err = PTR_ERR(rq); 4102 goto out; 4103 } 4104 4105 i915_request_add(rq); 4106 } 4107 4108 for (n = 0; !err && n < nsibling; n++) { 4109 ce = intel_context_create(siblings[n]); 4110 if (IS_ERR(ce)) { 4111 err = PTR_ERR(ce); 4112 goto out; 4113 } 4114 4115 rq = intel_context_create_request(ce); 4116 intel_context_put(ce); 4117 if (IS_ERR(rq)) { 4118 err = PTR_ERR(rq); 4119 goto out; 4120 } 4121 4122 i915_request_get(rq); 4123 i915_request_add(rq); 4124 if (i915_request_wait(rq, 0, timeout) < 0) { 4125 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", 4126 __func__, siblings[n]->name); 4127 GEM_TRACE_DUMP(); 4128 intel_gt_set_wedged(gt); 4129 err = -EIO; 4130 } 4131 i915_request_put(rq); 4132 } 4133 4134 out: 4135 igt_spinner_end(&spin); 4136 if (igt_flush_test(gt->i915)) 4137 err = -EIO; 4138 igt_spinner_fini(&spin); 4139 return err; 4140 } 4141 4142 static int live_virtual_slice(void *arg) 4143 { 4144 struct intel_gt *gt = arg; 4145 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4146 unsigned int class; 4147 int err; 4148 4149 if (intel_uc_uses_guc_submission(>->uc)) 4150 return 0; 4151 4152 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4153 unsigned int nsibling; 4154 4155 nsibling = __select_siblings(gt, class, siblings, 4156 intel_engine_has_timeslices); 4157 if (nsibling < 2) 4158 continue; 4159 4160 err = slicein_virtual_engine(gt, siblings, nsibling); 4161 if (err) 4162 return err; 4163 4164 err = sliceout_virtual_engine(gt, siblings, nsibling); 4165 if (err) 4166 
return err; 4167 } 4168 4169 return 0; 4170 } 4171 4172 static int preserved_virtual_engine(struct intel_gt *gt, 4173 struct intel_engine_cs **siblings, 4174 unsigned int nsibling) 4175 { 4176 struct i915_request *last = NULL; 4177 struct intel_context *ve; 4178 struct i915_vma *scratch; 4179 struct igt_live_test t; 4180 unsigned int n; 4181 int err = 0; 4182 u32 *cs; 4183 4184 scratch = 4185 __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm, 4186 PAGE_SIZE); 4187 if (IS_ERR(scratch)) 4188 return PTR_ERR(scratch); 4189 4190 err = i915_vma_sync(scratch); 4191 if (err) 4192 goto out_scratch; 4193 4194 ve = intel_execlists_create_virtual(siblings, nsibling); 4195 if (IS_ERR(ve)) { 4196 err = PTR_ERR(ve); 4197 goto out_scratch; 4198 } 4199 4200 err = intel_context_pin(ve); 4201 if (err) 4202 goto out_put; 4203 4204 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 4205 if (err) 4206 goto out_unpin; 4207 4208 for (n = 0; n < NUM_GPR_DW; n++) { 4209 struct intel_engine_cs *engine = siblings[n % nsibling]; 4210 struct i915_request *rq; 4211 4212 rq = i915_request_create(ve); 4213 if (IS_ERR(rq)) { 4214 err = PTR_ERR(rq); 4215 goto out_end; 4216 } 4217 4218 i915_request_put(last); 4219 last = i915_request_get(rq); 4220 4221 cs = intel_ring_begin(rq, 8); 4222 if (IS_ERR(cs)) { 4223 i915_request_add(rq); 4224 err = PTR_ERR(cs); 4225 goto out_end; 4226 } 4227 4228 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4229 *cs++ = CS_GPR(engine, n); 4230 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4231 *cs++ = 0; 4232 4233 *cs++ = MI_LOAD_REGISTER_IMM(1); 4234 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 4235 *cs++ = n + 1; 4236 4237 *cs++ = MI_NOOP; 4238 intel_ring_advance(rq, cs); 4239 4240 /* Restrict this request to run on a particular engine */ 4241 rq->execution_mask = engine->mask; 4242 i915_request_add(rq); 4243 } 4244 4245 if (i915_request_wait(last, 0, HZ / 5) < 0) { 4246 err = -ETIME; 4247 goto out_end; 4248 } 4249 4250 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); 4251 if (IS_ERR(cs)) { 4252 err = PTR_ERR(cs); 4253 goto out_end; 4254 } 4255 4256 for (n = 0; n < NUM_GPR_DW; n++) { 4257 if (cs[n] != n) { 4258 pr_err("Incorrect value[%d] found for GPR[%d]\n", 4259 cs[n], n); 4260 err = -EINVAL; 4261 break; 4262 } 4263 } 4264 4265 i915_gem_object_unpin_map(scratch->obj); 4266 4267 out_end: 4268 if (igt_live_test_end(&t)) 4269 err = -EIO; 4270 i915_request_put(last); 4271 out_unpin: 4272 intel_context_unpin(ve); 4273 out_put: 4274 intel_context_put(ve); 4275 out_scratch: 4276 i915_vma_unpin_and_release(&scratch, 0); 4277 return err; 4278 } 4279 4280 static int live_virtual_preserved(void *arg) 4281 { 4282 struct intel_gt *gt = arg; 4283 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4284 unsigned int class; 4285 4286 /* 4287 * Check that the context image retains non-privileged (user) registers 4288 * from one engine to the next. For this we check that the CS_GPR 4289 * are preserved. 4290 */ 4291 4292 if (intel_uc_uses_guc_submission(>->uc)) 4293 return 0; 4294 4295 /* As we use CS_GPR we cannot run before they existed on all engines. 
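 * (On Gen8 only the render engine has the CS_GPR block, hence the
 * Gen9+ check below.)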
*/ 4296 if (INTEL_GEN(gt->i915) < 9) 4297 return 0; 4298 4299 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4300 int nsibling, err; 4301 4302 nsibling = select_siblings(gt, class, siblings); 4303 if (nsibling < 2) 4304 continue; 4305 4306 err = preserved_virtual_engine(gt, siblings, nsibling); 4307 if (err) 4308 return err; 4309 } 4310 4311 return 0; 4312 } 4313 4314 static int bond_virtual_engine(struct intel_gt *gt, 4315 unsigned int class, 4316 struct intel_engine_cs **siblings, 4317 unsigned int nsibling, 4318 unsigned int flags) 4319 #define BOND_SCHEDULE BIT(0) 4320 { 4321 struct intel_engine_cs *master; 4322 struct i915_request *rq[16]; 4323 enum intel_engine_id id; 4324 struct igt_spinner spin; 4325 unsigned long n; 4326 int err; 4327 4328 /* 4329 * A set of bonded requests is intended to be run concurrently 4330 * across a number of engines. We use one request per-engine 4331 * and a magic fence to schedule each of the bonded requests 4332 * at the same time. A consequence of our current scheduler is that 4333 * we only move requests to the HW ready queue when the request 4334 * becomes ready, that is when all of its prerequisite fences have 4335 * been signaled. As one of those fences is the master submit fence, 4336 * there is a delay on all secondary fences as the HW may be 4337 * currently busy. Equally, as all the requests are independent, 4338 * they may have other fences that delay individual request 4339 * submission to HW. Ergo, we do not guarantee that all requests are 4340 * immediately submitted to HW at the same time, just that if the 4341 * rules are abided by, they are ready at the same time as the 4342 * first is submitted. Userspace can embed semaphores in its batch 4343 * to ensure parallel execution of its phases as it requires. 4344 * Though naturally it gets requested that perhaps the scheduler should 4345 * take care of parallel execution, even across preemption events on 4346 * different HW. (The proper answer is of course "lalalala".) 4347 * 4348 * With the submit-fence, we have identified three possible phases 4349 * of synchronisation depending on the master fence: queued (not 4350 * ready), executing, and signaled. The first two are quite simple 4351 * and checked below. However, the signaled master fence handling is 4352 * contentious. Currently we do not distinguish between a signaled 4353 * fence and an expired fence, as once signaled it does not convey 4354 * any information about the previous execution. It may even be freed 4355 * and hence checking later it may not exist at all. Ergo we currently 4356 * do not apply the bonding constraint for an already signaled fence, 4357 * as our expectation is that it should not constrain the secondaries 4358 * and is outside of the scope of the bonded request API (i.e. all 4359 * userspace requests are meant to be running in parallel). As 4360 * it imposes no constraint, and is effectively a no-op, we do not 4361 * check below as normal execution flows are checked extensively above. 4362 * 4363 * XXX Is the degenerate handling of signaled submit fences the 4364 * expected behaviour for userspace?
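 *
 * The first two phases are exercised below via BOND_SCHEDULE: without
 * it the master spinner is already executing when the bonded requests
 * are submitted; with it the master is held back by an onstack fence
 * so that the whole set is queued and then released together.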
4365 */ 4366 4367 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 4368 4369 if (igt_spinner_init(&spin, gt)) 4370 return -ENOMEM; 4371 4372 err = 0; 4373 rq[0] = ERR_PTR(-ENOMEM); 4374 for_each_engine(master, gt, id) { 4375 struct i915_sw_fence fence = {}; 4376 struct intel_context *ce; 4377 4378 if (master->class == class) 4379 continue; 4380 4381 ce = intel_context_create(master); 4382 if (IS_ERR(ce)) { 4383 err = PTR_ERR(ce); 4384 goto out; 4385 } 4386 4387 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 4388 4389 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 4390 intel_context_put(ce); 4391 if (IS_ERR(rq[0])) { 4392 err = PTR_ERR(rq[0]); 4393 goto out; 4394 } 4395 i915_request_get(rq[0]); 4396 4397 if (flags & BOND_SCHEDULE) { 4398 onstack_fence_init(&fence); 4399 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4400 &fence, 4401 GFP_KERNEL); 4402 } 4403 4404 i915_request_add(rq[0]); 4405 if (err < 0) 4406 goto out; 4407 4408 if (!(flags & BOND_SCHEDULE) && 4409 !igt_wait_for_spinner(&spin, rq[0])) { 4410 err = -EIO; 4411 goto out; 4412 } 4413 4414 for (n = 0; n < nsibling; n++) { 4415 struct intel_context *ve; 4416 4417 ve = intel_execlists_create_virtual(siblings, nsibling); 4418 if (IS_ERR(ve)) { 4419 err = PTR_ERR(ve); 4420 onstack_fence_fini(&fence); 4421 goto out; 4422 } 4423 4424 err = intel_virtual_engine_attach_bond(ve->engine, 4425 master, 4426 siblings[n]); 4427 if (err) { 4428 intel_context_put(ve); 4429 onstack_fence_fini(&fence); 4430 goto out; 4431 } 4432 4433 err = intel_context_pin(ve); 4434 intel_context_put(ve); 4435 if (err) { 4436 onstack_fence_fini(&fence); 4437 goto out; 4438 } 4439 4440 rq[n + 1] = i915_request_create(ve); 4441 intel_context_unpin(ve); 4442 if (IS_ERR(rq[n + 1])) { 4443 err = PTR_ERR(rq[n + 1]); 4444 onstack_fence_fini(&fence); 4445 goto out; 4446 } 4447 i915_request_get(rq[n + 1]); 4448 4449 err = i915_request_await_execution(rq[n + 1], 4450 &rq[0]->fence, 4451 ve->engine->bond_execute); 4452 i915_request_add(rq[n + 1]); 4453 if (err < 0) { 4454 onstack_fence_fini(&fence); 4455 goto out; 4456 } 4457 } 4458 onstack_fence_fini(&fence); 4459 intel_engine_flush_submission(master); 4460 igt_spinner_end(&spin); 4461 4462 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4463 pr_err("Master request did not execute (on %s)!\n", 4464 rq[0]->engine->name); 4465 err = -EIO; 4466 goto out; 4467 } 4468 4469 for (n = 0; n < nsibling; n++) { 4470 if (i915_request_wait(rq[n + 1], 0, 4471 MAX_SCHEDULE_TIMEOUT) < 0) { 4472 err = -EIO; 4473 goto out; 4474 } 4475 4476 if (rq[n + 1]->engine != siblings[n]) { 4477 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4478 siblings[n]->name, 4479 rq[n + 1]->engine->name, 4480 rq[0]->engine->name); 4481 err = -EINVAL; 4482 goto out; 4483 } 4484 } 4485 4486 for (n = 0; !IS_ERR(rq[n]); n++) 4487 i915_request_put(rq[n]); 4488 rq[0] = ERR_PTR(-ENOMEM); 4489 } 4490 4491 out: 4492 for (n = 0; !IS_ERR(rq[n]); n++) 4493 i915_request_put(rq[n]); 4494 if (igt_flush_test(gt->i915)) 4495 err = -EIO; 4496 4497 igt_spinner_fini(&spin); 4498 return err; 4499 } 4500 4501 static int live_virtual_bond(void *arg) 4502 { 4503 static const struct phase { 4504 const char *name; 4505 unsigned int flags; 4506 } phases[] = { 4507 { "", 0 }, 4508 { "schedule", BOND_SCHEDULE }, 4509 { }, 4510 }; 4511 struct intel_gt *gt = arg; 4512 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4513 unsigned int class; 4514 int err; 4515 4516 if (intel_uc_uses_guc_submission(>->uc)) 4517 
return 0; 4518 4519 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4520 const struct phase *p; 4521 int nsibling; 4522 4523 nsibling = select_siblings(gt, class, siblings); 4524 if (nsibling < 2) 4525 continue; 4526 4527 for (p = phases; p->name; p++) { 4528 err = bond_virtual_engine(gt, 4529 class, siblings, nsibling, 4530 p->flags); 4531 if (err) { 4532 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 4533 __func__, p->name, class, nsibling, err); 4534 return err; 4535 } 4536 } 4537 } 4538 4539 return 0; 4540 } 4541 4542 static int reset_virtual_engine(struct intel_gt *gt, 4543 struct intel_engine_cs **siblings, 4544 unsigned int nsibling) 4545 { 4546 struct intel_engine_cs *engine; 4547 struct intel_context *ve; 4548 struct igt_spinner spin; 4549 struct i915_request *rq; 4550 unsigned int n; 4551 int err = 0; 4552 4553 /* 4554 * In order to support offline error capture for fast preempt reset, 4555 * we need to decouple the guilty request and ensure that it and its 4556 * descendants are not executed while the capture is in progress. 4557 */ 4558 4559 if (igt_spinner_init(&spin, gt)) 4560 return -ENOMEM; 4561 4562 ve = intel_execlists_create_virtual(siblings, nsibling); 4563 if (IS_ERR(ve)) { 4564 err = PTR_ERR(ve); 4565 goto out_spin; 4566 } 4567 4568 for (n = 0; n < nsibling; n++) 4569 st_engine_heartbeat_disable(siblings[n]); 4570 4571 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4572 if (IS_ERR(rq)) { 4573 err = PTR_ERR(rq); 4574 goto out_heartbeat; 4575 } 4576 i915_request_add(rq); 4577 4578 if (!igt_wait_for_spinner(&spin, rq)) { 4579 intel_gt_set_wedged(gt); 4580 err = -ETIME; 4581 goto out_heartbeat; 4582 } 4583 4584 engine = rq->engine; 4585 GEM_BUG_ON(engine == ve->engine); 4586 4587 /* Take ownership of the reset and tasklet */ 4588 local_bh_disable(); 4589 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 4590 &gt->reset.flags)) { 4591 local_bh_enable(); 4592 intel_gt_set_wedged(gt); 4593 err = -EBUSY; 4594 goto out_heartbeat; 4595 } 4596 tasklet_disable(&engine->execlists.tasklet); 4597 4598 engine->execlists.tasklet.callback(&engine->execlists.tasklet); 4599 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 4600 4601 /* Fake a preemption event; failed of course */ 4602 spin_lock_irq(&engine->active.lock); 4603 __unwind_incomplete_requests(engine); 4604 spin_unlock_irq(&engine->active.lock); 4605 GEM_BUG_ON(rq->engine != engine); 4606 4607 /* Reset the engine while keeping our active request on hold */ 4608 execlists_hold(engine, rq); 4609 GEM_BUG_ON(!i915_request_on_hold(rq)); 4610 4611 __intel_engine_reset_bh(engine, NULL); 4612 GEM_BUG_ON(rq->fence.error != -EIO); 4613 4614 /* Release our grasp on the engine, letting CS flow again */ 4615 tasklet_enable(&engine->execlists.tasklet); 4616 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags); 4617 local_bh_enable(); 4618 4619 /* Check that we do not resubmit the held request */ 4620 i915_request_get(rq); 4621 if (!i915_request_wait(rq, 0, HZ / 5)) { 4622 pr_err("%s: on hold request completed!\n", 4623 engine->name); 4624 intel_gt_set_wedged(gt); 4625 err = -EIO; 4626 goto out_rq; 4627 } 4628 GEM_BUG_ON(!i915_request_on_hold(rq)); 4629 4630 /* But is resubmitted on release */ 4631 execlists_unhold(engine, rq); 4632 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4633 pr_err("%s: held request did not complete!\n", 4634 engine->name); 4635 intel_gt_set_wedged(gt); 4636 err = -ETIME; 4637 } 4638 4639 out_rq: 4640 i915_request_put(rq); 4641 out_heartbeat: 4642 for (n = 0; n < 
nsibling; n++) 4643 st_engine_heartbeat_enable(siblings[n]); 4644 4645 intel_context_put(ve); 4646 out_spin: 4647 igt_spinner_fini(&spin); 4648 return err; 4649 } 4650 4651 static int live_virtual_reset(void *arg) 4652 { 4653 struct intel_gt *gt = arg; 4654 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4655 unsigned int class; 4656 4657 /* 4658 * Check that we handle a reset event within a virtual engine. 4659 * Only the physical engine is reset, but we have to check the flow 4660 * of the virtual requests around the reset, and make sure it is not 4661 * forgotten. 4662 */ 4663 4664 if (intel_uc_uses_guc_submission(&gt->uc)) 4665 return 0; 4666 4667 if (!intel_has_reset_engine(gt)) 4668 return 0; 4669 4670 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4671 int nsibling, err; 4672 4673 nsibling = select_siblings(gt, class, siblings); 4674 if (nsibling < 2) 4675 continue; 4676 4677 err = reset_virtual_engine(gt, siblings, nsibling); 4678 if (err) 4679 return err; 4680 } 4681 4682 return 0; 4683 } 4684 4685 int intel_execlists_live_selftests(struct drm_i915_private *i915) 4686 { 4687 static const struct i915_subtest tests[] = { 4688 SUBTEST(live_sanitycheck), 4689 SUBTEST(live_unlite_switch), 4690 SUBTEST(live_unlite_preempt), 4691 SUBTEST(live_unlite_ring), 4692 SUBTEST(live_pin_rewind), 4693 SUBTEST(live_hold_reset), 4694 SUBTEST(live_error_interrupt), 4695 SUBTEST(live_timeslice_preempt), 4696 SUBTEST(live_timeslice_rewind), 4697 SUBTEST(live_timeslice_queue), 4698 SUBTEST(live_timeslice_nopreempt), 4699 SUBTEST(live_busywait_preempt), 4700 SUBTEST(live_preempt), 4701 SUBTEST(live_late_preempt), 4702 SUBTEST(live_nopreempt), 4703 SUBTEST(live_preempt_cancel), 4704 SUBTEST(live_suppress_self_preempt), 4705 SUBTEST(live_chain_preempt), 4706 SUBTEST(live_preempt_ring), 4707 SUBTEST(live_preempt_gang), 4708 SUBTEST(live_preempt_timeout), 4709 SUBTEST(live_preempt_user), 4710 SUBTEST(live_preempt_smoke), 4711 SUBTEST(live_virtual_engine), 4712 SUBTEST(live_virtual_mask), 4713 SUBTEST(live_virtual_preserved), 4714 SUBTEST(live_virtual_slice), 4715 SUBTEST(live_virtual_bond), 4716 SUBTEST(live_virtual_reset), 4717 }; 4718 4719 if (!HAS_EXECLISTS(i915)) 4720 return 0; 4721 4722 if (intel_gt_is_wedged(&i915->gt)) 4723 return 0; 4724 4725 return intel_gt_live_subtests(tests, &i915->gt); 4726 } 4727