1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2018 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_engine_heartbeat.h" 11 #include "gt/intel_reset.h" 12 #include "gt/selftest_engine_heartbeat.h" 13 14 #include "i915_selftest.h" 15 #include "selftests/i915_random.h" 16 #include "selftests/igt_flush_test.h" 17 #include "selftests/igt_live_test.h" 18 #include "selftests/igt_spinner.h" 19 #include "selftests/lib_sw_fence.h" 20 21 #include "gem/selftests/igt_gem_utils.h" 22 #include "gem/selftests/mock_context.h" 23 24 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) 25 #define NUM_GPR 16 26 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ 27 28 static bool is_active(struct i915_request *rq) 29 { 30 if (i915_request_is_active(rq)) 31 return true; 32 33 if (i915_request_on_hold(rq)) 34 return true; 35 36 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) 37 return true; 38 39 return false; 40 } 41 42 static int wait_for_submit(struct intel_engine_cs *engine, 43 struct i915_request *rq, 44 unsigned long timeout) 45 { 46 /* Ignore our own attempts to suppress excess tasklets */ 47 tasklet_hi_schedule(&engine->execlists.tasklet); 48 49 timeout += jiffies; 50 do { 51 bool done = time_after(jiffies, timeout); 52 53 if (i915_request_completed(rq)) /* that was quick! */ 54 return 0; 55 56 /* Wait until the HW has acknowleged the submission (or err) */ 57 intel_engine_flush_submission(engine); 58 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) 59 return 0; 60 61 if (done) 62 return -ETIME; 63 64 cond_resched(); 65 } while (1); 66 } 67 68 static int wait_for_reset(struct intel_engine_cs *engine, 69 struct i915_request *rq, 70 unsigned long timeout) 71 { 72 timeout += jiffies; 73 74 do { 75 cond_resched(); 76 intel_engine_flush_submission(engine); 77 78 if (READ_ONCE(engine->execlists.pending[0])) 79 continue; 80 81 if (i915_request_completed(rq)) 82 break; 83 84 if (READ_ONCE(rq->fence.error)) 85 break; 86 } while (time_before(jiffies, timeout)); 87 88 flush_scheduled_work(); 89 90 if (rq->fence.error != -EIO) { 91 pr_err("%s: hanging request %llx:%lld not reset\n", 92 engine->name, 93 rq->fence.context, 94 rq->fence.seqno); 95 return -EINVAL; 96 } 97 98 /* Give the request a jiffie to complete after flushing the worker */ 99 if (i915_request_wait(rq, 0, 100 max(0l, (long)(timeout - jiffies)) + 1) < 0) { 101 pr_err("%s: hanging request %llx:%lld did not complete\n", 102 engine->name, 103 rq->fence.context, 104 rq->fence.seqno); 105 return -ETIME; 106 } 107 108 return 0; 109 } 110 111 static int live_sanitycheck(void *arg) 112 { 113 struct intel_gt *gt = arg; 114 struct intel_engine_cs *engine; 115 enum intel_engine_id id; 116 struct igt_spinner spin; 117 int err = 0; 118 119 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) 120 return 0; 121 122 if (igt_spinner_init(&spin, gt)) 123 return -ENOMEM; 124 125 for_each_engine(engine, gt, id) { 126 struct intel_context *ce; 127 struct i915_request *rq; 128 129 ce = intel_context_create(engine); 130 if (IS_ERR(ce)) { 131 err = PTR_ERR(ce); 132 break; 133 } 134 135 rq = igt_spinner_create_request(&spin, ce, MI_NOOP); 136 if (IS_ERR(rq)) { 137 err = PTR_ERR(rq); 138 goto out_ctx; 139 } 140 141 i915_request_add(rq); 142 if (!igt_wait_for_spinner(&spin, rq)) { 143 GEM_TRACE("spinner failed to start\n"); 144 GEM_TRACE_DUMP(); 145 intel_gt_set_wedged(gt); 146 err = -EIO; 147 goto out_ctx; 148 } 149 150 igt_spinner_end(&spin); 151 if 
(igt_flush_test(gt->i915)) { 152 err = -EIO; 153 goto out_ctx; 154 } 155 156 out_ctx: 157 intel_context_put(ce); 158 if (err) 159 break; 160 } 161 162 igt_spinner_fini(&spin); 163 return err; 164 } 165 166 static int live_unlite_restore(struct intel_gt *gt, int prio) 167 { 168 struct intel_engine_cs *engine; 169 enum intel_engine_id id; 170 struct igt_spinner spin; 171 int err = -ENOMEM; 172 173 /* 174 * Check that we can correctly context switch between 2 instances 175 * on the same engine from the same parent context. 176 */ 177 178 if (igt_spinner_init(&spin, gt)) 179 return err; 180 181 err = 0; 182 for_each_engine(engine, gt, id) { 183 struct intel_context *ce[2] = {}; 184 struct i915_request *rq[2]; 185 struct igt_live_test t; 186 int n; 187 188 if (prio && !intel_engine_has_preemption(engine)) 189 continue; 190 191 if (!intel_engine_can_store_dword(engine)) 192 continue; 193 194 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 195 err = -EIO; 196 break; 197 } 198 st_engine_heartbeat_disable(engine); 199 200 for (n = 0; n < ARRAY_SIZE(ce); n++) { 201 struct intel_context *tmp; 202 203 tmp = intel_context_create(engine); 204 if (IS_ERR(tmp)) { 205 err = PTR_ERR(tmp); 206 goto err_ce; 207 } 208 209 err = intel_context_pin(tmp); 210 if (err) { 211 intel_context_put(tmp); 212 goto err_ce; 213 } 214 215 /* 216 * Setup the pair of contexts such that if we 217 * lite-restore using the RING_TAIL from ce[1] it 218 * will execute garbage from ce[0]->ring. 219 */ 220 memset(tmp->ring->vaddr, 221 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 222 tmp->ring->vma->size); 223 224 ce[n] = tmp; 225 } 226 GEM_BUG_ON(!ce[1]->ring->size); 227 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 228 lrc_update_regs(ce[1], engine, ce[1]->ring->head); 229 230 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 231 if (IS_ERR(rq[0])) { 232 err = PTR_ERR(rq[0]); 233 goto err_ce; 234 } 235 236 i915_request_get(rq[0]); 237 i915_request_add(rq[0]); 238 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 239 240 if (!igt_wait_for_spinner(&spin, rq[0])) { 241 i915_request_put(rq[0]); 242 goto err_ce; 243 } 244 245 rq[1] = i915_request_create(ce[1]); 246 if (IS_ERR(rq[1])) { 247 err = PTR_ERR(rq[1]); 248 i915_request_put(rq[0]); 249 goto err_ce; 250 } 251 252 if (!prio) { 253 /* 254 * Ensure we do the switch to ce[1] on completion. 255 * 256 * rq[0] is already submitted, so this should reduce 257 * to a no-op (a wait on a request on the same engine 258 * uses the submit fence, not the completion fence), 259 * but it will install a dependency on rq[1] for rq[0] 260 * that will prevent the pair being reordered by 261 * timeslicing. 
262 */ 263 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 264 } 265 266 i915_request_get(rq[1]); 267 i915_request_add(rq[1]); 268 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 269 i915_request_put(rq[0]); 270 271 if (prio) { 272 struct i915_sched_attr attr = { 273 .priority = prio, 274 }; 275 276 /* Alternatively preempt the spinner with ce[1] */ 277 engine->schedule(rq[1], &attr); 278 } 279 280 /* And switch back to ce[0] for good measure */ 281 rq[0] = i915_request_create(ce[0]); 282 if (IS_ERR(rq[0])) { 283 err = PTR_ERR(rq[0]); 284 i915_request_put(rq[1]); 285 goto err_ce; 286 } 287 288 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 289 i915_request_get(rq[0]); 290 i915_request_add(rq[0]); 291 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 292 i915_request_put(rq[1]); 293 i915_request_put(rq[0]); 294 295 err_ce: 296 intel_engine_flush_submission(engine); 297 igt_spinner_end(&spin); 298 for (n = 0; n < ARRAY_SIZE(ce); n++) { 299 if (IS_ERR_OR_NULL(ce[n])) 300 break; 301 302 intel_context_unpin(ce[n]); 303 intel_context_put(ce[n]); 304 } 305 306 st_engine_heartbeat_enable(engine); 307 if (igt_live_test_end(&t)) 308 err = -EIO; 309 if (err) 310 break; 311 } 312 313 igt_spinner_fini(&spin); 314 return err; 315 } 316 317 static int live_unlite_switch(void *arg) 318 { 319 return live_unlite_restore(arg, 0); 320 } 321 322 static int live_unlite_preempt(void *arg) 323 { 324 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 325 } 326 327 static int live_unlite_ring(void *arg) 328 { 329 struct intel_gt *gt = arg; 330 struct intel_engine_cs *engine; 331 struct igt_spinner spin; 332 enum intel_engine_id id; 333 int err = 0; 334 335 /* 336 * Setup a preemption event that will cause almost the entire ring 337 * to be unwound, potentially fooling our intel_ring_direction() 338 * into emitting a forward lite-restore instead of the rollback. 
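 * (The ring of ce[0] is filled with nop requests until it wraps past the spinner's tail, then a maximum priority request on ce[1] forces the unwind.)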
339 */ 340 341 if (igt_spinner_init(&spin, gt)) 342 return -ENOMEM; 343 344 for_each_engine(engine, gt, id) { 345 struct intel_context *ce[2] = {}; 346 struct i915_request *rq; 347 struct igt_live_test t; 348 int n; 349 350 if (!intel_engine_has_preemption(engine)) 351 continue; 352 353 if (!intel_engine_can_store_dword(engine)) 354 continue; 355 356 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 357 err = -EIO; 358 break; 359 } 360 st_engine_heartbeat_disable(engine); 361 362 for (n = 0; n < ARRAY_SIZE(ce); n++) { 363 struct intel_context *tmp; 364 365 tmp = intel_context_create(engine); 366 if (IS_ERR(tmp)) { 367 err = PTR_ERR(tmp); 368 goto err_ce; 369 } 370 371 err = intel_context_pin(tmp); 372 if (err) { 373 intel_context_put(tmp); 374 goto err_ce; 375 } 376 377 memset32(tmp->ring->vaddr, 378 0xdeadbeef, /* trigger a hang if executed */ 379 tmp->ring->vma->size / sizeof(u32)); 380 381 ce[n] = tmp; 382 } 383 384 /* Create max prio spinner, followed by N low prio nops */ 385 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 386 if (IS_ERR(rq)) { 387 err = PTR_ERR(rq); 388 goto err_ce; 389 } 390 391 i915_request_get(rq); 392 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 393 i915_request_add(rq); 394 395 if (!igt_wait_for_spinner(&spin, rq)) { 396 intel_gt_set_wedged(gt); 397 i915_request_put(rq); 398 err = -ETIME; 399 goto err_ce; 400 } 401 402 /* Fill the ring, until we will cause a wrap */ 403 n = 0; 404 while (intel_ring_direction(ce[0]->ring, 405 rq->wa_tail, 406 ce[0]->ring->tail) <= 0) { 407 struct i915_request *tmp; 408 409 tmp = intel_context_create_request(ce[0]); 410 if (IS_ERR(tmp)) { 411 err = PTR_ERR(tmp); 412 i915_request_put(rq); 413 goto err_ce; 414 } 415 416 i915_request_add(tmp); 417 intel_engine_flush_submission(engine); 418 n++; 419 } 420 intel_engine_flush_submission(engine); 421 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 422 engine->name, n, 423 ce[0]->ring->size, 424 ce[0]->ring->tail, 425 ce[0]->ring->emit, 426 rq->tail); 427 GEM_BUG_ON(intel_ring_direction(ce[0]->ring, 428 rq->tail, 429 ce[0]->ring->tail) <= 0); 430 i915_request_put(rq); 431 432 /* Create a second ring to preempt the first ring after rq[0] */ 433 rq = intel_context_create_request(ce[1]); 434 if (IS_ERR(rq)) { 435 err = PTR_ERR(rq); 436 goto err_ce; 437 } 438 439 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 440 i915_request_get(rq); 441 i915_request_add(rq); 442 443 err = wait_for_submit(engine, rq, HZ / 2); 444 i915_request_put(rq); 445 if (err) { 446 pr_err("%s: preemption request was not submitted\n", 447 engine->name); 448 err = -ETIME; 449 } 450 451 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 452 engine->name, 453 ce[0]->ring->tail, ce[0]->ring->emit, 454 ce[1]->ring->tail, ce[1]->ring->emit); 455 456 err_ce: 457 intel_engine_flush_submission(engine); 458 igt_spinner_end(&spin); 459 for (n = 0; n < ARRAY_SIZE(ce); n++) { 460 if (IS_ERR_OR_NULL(ce[n])) 461 break; 462 463 intel_context_unpin(ce[n]); 464 intel_context_put(ce[n]); 465 } 466 st_engine_heartbeat_enable(engine); 467 if (igt_live_test_end(&t)) 468 err = -EIO; 469 if (err) 470 break; 471 } 472 473 igt_spinner_fini(&spin); 474 return err; 475 } 476 477 static int live_pin_rewind(void *arg) 478 { 479 struct intel_gt *gt = arg; 480 struct intel_engine_cs *engine; 481 enum intel_engine_id id; 482 int err = 0; 483 484 /* 485 * We have to be careful not to trust intel_ring too much, for example 486 * ring->head is updated upon 
retire, which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
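	 * (The guilty spinner is plucked from the engine with execlists_hold(),
	 * the engine is reset, and we check that the request only completes
	 * once execlists_unhold() resubmits it.)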
567 */ 568 569 if (!intel_has_reset_engine(gt)) 570 return 0; 571 572 if (igt_spinner_init(&spin, gt)) 573 return -ENOMEM; 574 575 for_each_engine(engine, gt, id) { 576 struct intel_context *ce; 577 struct i915_request *rq; 578 579 ce = intel_context_create(engine); 580 if (IS_ERR(ce)) { 581 err = PTR_ERR(ce); 582 break; 583 } 584 585 st_engine_heartbeat_disable(engine); 586 587 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 588 if (IS_ERR(rq)) { 589 err = PTR_ERR(rq); 590 goto out; 591 } 592 i915_request_add(rq); 593 594 if (!igt_wait_for_spinner(&spin, rq)) { 595 intel_gt_set_wedged(gt); 596 err = -ETIME; 597 goto out; 598 } 599 600 /* We have our request executing, now remove it and reset */ 601 602 local_bh_disable(); 603 if (test_and_set_bit(I915_RESET_ENGINE + id, 604 >->reset.flags)) { 605 local_bh_enable(); 606 intel_gt_set_wedged(gt); 607 err = -EBUSY; 608 goto out; 609 } 610 tasklet_disable(&engine->execlists.tasklet); 611 612 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 613 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 614 615 i915_request_get(rq); 616 execlists_hold(engine, rq); 617 GEM_BUG_ON(!i915_request_on_hold(rq)); 618 619 __intel_engine_reset_bh(engine, NULL); 620 GEM_BUG_ON(rq->fence.error != -EIO); 621 622 tasklet_enable(&engine->execlists.tasklet); 623 clear_and_wake_up_bit(I915_RESET_ENGINE + id, 624 >->reset.flags); 625 local_bh_enable(); 626 627 /* Check that we do not resubmit the held request */ 628 if (!i915_request_wait(rq, 0, HZ / 5)) { 629 pr_err("%s: on hold request completed!\n", 630 engine->name); 631 i915_request_put(rq); 632 err = -EIO; 633 goto out; 634 } 635 GEM_BUG_ON(!i915_request_on_hold(rq)); 636 637 /* But is resubmitted on release */ 638 execlists_unhold(engine, rq); 639 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 640 pr_err("%s: held request did not complete!\n", 641 engine->name); 642 intel_gt_set_wedged(gt); 643 err = -ETIME; 644 } 645 i915_request_put(rq); 646 647 out: 648 st_engine_heartbeat_enable(engine); 649 intel_context_put(ce); 650 if (err) 651 break; 652 } 653 654 igt_spinner_fini(&spin); 655 return err; 656 } 657 658 static const char *error_repr(int err) 659 { 660 return err ? "bad" : "good"; 661 } 662 663 static int live_error_interrupt(void *arg) 664 { 665 static const struct error_phase { 666 enum { GOOD = 0, BAD = -EIO } error[2]; 667 } phases[] = { 668 { { BAD, GOOD } }, 669 { { BAD, BAD } }, 670 { { BAD, GOOD } }, 671 { { GOOD, GOOD } }, /* sentinel */ 672 }; 673 struct intel_gt *gt = arg; 674 struct intel_engine_cs *engine; 675 enum intel_engine_id id; 676 677 /* 678 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning 679 * of invalid commands in user batches that will cause a GPU hang. 680 * This is a faster mechanism than using hangcheck/heartbeats, but 681 * only detects problems the HW knows about -- it will not warn when 682 * we kill the HW! 683 * 684 * To verify our detection and reset, we throw some invalid commands 685 * at the HW and wait for the interrupt. 
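 * (Each phase submits a pair of requests, some carrying an illegal 0xdeadbeef command; we then check that each fence error matches the expected GOOD/BAD outcome for that phase.)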
686 */ 687 688 if (!intel_has_reset_engine(gt)) 689 return 0; 690 691 for_each_engine(engine, gt, id) { 692 const struct error_phase *p; 693 int err = 0; 694 695 st_engine_heartbeat_disable(engine); 696 697 for (p = phases; p->error[0] != GOOD; p++) { 698 struct i915_request *client[ARRAY_SIZE(phases->error)]; 699 u32 *cs; 700 int i; 701 702 memset(client, 0, sizeof(*client)); 703 for (i = 0; i < ARRAY_SIZE(client); i++) { 704 struct intel_context *ce; 705 struct i915_request *rq; 706 707 ce = intel_context_create(engine); 708 if (IS_ERR(ce)) { 709 err = PTR_ERR(ce); 710 goto out; 711 } 712 713 rq = intel_context_create_request(ce); 714 intel_context_put(ce); 715 if (IS_ERR(rq)) { 716 err = PTR_ERR(rq); 717 goto out; 718 } 719 720 if (rq->engine->emit_init_breadcrumb) { 721 err = rq->engine->emit_init_breadcrumb(rq); 722 if (err) { 723 i915_request_add(rq); 724 goto out; 725 } 726 } 727 728 cs = intel_ring_begin(rq, 2); 729 if (IS_ERR(cs)) { 730 i915_request_add(rq); 731 err = PTR_ERR(cs); 732 goto out; 733 } 734 735 if (p->error[i]) { 736 *cs++ = 0xdeadbeef; 737 *cs++ = 0xdeadbeef; 738 } else { 739 *cs++ = MI_NOOP; 740 *cs++ = MI_NOOP; 741 } 742 743 client[i] = i915_request_get(rq); 744 i915_request_add(rq); 745 } 746 747 err = wait_for_submit(engine, client[0], HZ / 2); 748 if (err) { 749 pr_err("%s: first request did not start within time!\n", 750 engine->name); 751 err = -ETIME; 752 goto out; 753 } 754 755 for (i = 0; i < ARRAY_SIZE(client); i++) { 756 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 757 pr_debug("%s: %s request incomplete!\n", 758 engine->name, 759 error_repr(p->error[i])); 760 761 if (!i915_request_started(client[i])) { 762 pr_err("%s: %s request not started!\n", 763 engine->name, 764 error_repr(p->error[i])); 765 err = -ETIME; 766 goto out; 767 } 768 769 /* Kick the tasklet to process the error */ 770 intel_engine_flush_submission(engine); 771 if (client[i]->fence.error != p->error[i]) { 772 pr_err("%s: %s request (%s) with wrong error code: %d\n", 773 engine->name, 774 error_repr(p->error[i]), 775 i915_request_completed(client[i]) ? 
"completed" : "running", 776 client[i]->fence.error); 777 err = -EINVAL; 778 goto out; 779 } 780 } 781 782 out: 783 for (i = 0; i < ARRAY_SIZE(client); i++) 784 if (client[i]) 785 i915_request_put(client[i]); 786 if (err) { 787 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 788 engine->name, p - phases, 789 p->error[0], p->error[1]); 790 break; 791 } 792 } 793 794 st_engine_heartbeat_enable(engine); 795 if (err) { 796 intel_gt_set_wedged(gt); 797 return err; 798 } 799 } 800 801 return 0; 802 } 803 804 static int 805 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 806 { 807 u32 *cs; 808 809 cs = intel_ring_begin(rq, 10); 810 if (IS_ERR(cs)) 811 return PTR_ERR(cs); 812 813 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 814 815 *cs++ = MI_SEMAPHORE_WAIT | 816 MI_SEMAPHORE_GLOBAL_GTT | 817 MI_SEMAPHORE_POLL | 818 MI_SEMAPHORE_SAD_NEQ_SDD; 819 *cs++ = 0; 820 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 821 *cs++ = 0; 822 823 if (idx > 0) { 824 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 825 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 826 *cs++ = 0; 827 *cs++ = 1; 828 } else { 829 *cs++ = MI_NOOP; 830 *cs++ = MI_NOOP; 831 *cs++ = MI_NOOP; 832 *cs++ = MI_NOOP; 833 } 834 835 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 836 837 intel_ring_advance(rq, cs); 838 return 0; 839 } 840 841 static struct i915_request * 842 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 843 { 844 struct intel_context *ce; 845 struct i915_request *rq; 846 int err; 847 848 ce = intel_context_create(engine); 849 if (IS_ERR(ce)) 850 return ERR_CAST(ce); 851 852 rq = intel_context_create_request(ce); 853 if (IS_ERR(rq)) 854 goto out_ce; 855 856 err = 0; 857 if (rq->engine->emit_init_breadcrumb) 858 err = rq->engine->emit_init_breadcrumb(rq); 859 if (err == 0) 860 err = emit_semaphore_chain(rq, vma, idx); 861 if (err == 0) 862 i915_request_get(rq); 863 i915_request_add(rq); 864 if (err) 865 rq = ERR_PTR(err); 866 867 out_ce: 868 intel_context_put(ce); 869 return rq; 870 } 871 872 static int 873 release_queue(struct intel_engine_cs *engine, 874 struct i915_vma *vma, 875 int idx, int prio) 876 { 877 struct i915_sched_attr attr = { 878 .priority = prio, 879 }; 880 struct i915_request *rq; 881 u32 *cs; 882 883 rq = intel_engine_create_kernel_request(engine); 884 if (IS_ERR(rq)) 885 return PTR_ERR(rq); 886 887 cs = intel_ring_begin(rq, 4); 888 if (IS_ERR(cs)) { 889 i915_request_add(rq); 890 return PTR_ERR(cs); 891 } 892 893 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 894 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 895 *cs++ = 0; 896 *cs++ = 1; 897 898 intel_ring_advance(rq, cs); 899 900 i915_request_get(rq); 901 i915_request_add(rq); 902 903 local_bh_disable(); 904 engine->schedule(rq, &attr); 905 local_bh_enable(); /* kick tasklet */ 906 907 i915_request_put(rq); 908 909 return 0; 910 } 911 912 static int 913 slice_semaphore_queue(struct intel_engine_cs *outer, 914 struct i915_vma *vma, 915 int count) 916 { 917 struct intel_engine_cs *engine; 918 struct i915_request *head; 919 enum intel_engine_id id; 920 int err, i, n = 0; 921 922 head = semaphore_queue(outer, vma, n++); 923 if (IS_ERR(head)) 924 return PTR_ERR(head); 925 926 for_each_engine(engine, outer->gt, id) { 927 if (!intel_engine_has_preemption(engine)) 928 continue; 929 930 for (i = 0; i < count; i++) { 931 struct i915_request *rq; 932 933 rq = semaphore_queue(engine, vma, n++); 934 if (IS_ERR(rq)) { 935 err = PTR_ERR(rq); 936 goto out; 937 } 938 939 i915_request_put(rq); 940 } 941 } 942 943 err = release_queue(outer, 
vma, n, I915_PRIORITY_BARRIER); 944 if (err) 945 goto out; 946 947 if (i915_request_wait(head, 0, 948 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) { 949 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n", 950 outer->name, count, n); 951 GEM_TRACE_DUMP(); 952 intel_gt_set_wedged(outer->gt); 953 err = -EIO; 954 } 955 956 out: 957 i915_request_put(head); 958 return err; 959 } 960 961 static int live_timeslice_preempt(void *arg) 962 { 963 struct intel_gt *gt = arg; 964 struct drm_i915_gem_object *obj; 965 struct intel_engine_cs *engine; 966 enum intel_engine_id id; 967 struct i915_vma *vma; 968 void *vaddr; 969 int err = 0; 970 971 /* 972 * If a request takes too long, we would like to give other users 973 * a fair go on the GPU. In particular, users may create batches 974 * that wait upon external input, where that input may even be 975 * supplied by another GPU job. To avoid blocking forever, we 976 * need to preempt the current task and replace it with another 977 * ready task. 978 */ 979 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 980 return 0; 981 982 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 983 if (IS_ERR(obj)) 984 return PTR_ERR(obj); 985 986 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 987 if (IS_ERR(vma)) { 988 err = PTR_ERR(vma); 989 goto err_obj; 990 } 991 992 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 993 if (IS_ERR(vaddr)) { 994 err = PTR_ERR(vaddr); 995 goto err_obj; 996 } 997 998 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 999 if (err) 1000 goto err_map; 1001 1002 err = i915_vma_sync(vma); 1003 if (err) 1004 goto err_pin; 1005 1006 for_each_engine(engine, gt, id) { 1007 if (!intel_engine_has_preemption(engine)) 1008 continue; 1009 1010 memset(vaddr, 0, PAGE_SIZE); 1011 1012 st_engine_heartbeat_disable(engine); 1013 err = slice_semaphore_queue(engine, vma, 5); 1014 st_engine_heartbeat_enable(engine); 1015 if (err) 1016 goto err_pin; 1017 1018 if (igt_flush_test(gt->i915)) { 1019 err = -EIO; 1020 goto err_pin; 1021 } 1022 } 1023 1024 err_pin: 1025 i915_vma_unpin(vma); 1026 err_map: 1027 i915_gem_object_unpin_map(obj); 1028 err_obj: 1029 i915_gem_object_put(obj); 1030 return err; 1031 } 1032 1033 static struct i915_request * 1034 create_rewinder(struct intel_context *ce, 1035 struct i915_request *wait, 1036 void *slot, int idx) 1037 { 1038 const u32 offset = 1039 i915_ggtt_offset(ce->engine->status_page.vma) + 1040 offset_in_page(slot); 1041 struct i915_request *rq; 1042 u32 *cs; 1043 int err; 1044 1045 rq = intel_context_create_request(ce); 1046 if (IS_ERR(rq)) 1047 return rq; 1048 1049 if (wait) { 1050 err = i915_request_await_dma_fence(rq, &wait->fence); 1051 if (err) 1052 goto err; 1053 } 1054 1055 cs = intel_ring_begin(rq, 14); 1056 if (IS_ERR(cs)) { 1057 err = PTR_ERR(cs); 1058 goto err; 1059 } 1060 1061 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1062 *cs++ = MI_NOOP; 1063 1064 *cs++ = MI_SEMAPHORE_WAIT | 1065 MI_SEMAPHORE_GLOBAL_GTT | 1066 MI_SEMAPHORE_POLL | 1067 MI_SEMAPHORE_SAD_GTE_SDD; 1068 *cs++ = idx; 1069 *cs++ = offset; 1070 *cs++ = 0; 1071 1072 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 1073 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 1074 *cs++ = offset + idx * sizeof(u32); 1075 *cs++ = 0; 1076 1077 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1078 *cs++ = offset; 1079 *cs++ = 0; 1080 *cs++ = idx + 1; 1081 1082 intel_ring_advance(rq, cs); 1083 1084 rq->sched.attr.priority = I915_PRIORITY_MASK; 1085 err = 0; 1086 err: 1087 i915_request_get(rq); 1088 
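	/* Hold a reference so we can return the request (or put it on error) */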
i915_request_add(rq); 1089 if (err) { 1090 i915_request_put(rq); 1091 return ERR_PTR(err); 1092 } 1093 1094 return rq; 1095 } 1096 1097 static int live_timeslice_rewind(void *arg) 1098 { 1099 struct intel_gt *gt = arg; 1100 struct intel_engine_cs *engine; 1101 enum intel_engine_id id; 1102 1103 /* 1104 * The usual presumption on timeslice expiration is that we replace 1105 * the active context with another. However, given a chain of 1106 * dependencies we may end up with replacing the context with itself, 1107 * but only a few of those requests, forcing us to rewind the 1108 * RING_TAIL of the original request. 1109 */ 1110 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1111 return 0; 1112 1113 for_each_engine(engine, gt, id) { 1114 enum { A1, A2, B1 }; 1115 enum { X = 1, Z, Y }; 1116 struct i915_request *rq[3] = {}; 1117 struct intel_context *ce; 1118 unsigned long timeslice; 1119 int i, err = 0; 1120 u32 *slot; 1121 1122 if (!intel_engine_has_timeslices(engine)) 1123 continue; 1124 1125 /* 1126 * A:rq1 -- semaphore wait, timestamp X 1127 * A:rq2 -- write timestamp Y 1128 * 1129 * B:rq1 [await A:rq1] -- write timestamp Z 1130 * 1131 * Force timeslice, release semaphore. 1132 * 1133 * Expect execution/evaluation order XZY 1134 */ 1135 1136 st_engine_heartbeat_disable(engine); 1137 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1138 1139 slot = memset32(engine->status_page.addr + 1000, 0, 4); 1140 1141 ce = intel_context_create(engine); 1142 if (IS_ERR(ce)) { 1143 err = PTR_ERR(ce); 1144 goto err; 1145 } 1146 1147 rq[A1] = create_rewinder(ce, NULL, slot, X); 1148 if (IS_ERR(rq[A1])) { 1149 intel_context_put(ce); 1150 goto err; 1151 } 1152 1153 rq[A2] = create_rewinder(ce, NULL, slot, Y); 1154 intel_context_put(ce); 1155 if (IS_ERR(rq[A2])) 1156 goto err; 1157 1158 err = wait_for_submit(engine, rq[A2], HZ / 2); 1159 if (err) { 1160 pr_err("%s: failed to submit first context\n", 1161 engine->name); 1162 goto err; 1163 } 1164 1165 ce = intel_context_create(engine); 1166 if (IS_ERR(ce)) { 1167 err = PTR_ERR(ce); 1168 goto err; 1169 } 1170 1171 rq[B1] = create_rewinder(ce, rq[A1], slot, Z); 1172 intel_context_put(ce); 1173 if (IS_ERR(rq[2])) 1174 goto err; 1175 1176 err = wait_for_submit(engine, rq[B1], HZ / 2); 1177 if (err) { 1178 pr_err("%s: failed to submit second context\n", 1179 engine->name); 1180 goto err; 1181 } 1182 1183 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1184 ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); 1185 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ 1186 /* Wait for the timeslice to kick in */ 1187 del_timer(&engine->execlists.timer); 1188 tasklet_hi_schedule(&engine->execlists.tasklet); 1189 intel_engine_flush_submission(engine); 1190 } 1191 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1192 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1193 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1194 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1195 1196 /* Release the hounds! 
*/ 1197 slot[0] = 1; 1198 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1199 1200 for (i = 1; i <= 3; i++) { 1201 unsigned long timeout = jiffies + HZ / 2; 1202 1203 while (!READ_ONCE(slot[i]) && 1204 time_before(jiffies, timeout)) 1205 ; 1206 1207 if (!time_before(jiffies, timeout)) { 1208 pr_err("%s: rq[%d] timed out\n", 1209 engine->name, i - 1); 1210 err = -ETIME; 1211 goto err; 1212 } 1213 1214 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1215 } 1216 1217 /* XZY: XZ < XY */ 1218 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1219 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1220 engine->name, 1221 slot[Z] - slot[X], 1222 slot[Y] - slot[X]); 1223 err = -EINVAL; 1224 } 1225 1226 err: 1227 memset32(&slot[0], -1, 4); 1228 wmb(); 1229 1230 engine->props.timeslice_duration_ms = timeslice; 1231 st_engine_heartbeat_enable(engine); 1232 for (i = 0; i < 3; i++) 1233 i915_request_put(rq[i]); 1234 if (igt_flush_test(gt->i915)) 1235 err = -EIO; 1236 if (err) 1237 return err; 1238 } 1239 1240 return 0; 1241 } 1242 1243 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1244 { 1245 struct i915_request *rq; 1246 1247 rq = intel_engine_create_kernel_request(engine); 1248 if (IS_ERR(rq)) 1249 return rq; 1250 1251 i915_request_get(rq); 1252 i915_request_add(rq); 1253 1254 return rq; 1255 } 1256 1257 static long slice_timeout(struct intel_engine_cs *engine) 1258 { 1259 long timeout; 1260 1261 /* Enough time for a timeslice to kick in, and kick out */ 1262 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); 1263 1264 /* Enough time for the nop request to complete */ 1265 timeout += HZ / 5; 1266 1267 return timeout + 1; 1268 } 1269 1270 static int live_timeslice_queue(void *arg) 1271 { 1272 struct intel_gt *gt = arg; 1273 struct drm_i915_gem_object *obj; 1274 struct intel_engine_cs *engine; 1275 enum intel_engine_id id; 1276 struct i915_vma *vma; 1277 void *vaddr; 1278 int err = 0; 1279 1280 /* 1281 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1282 * timeslicing between them disabled, we *do* enable timeslicing 1283 * if the queue demands it. (Normally, we do not submit if 1284 * ELSP[1] is already occupied, so must rely on timeslicing to 1285 * eject ELSP[0] in favour of the queue.) 
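 * (ELSP[0] gets a semaphore-waiting request and ELSP[1] a nop; the semaphore release is then queued at equal priority, so only a timeslice lets it run and complete the wait.)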
1286 */ 1287 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1288 return 0; 1289 1290 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1291 if (IS_ERR(obj)) 1292 return PTR_ERR(obj); 1293 1294 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1295 if (IS_ERR(vma)) { 1296 err = PTR_ERR(vma); 1297 goto err_obj; 1298 } 1299 1300 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1301 if (IS_ERR(vaddr)) { 1302 err = PTR_ERR(vaddr); 1303 goto err_obj; 1304 } 1305 1306 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1307 if (err) 1308 goto err_map; 1309 1310 err = i915_vma_sync(vma); 1311 if (err) 1312 goto err_pin; 1313 1314 for_each_engine(engine, gt, id) { 1315 struct i915_sched_attr attr = { 1316 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1317 }; 1318 struct i915_request *rq, *nop; 1319 1320 if (!intel_engine_has_preemption(engine)) 1321 continue; 1322 1323 st_engine_heartbeat_disable(engine); 1324 memset(vaddr, 0, PAGE_SIZE); 1325 1326 /* ELSP[0]: semaphore wait */ 1327 rq = semaphore_queue(engine, vma, 0); 1328 if (IS_ERR(rq)) { 1329 err = PTR_ERR(rq); 1330 goto err_heartbeat; 1331 } 1332 engine->schedule(rq, &attr); 1333 err = wait_for_submit(engine, rq, HZ / 2); 1334 if (err) { 1335 pr_err("%s: Timed out trying to submit semaphores\n", 1336 engine->name); 1337 goto err_rq; 1338 } 1339 1340 /* ELSP[1]: nop request */ 1341 nop = nop_request(engine); 1342 if (IS_ERR(nop)) { 1343 err = PTR_ERR(nop); 1344 goto err_rq; 1345 } 1346 err = wait_for_submit(engine, nop, HZ / 2); 1347 i915_request_put(nop); 1348 if (err) { 1349 pr_err("%s: Timed out trying to submit nop\n", 1350 engine->name); 1351 goto err_rq; 1352 } 1353 1354 GEM_BUG_ON(i915_request_completed(rq)); 1355 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1356 1357 /* Queue: semaphore signal, matching priority as semaphore */ 1358 err = release_queue(engine, vma, 1, effective_prio(rq)); 1359 if (err) 1360 goto err_rq; 1361 1362 /* Wait until we ack the release_queue and start timeslicing */ 1363 do { 1364 cond_resched(); 1365 intel_engine_flush_submission(engine); 1366 } while (READ_ONCE(engine->execlists.pending[0])); 1367 1368 /* Timeslice every jiffy, so within 2 we should signal */ 1369 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { 1370 struct drm_printer p = 1371 drm_info_printer(gt->i915->drm.dev); 1372 1373 pr_err("%s: Failed to timeslice into queue\n", 1374 engine->name); 1375 intel_engine_dump(engine, &p, 1376 "%s\n", engine->name); 1377 1378 memset(vaddr, 0xff, PAGE_SIZE); 1379 err = -EIO; 1380 } 1381 err_rq: 1382 i915_request_put(rq); 1383 err_heartbeat: 1384 st_engine_heartbeat_enable(engine); 1385 if (err) 1386 break; 1387 } 1388 1389 err_pin: 1390 i915_vma_unpin(vma); 1391 err_map: 1392 i915_gem_object_unpin_map(obj); 1393 err_obj: 1394 i915_gem_object_put(obj); 1395 return err; 1396 } 1397 1398 static int live_timeslice_nopreempt(void *arg) 1399 { 1400 struct intel_gt *gt = arg; 1401 struct intel_engine_cs *engine; 1402 enum intel_engine_id id; 1403 struct igt_spinner spin; 1404 int err = 0; 1405 1406 /* 1407 * We should not timeslice into a request that is marked with 1408 * I915_REQUEST_NOPREEMPT. 
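 * (An unpreemptible spinner occupies ELSP[0] while an I915_PRIORITY_BARRIER request waits behind it; the barrier must not complete within the timeslice interval.)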
1409 */ 1410 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1411 return 0; 1412 1413 if (igt_spinner_init(&spin, gt)) 1414 return -ENOMEM; 1415 1416 for_each_engine(engine, gt, id) { 1417 struct intel_context *ce; 1418 struct i915_request *rq; 1419 unsigned long timeslice; 1420 1421 if (!intel_engine_has_preemption(engine)) 1422 continue; 1423 1424 ce = intel_context_create(engine); 1425 if (IS_ERR(ce)) { 1426 err = PTR_ERR(ce); 1427 break; 1428 } 1429 1430 st_engine_heartbeat_disable(engine); 1431 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1432 1433 /* Create an unpreemptible spinner */ 1434 1435 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 1436 intel_context_put(ce); 1437 if (IS_ERR(rq)) { 1438 err = PTR_ERR(rq); 1439 goto out_heartbeat; 1440 } 1441 1442 i915_request_get(rq); 1443 i915_request_add(rq); 1444 1445 if (!igt_wait_for_spinner(&spin, rq)) { 1446 i915_request_put(rq); 1447 err = -ETIME; 1448 goto out_spin; 1449 } 1450 1451 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 1452 i915_request_put(rq); 1453 1454 /* Followed by a maximum priority barrier (heartbeat) */ 1455 1456 ce = intel_context_create(engine); 1457 if (IS_ERR(ce)) { 1458 err = PTR_ERR(ce); 1459 goto out_spin; 1460 } 1461 1462 rq = intel_context_create_request(ce); 1463 intel_context_put(ce); 1464 if (IS_ERR(rq)) { 1465 err = PTR_ERR(rq); 1466 goto out_spin; 1467 } 1468 1469 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 1470 i915_request_get(rq); 1471 i915_request_add(rq); 1472 1473 /* 1474 * Wait until the barrier is in ELSP, and we know timeslicing 1475 * will have been activated. 1476 */ 1477 if (wait_for_submit(engine, rq, HZ / 2)) { 1478 i915_request_put(rq); 1479 err = -ETIME; 1480 goto out_spin; 1481 } 1482 1483 /* 1484 * Since the ELSP[0] request is unpreemptible, it should not 1485 * allow the maximum priority barrier through. Wait long 1486 * enough to see if it is timesliced in by mistake. 1487 */ 1488 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { 1489 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", 1490 engine->name); 1491 err = -EINVAL; 1492 } 1493 i915_request_put(rq); 1494 1495 out_spin: 1496 igt_spinner_end(&spin); 1497 out_heartbeat: 1498 xchg(&engine->props.timeslice_duration_ms, timeslice); 1499 st_engine_heartbeat_enable(engine); 1500 if (err) 1501 break; 1502 1503 if (igt_flush_test(gt->i915)) { 1504 err = -EIO; 1505 break; 1506 } 1507 } 1508 1509 igt_spinner_fini(&spin); 1510 return err; 1511 } 1512 1513 static int live_busywait_preempt(void *arg) 1514 { 1515 struct intel_gt *gt = arg; 1516 struct i915_gem_context *ctx_hi, *ctx_lo; 1517 struct intel_engine_cs *engine; 1518 struct drm_i915_gem_object *obj; 1519 struct i915_vma *vma; 1520 enum intel_engine_id id; 1521 int err = -ENOMEM; 1522 u32 *map; 1523 1524 /* 1525 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1526 * preempt the busywaits used to synchronise between rings. 
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here?
*/ 1611 1612 *cs++ = MI_SEMAPHORE_WAIT | 1613 MI_SEMAPHORE_GLOBAL_GTT | 1614 MI_SEMAPHORE_POLL | 1615 MI_SEMAPHORE_SAD_EQ_SDD; 1616 *cs++ = 0; 1617 *cs++ = i915_ggtt_offset(vma); 1618 *cs++ = 0; 1619 1620 intel_ring_advance(lo, cs); 1621 1622 i915_request_get(lo); 1623 i915_request_add(lo); 1624 1625 if (wait_for(READ_ONCE(*map), 10)) { 1626 i915_request_put(lo); 1627 err = -ETIMEDOUT; 1628 goto err_vma; 1629 } 1630 1631 /* Low priority request should be busywaiting now */ 1632 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1633 i915_request_put(lo); 1634 pr_err("%s: Busywaiting request did not!\n", 1635 engine->name); 1636 err = -EIO; 1637 goto err_vma; 1638 } 1639 1640 hi = igt_request_alloc(ctx_hi, engine); 1641 if (IS_ERR(hi)) { 1642 err = PTR_ERR(hi); 1643 i915_request_put(lo); 1644 goto err_vma; 1645 } 1646 1647 cs = intel_ring_begin(hi, 4); 1648 if (IS_ERR(cs)) { 1649 err = PTR_ERR(cs); 1650 i915_request_add(hi); 1651 i915_request_put(lo); 1652 goto err_vma; 1653 } 1654 1655 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1656 *cs++ = i915_ggtt_offset(vma); 1657 *cs++ = 0; 1658 *cs++ = 0; 1659 1660 intel_ring_advance(hi, cs); 1661 i915_request_add(hi); 1662 1663 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1664 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1665 1666 pr_err("%s: Failed to preempt semaphore busywait!\n", 1667 engine->name); 1668 1669 intel_engine_dump(engine, &p, "%s\n", engine->name); 1670 GEM_TRACE_DUMP(); 1671 1672 i915_request_put(lo); 1673 intel_gt_set_wedged(gt); 1674 err = -EIO; 1675 goto err_vma; 1676 } 1677 GEM_BUG_ON(READ_ONCE(*map)); 1678 i915_request_put(lo); 1679 1680 if (igt_live_test_end(&t)) { 1681 err = -EIO; 1682 goto err_vma; 1683 } 1684 } 1685 1686 err = 0; 1687 err_vma: 1688 i915_vma_unpin(vma); 1689 err_map: 1690 i915_gem_object_unpin_map(obj); 1691 err_obj: 1692 i915_gem_object_put(obj); 1693 err_ctx_lo: 1694 kernel_context_close(ctx_lo); 1695 err_ctx_hi: 1696 kernel_context_close(ctx_hi); 1697 return err; 1698 } 1699 1700 static struct i915_request * 1701 spinner_create_request(struct igt_spinner *spin, 1702 struct i915_gem_context *ctx, 1703 struct intel_engine_cs *engine, 1704 u32 arb) 1705 { 1706 struct intel_context *ce; 1707 struct i915_request *rq; 1708 1709 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1710 if (IS_ERR(ce)) 1711 return ERR_CAST(ce); 1712 1713 rq = igt_spinner_create_request(spin, ce, arb); 1714 intel_context_put(ce); 1715 return rq; 1716 } 1717 1718 static int live_preempt(void *arg) 1719 { 1720 struct intel_gt *gt = arg; 1721 struct i915_gem_context *ctx_hi, *ctx_lo; 1722 struct igt_spinner spin_hi, spin_lo; 1723 struct intel_engine_cs *engine; 1724 enum intel_engine_id id; 1725 int err = -ENOMEM; 1726 1727 if (igt_spinner_init(&spin_hi, gt)) 1728 return -ENOMEM; 1729 1730 if (igt_spinner_init(&spin_lo, gt)) 1731 goto err_spin_hi; 1732 1733 ctx_hi = kernel_context(gt->i915); 1734 if (!ctx_hi) 1735 goto err_spin_lo; 1736 ctx_hi->sched.priority = 1737 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1738 1739 ctx_lo = kernel_context(gt->i915); 1740 if (!ctx_lo) 1741 goto err_ctx_hi; 1742 ctx_lo->sched.priority = 1743 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1744 1745 for_each_engine(engine, gt, id) { 1746 struct igt_live_test t; 1747 struct i915_request *rq; 1748 1749 if (!intel_engine_has_preemption(engine)) 1750 continue; 1751 1752 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1753 err = -EIO; 1754 goto err_ctx_lo; 1755 } 1756 1757 rq = 
spinner_create_request(&spin_lo, ctx_lo, engine, 1758 MI_ARB_CHECK); 1759 if (IS_ERR(rq)) { 1760 err = PTR_ERR(rq); 1761 goto err_ctx_lo; 1762 } 1763 1764 i915_request_add(rq); 1765 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1766 GEM_TRACE("lo spinner failed to start\n"); 1767 GEM_TRACE_DUMP(); 1768 intel_gt_set_wedged(gt); 1769 err = -EIO; 1770 goto err_ctx_lo; 1771 } 1772 1773 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1774 MI_ARB_CHECK); 1775 if (IS_ERR(rq)) { 1776 igt_spinner_end(&spin_lo); 1777 err = PTR_ERR(rq); 1778 goto err_ctx_lo; 1779 } 1780 1781 i915_request_add(rq); 1782 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1783 GEM_TRACE("hi spinner failed to start\n"); 1784 GEM_TRACE_DUMP(); 1785 intel_gt_set_wedged(gt); 1786 err = -EIO; 1787 goto err_ctx_lo; 1788 } 1789 1790 igt_spinner_end(&spin_hi); 1791 igt_spinner_end(&spin_lo); 1792 1793 if (igt_live_test_end(&t)) { 1794 err = -EIO; 1795 goto err_ctx_lo; 1796 } 1797 } 1798 1799 err = 0; 1800 err_ctx_lo: 1801 kernel_context_close(ctx_lo); 1802 err_ctx_hi: 1803 kernel_context_close(ctx_hi); 1804 err_spin_lo: 1805 igt_spinner_fini(&spin_lo); 1806 err_spin_hi: 1807 igt_spinner_fini(&spin_hi); 1808 return err; 1809 } 1810 1811 static int live_late_preempt(void *arg) 1812 { 1813 struct intel_gt *gt = arg; 1814 struct i915_gem_context *ctx_hi, *ctx_lo; 1815 struct igt_spinner spin_hi, spin_lo; 1816 struct intel_engine_cs *engine; 1817 struct i915_sched_attr attr = {}; 1818 enum intel_engine_id id; 1819 int err = -ENOMEM; 1820 1821 if (igt_spinner_init(&spin_hi, gt)) 1822 return -ENOMEM; 1823 1824 if (igt_spinner_init(&spin_lo, gt)) 1825 goto err_spin_hi; 1826 1827 ctx_hi = kernel_context(gt->i915); 1828 if (!ctx_hi) 1829 goto err_spin_lo; 1830 1831 ctx_lo = kernel_context(gt->i915); 1832 if (!ctx_lo) 1833 goto err_ctx_hi; 1834 1835 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1836 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1837 1838 for_each_engine(engine, gt, id) { 1839 struct igt_live_test t; 1840 struct i915_request *rq; 1841 1842 if (!intel_engine_has_preemption(engine)) 1843 continue; 1844 1845 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1846 err = -EIO; 1847 goto err_ctx_lo; 1848 } 1849 1850 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1851 MI_ARB_CHECK); 1852 if (IS_ERR(rq)) { 1853 err = PTR_ERR(rq); 1854 goto err_ctx_lo; 1855 } 1856 1857 i915_request_add(rq); 1858 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1859 pr_err("First context failed to start\n"); 1860 goto err_wedged; 1861 } 1862 1863 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1864 MI_NOOP); 1865 if (IS_ERR(rq)) { 1866 igt_spinner_end(&spin_lo); 1867 err = PTR_ERR(rq); 1868 goto err_ctx_lo; 1869 } 1870 1871 i915_request_add(rq); 1872 if (igt_wait_for_spinner(&spin_hi, rq)) { 1873 pr_err("Second context overtook first?\n"); 1874 goto err_wedged; 1875 } 1876 1877 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1878 engine->schedule(rq, &attr); 1879 1880 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1881 pr_err("High priority context failed to preempt the low priority context\n"); 1882 GEM_TRACE_DUMP(); 1883 goto err_wedged; 1884 } 1885 1886 igt_spinner_end(&spin_hi); 1887 igt_spinner_end(&spin_lo); 1888 1889 if (igt_live_test_end(&t)) { 1890 err = -EIO; 1891 goto err_ctx_lo; 1892 } 1893 } 1894 1895 err = 0; 1896 err_ctx_lo: 1897 kernel_context_close(ctx_lo); 1898 err_ctx_hi: 1899 kernel_context_close(ctx_hi); 1900 err_spin_lo: 1901 igt_spinner_fini(&spin_lo); 1902 err_spin_hi: 1903 igt_spinner_fini(&spin_hi); 1904 return err; 1905 1906 err_wedged: 1907 igt_spinner_end(&spin_hi); 1908 igt_spinner_end(&spin_lo); 1909 intel_gt_set_wedged(gt); 1910 err = -EIO; 1911 goto err_ctx_lo; 1912 } 1913 1914 struct preempt_client { 1915 struct igt_spinner spin; 1916 struct i915_gem_context *ctx; 1917 }; 1918 1919 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1920 { 1921 c->ctx = kernel_context(gt->i915); 1922 if (!c->ctx) 1923 return -ENOMEM; 1924 1925 if (igt_spinner_init(&c->spin, gt)) 1926 goto err_ctx; 1927 1928 return 0; 1929 1930 err_ctx: 1931 kernel_context_close(c->ctx); 1932 return -ENOMEM; 1933 } 1934 1935 static void preempt_client_fini(struct preempt_client *c) 1936 { 1937 igt_spinner_fini(&c->spin); 1938 kernel_context_close(c->ctx); 1939 } 1940 1941 static int live_nopreempt(void *arg) 1942 { 1943 struct intel_gt *gt = arg; 1944 struct intel_engine_cs *engine; 1945 struct preempt_client a, b; 1946 enum intel_engine_id id; 1947 int err = -ENOMEM; 1948 1949 /* 1950 * Verify that we can disable preemption for an individual request 1951 * that may be being observed and not want to be interrupted. 1952 */ 1953 1954 if (preempt_client_init(gt, &a)) 1955 return -ENOMEM; 1956 if (preempt_client_init(gt, &b)) 1957 goto err_client_a; 1958 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1959 1960 for_each_engine(engine, gt, id) { 1961 struct i915_request *rq_a, *rq_b; 1962 1963 if (!intel_engine_has_preemption(engine)) 1964 continue; 1965 1966 engine->execlists.preempt_hang.count = 0; 1967 1968 rq_a = spinner_create_request(&a.spin, 1969 a.ctx, engine, 1970 MI_ARB_CHECK); 1971 if (IS_ERR(rq_a)) { 1972 err = PTR_ERR(rq_a); 1973 goto err_client_b; 1974 } 1975 1976 /* Low priority client, but unpreemptable! 
*/ 1977 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 1978 1979 i915_request_add(rq_a); 1980 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1981 pr_err("First client failed to start\n"); 1982 goto err_wedged; 1983 } 1984 1985 rq_b = spinner_create_request(&b.spin, 1986 b.ctx, engine, 1987 MI_ARB_CHECK); 1988 if (IS_ERR(rq_b)) { 1989 err = PTR_ERR(rq_b); 1990 goto err_client_b; 1991 } 1992 1993 i915_request_add(rq_b); 1994 1995 /* B is much more important than A! (But A is unpreemptable.) */ 1996 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 1997 1998 /* Wait long enough for preemption and timeslicing */ 1999 if (igt_wait_for_spinner(&b.spin, rq_b)) { 2000 pr_err("Second client started too early!\n"); 2001 goto err_wedged; 2002 } 2003 2004 igt_spinner_end(&a.spin); 2005 2006 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2007 pr_err("Second client failed to start\n"); 2008 goto err_wedged; 2009 } 2010 2011 igt_spinner_end(&b.spin); 2012 2013 if (engine->execlists.preempt_hang.count) { 2014 pr_err("Preemption recorded x%d; should have been suppressed!\n", 2015 engine->execlists.preempt_hang.count); 2016 err = -EINVAL; 2017 goto err_wedged; 2018 } 2019 2020 if (igt_flush_test(gt->i915)) 2021 goto err_wedged; 2022 } 2023 2024 err = 0; 2025 err_client_b: 2026 preempt_client_fini(&b); 2027 err_client_a: 2028 preempt_client_fini(&a); 2029 return err; 2030 2031 err_wedged: 2032 igt_spinner_end(&b.spin); 2033 igt_spinner_end(&a.spin); 2034 intel_gt_set_wedged(gt); 2035 err = -EIO; 2036 goto err_client_b; 2037 } 2038 2039 struct live_preempt_cancel { 2040 struct intel_engine_cs *engine; 2041 struct preempt_client a, b; 2042 }; 2043 2044 static int __cancel_active0(struct live_preempt_cancel *arg) 2045 { 2046 struct i915_request *rq; 2047 struct igt_live_test t; 2048 int err; 2049 2050 /* Preempt cancel of ELSP0 */ 2051 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2052 if (igt_live_test_begin(&t, arg->engine->i915, 2053 __func__, arg->engine->name)) 2054 return -EIO; 2055 2056 rq = spinner_create_request(&arg->a.spin, 2057 arg->a.ctx, arg->engine, 2058 MI_ARB_CHECK); 2059 if (IS_ERR(rq)) 2060 return PTR_ERR(rq); 2061 2062 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2063 i915_request_get(rq); 2064 i915_request_add(rq); 2065 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2066 err = -EIO; 2067 goto out; 2068 } 2069 2070 intel_context_set_banned(rq->context); 2071 err = intel_engine_pulse(arg->engine); 2072 if (err) 2073 goto out; 2074 2075 err = wait_for_reset(arg->engine, rq, HZ / 2); 2076 if (err) { 2077 pr_err("Cancelled inflight0 request did not reset\n"); 2078 goto out; 2079 } 2080 2081 out: 2082 i915_request_put(rq); 2083 if (igt_live_test_end(&t)) 2084 err = -EIO; 2085 return err; 2086 } 2087 2088 static int __cancel_active1(struct live_preempt_cancel *arg) 2089 { 2090 struct i915_request *rq[2] = {}; 2091 struct igt_live_test t; 2092 int err; 2093 2094 /* Preempt cancel of ELSP1 */ 2095 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2096 if (igt_live_test_begin(&t, arg->engine->i915, 2097 __func__, arg->engine->name)) 2098 return -EIO; 2099 2100 rq[0] = spinner_create_request(&arg->a.spin, 2101 arg->a.ctx, arg->engine, 2102 MI_NOOP); /* no preemption */ 2103 if (IS_ERR(rq[0])) 2104 return PTR_ERR(rq[0]); 2105 2106 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2107 i915_request_get(rq[0]); 2108 i915_request_add(rq[0]); 2109 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2110 err = -EIO; 2111 goto out; 2112 } 2113 2114 rq[1] = spinner_create_request(&arg->b.spin, 2115 
arg->b.ctx, arg->engine, 2116 MI_ARB_CHECK); 2117 if (IS_ERR(rq[1])) { 2118 err = PTR_ERR(rq[1]); 2119 goto out; 2120 } 2121 2122 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2123 i915_request_get(rq[1]); 2124 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2125 i915_request_add(rq[1]); 2126 if (err) 2127 goto out; 2128 2129 intel_context_set_banned(rq[1]->context); 2130 err = intel_engine_pulse(arg->engine); 2131 if (err) 2132 goto out; 2133 2134 igt_spinner_end(&arg->a.spin); 2135 err = wait_for_reset(arg->engine, rq[1], HZ / 2); 2136 if (err) 2137 goto out; 2138 2139 if (rq[0]->fence.error != 0) { 2140 pr_err("Normal inflight0 request did not complete\n"); 2141 err = -EINVAL; 2142 goto out; 2143 } 2144 2145 if (rq[1]->fence.error != -EIO) { 2146 pr_err("Cancelled inflight1 request did not report -EIO\n"); 2147 err = -EINVAL; 2148 goto out; 2149 } 2150 2151 out: 2152 i915_request_put(rq[1]); 2153 i915_request_put(rq[0]); 2154 if (igt_live_test_end(&t)) 2155 err = -EIO; 2156 return err; 2157 } 2158 2159 static int __cancel_queued(struct live_preempt_cancel *arg) 2160 { 2161 struct i915_request *rq[3] = {}; 2162 struct igt_live_test t; 2163 int err; 2164 2165 /* Full ELSP and one in the wings */ 2166 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2167 if (igt_live_test_begin(&t, arg->engine->i915, 2168 __func__, arg->engine->name)) 2169 return -EIO; 2170 2171 rq[0] = spinner_create_request(&arg->a.spin, 2172 arg->a.ctx, arg->engine, 2173 MI_ARB_CHECK); 2174 if (IS_ERR(rq[0])) 2175 return PTR_ERR(rq[0]); 2176 2177 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2178 i915_request_get(rq[0]); 2179 i915_request_add(rq[0]); 2180 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2181 err = -EIO; 2182 goto out; 2183 } 2184 2185 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 2186 if (IS_ERR(rq[1])) { 2187 err = PTR_ERR(rq[1]); 2188 goto out; 2189 } 2190 2191 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2192 i915_request_get(rq[1]); 2193 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2194 i915_request_add(rq[1]); 2195 if (err) 2196 goto out; 2197 2198 rq[2] = spinner_create_request(&arg->b.spin, 2199 arg->a.ctx, arg->engine, 2200 MI_ARB_CHECK); 2201 if (IS_ERR(rq[2])) { 2202 err = PTR_ERR(rq[2]); 2203 goto out; 2204 } 2205 2206 i915_request_get(rq[2]); 2207 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 2208 i915_request_add(rq[2]); 2209 if (err) 2210 goto out; 2211 2212 intel_context_set_banned(rq[2]->context); 2213 err = intel_engine_pulse(arg->engine); 2214 if (err) 2215 goto out; 2216 2217 err = wait_for_reset(arg->engine, rq[2], HZ / 2); 2218 if (err) 2219 goto out; 2220 2221 if (rq[0]->fence.error != -EIO) { 2222 pr_err("Cancelled inflight0 request did not report -EIO\n"); 2223 err = -EINVAL; 2224 goto out; 2225 } 2226 2227 if (rq[1]->fence.error != 0) { 2228 pr_err("Normal inflight1 request did not complete\n"); 2229 err = -EINVAL; 2230 goto out; 2231 } 2232 2233 if (rq[2]->fence.error != -EIO) { 2234 pr_err("Cancelled queued request did not report -EIO\n"); 2235 err = -EINVAL; 2236 goto out; 2237 } 2238 2239 out: 2240 i915_request_put(rq[2]); 2241 i915_request_put(rq[1]); 2242 i915_request_put(rq[0]); 2243 if (igt_live_test_end(&t)) 2244 err = -EIO; 2245 return err; 2246 } 2247 2248 static int __cancel_hostile(struct live_preempt_cancel *arg) 2249 { 2250 struct i915_request *rq; 2251 int err; 2252 2253 /* Preempt cancel non-preemptible spinner in ELSP0 */ 2254 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2255 return 0; 2256 2257 
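	/* Forcing out a banned, non-preemptible spinner requires an engine reset */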
if (!intel_has_reset_engine(arg->engine->gt)) 2258 return 0; 2259 2260 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2261 rq = spinner_create_request(&arg->a.spin, 2262 arg->a.ctx, arg->engine, 2263 MI_NOOP); /* preemption disabled */ 2264 if (IS_ERR(rq)) 2265 return PTR_ERR(rq); 2266 2267 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2268 i915_request_get(rq); 2269 i915_request_add(rq); 2270 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2271 err = -EIO; 2272 goto out; 2273 } 2274 2275 intel_context_set_banned(rq->context); 2276 err = intel_engine_pulse(arg->engine); /* force reset */ 2277 if (err) 2278 goto out; 2279 2280 err = wait_for_reset(arg->engine, rq, HZ / 2); 2281 if (err) { 2282 pr_err("Cancelled inflight0 request did not reset\n"); 2283 goto out; 2284 } 2285 2286 out: 2287 i915_request_put(rq); 2288 if (igt_flush_test(arg->engine->i915)) 2289 err = -EIO; 2290 return err; 2291 } 2292 2293 static void force_reset_timeout(struct intel_engine_cs *engine) 2294 { 2295 engine->reset_timeout.probability = 999; 2296 atomic_set(&engine->reset_timeout.times, -1); 2297 } 2298 2299 static void cancel_reset_timeout(struct intel_engine_cs *engine) 2300 { 2301 memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout)); 2302 } 2303 2304 static int __cancel_fail(struct live_preempt_cancel *arg) 2305 { 2306 struct intel_engine_cs *engine = arg->engine; 2307 struct i915_request *rq; 2308 int err; 2309 2310 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2311 return 0; 2312 2313 if (!intel_has_reset_engine(engine->gt)) 2314 return 0; 2315 2316 GEM_TRACE("%s(%s)\n", __func__, engine->name); 2317 rq = spinner_create_request(&arg->a.spin, 2318 arg->a.ctx, engine, 2319 MI_NOOP); /* preemption disabled */ 2320 if (IS_ERR(rq)) 2321 return PTR_ERR(rq); 2322 2323 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2324 i915_request_get(rq); 2325 i915_request_add(rq); 2326 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2327 err = -EIO; 2328 goto out; 2329 } 2330 2331 intel_context_set_banned(rq->context); 2332 2333 err = intel_engine_pulse(engine); 2334 if (err) 2335 goto out; 2336 2337 force_reset_timeout(engine); 2338 2339 /* force preempt reset [failure] */ 2340 while (!engine->execlists.pending[0]) 2341 intel_engine_flush_submission(engine); 2342 del_timer_sync(&engine->execlists.preempt); 2343 intel_engine_flush_submission(engine); 2344 2345 cancel_reset_timeout(engine); 2346 2347 /* after failure, require heartbeats to reset device */ 2348 intel_engine_set_heartbeat(engine, 1); 2349 err = wait_for_reset(engine, rq, HZ / 2); 2350 intel_engine_set_heartbeat(engine, 2351 engine->defaults.heartbeat_interval_ms); 2352 if (err) { 2353 pr_err("Cancelled inflight0 request did not reset\n"); 2354 goto out; 2355 } 2356 2357 out: 2358 i915_request_put(rq); 2359 if (igt_flush_test(engine->i915)) 2360 err = -EIO; 2361 return err; 2362 } 2363 2364 static int live_preempt_cancel(void *arg) 2365 { 2366 struct intel_gt *gt = arg; 2367 struct live_preempt_cancel data; 2368 enum intel_engine_id id; 2369 int err = -ENOMEM; 2370 2371 /* 2372 * To cancel an inflight context, we need to first remove it from the 2373 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 
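 * (We cancel a request in ELSP[0], in ELSP[1], one still waiting in the queue, a non-preemptible spinner, and finally a preempt-reset that itself fails.)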
2374 */ 2375 2376 if (preempt_client_init(gt, &data.a)) 2377 return -ENOMEM; 2378 if (preempt_client_init(gt, &data.b)) 2379 goto err_client_a; 2380 2381 for_each_engine(data.engine, gt, id) { 2382 if (!intel_engine_has_preemption(data.engine)) 2383 continue; 2384 2385 err = __cancel_active0(&data); 2386 if (err) 2387 goto err_wedged; 2388 2389 err = __cancel_active1(&data); 2390 if (err) 2391 goto err_wedged; 2392 2393 err = __cancel_queued(&data); 2394 if (err) 2395 goto err_wedged; 2396 2397 err = __cancel_hostile(&data); 2398 if (err) 2399 goto err_wedged; 2400 2401 err = __cancel_fail(&data); 2402 if (err) 2403 goto err_wedged; 2404 } 2405 2406 err = 0; 2407 err_client_b: 2408 preempt_client_fini(&data.b); 2409 err_client_a: 2410 preempt_client_fini(&data.a); 2411 return err; 2412 2413 err_wedged: 2414 GEM_TRACE_DUMP(); 2415 igt_spinner_end(&data.b.spin); 2416 igt_spinner_end(&data.a.spin); 2417 intel_gt_set_wedged(gt); 2418 goto err_client_b; 2419 } 2420 2421 static int live_suppress_self_preempt(void *arg) 2422 { 2423 struct intel_gt *gt = arg; 2424 struct intel_engine_cs *engine; 2425 struct i915_sched_attr attr = { 2426 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 2427 }; 2428 struct preempt_client a, b; 2429 enum intel_engine_id id; 2430 int err = -ENOMEM; 2431 2432 /* 2433 * Verify that if a preemption request does not cause a change in 2434 * the current execution order, the preempt-to-idle injection is 2435 * skipped and that we do not accidentally apply it after the CS 2436 * completion event. 2437 */ 2438 2439 if (intel_uc_uses_guc_submission(&gt->uc)) 2440 return 0; /* presume black box */ 2441 2442 if (intel_vgpu_active(gt->i915)) 2443 return 0; /* GVT forces single port & request submission */ 2444 2445 if (preempt_client_init(gt, &a)) 2446 return -ENOMEM; 2447 if (preempt_client_init(gt, &b)) 2448 goto err_client_a; 2449 2450 for_each_engine(engine, gt, id) { 2451 struct i915_request *rq_a, *rq_b; 2452 int depth; 2453 2454 if (!intel_engine_has_preemption(engine)) 2455 continue; 2456 2457 if (igt_flush_test(gt->i915)) 2458 goto err_wedged; 2459 2460 st_engine_heartbeat_disable(engine); 2461 engine->execlists.preempt_hang.count = 0; 2462 2463 rq_a = spinner_create_request(&a.spin, 2464 a.ctx, engine, 2465 MI_NOOP); 2466 if (IS_ERR(rq_a)) { 2467 err = PTR_ERR(rq_a); 2468 st_engine_heartbeat_enable(engine); 2469 goto err_client_b; 2470 } 2471 2472 i915_request_add(rq_a); 2473 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2474 pr_err("First client failed to start\n"); 2475 st_engine_heartbeat_enable(engine); 2476 goto err_wedged; 2477 } 2478 2479 /* Keep postponing the timer to avoid premature slicing */ 2480 mod_timer(&engine->execlists.timer, jiffies + HZ); 2481 for (depth = 0; depth < 8; depth++) { 2482 rq_b = spinner_create_request(&b.spin, 2483 b.ctx, engine, 2484 MI_NOOP); 2485 if (IS_ERR(rq_b)) { 2486 err = PTR_ERR(rq_b); 2487 st_engine_heartbeat_enable(engine); 2488 goto err_client_b; 2489 } 2490 i915_request_add(rq_b); 2491 2492 GEM_BUG_ON(i915_request_completed(rq_a)); 2493 engine->schedule(rq_a, &attr); 2494 igt_spinner_end(&a.spin); 2495 2496 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2497 pr_err("Second client failed to start\n"); 2498 st_engine_heartbeat_enable(engine); 2499 goto err_wedged; 2500 } 2501 2502 swap(a, b); 2503 rq_a = rq_b; 2504 } 2505 igt_spinner_end(&a.spin); 2506 2507 if (engine->execlists.preempt_hang.count) { 2508 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2509 engine->name, 2510
engine->execlists.preempt_hang.count, 2511 depth); 2512 st_engine_heartbeat_enable(engine); 2513 err = -EINVAL; 2514 goto err_client_b; 2515 } 2516 2517 st_engine_heartbeat_enable(engine); 2518 if (igt_flush_test(gt->i915)) 2519 goto err_wedged; 2520 } 2521 2522 err = 0; 2523 err_client_b: 2524 preempt_client_fini(&b); 2525 err_client_a: 2526 preempt_client_fini(&a); 2527 return err; 2528 2529 err_wedged: 2530 igt_spinner_end(&b.spin); 2531 igt_spinner_end(&a.spin); 2532 intel_gt_set_wedged(gt); 2533 err = -EIO; 2534 goto err_client_b; 2535 } 2536 2537 static int live_chain_preempt(void *arg) 2538 { 2539 struct intel_gt *gt = arg; 2540 struct intel_engine_cs *engine; 2541 struct preempt_client hi, lo; 2542 enum intel_engine_id id; 2543 int err = -ENOMEM; 2544 2545 /* 2546 * Build a chain AB...BA between two contexts (A, B) and request 2547 * preemption of the last request. It should then complete before 2548 * the previously submitted spinner in B. 2549 */ 2550 2551 if (preempt_client_init(gt, &hi)) 2552 return -ENOMEM; 2553 2554 if (preempt_client_init(gt, &lo)) 2555 goto err_client_hi; 2556 2557 for_each_engine(engine, gt, id) { 2558 struct i915_sched_attr attr = { 2559 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2560 }; 2561 struct igt_live_test t; 2562 struct i915_request *rq; 2563 int ring_size, count, i; 2564 2565 if (!intel_engine_has_preemption(engine)) 2566 continue; 2567 2568 rq = spinner_create_request(&lo.spin, 2569 lo.ctx, engine, 2570 MI_ARB_CHECK); 2571 if (IS_ERR(rq)) 2572 goto err_wedged; 2573 2574 i915_request_get(rq); 2575 i915_request_add(rq); 2576 2577 ring_size = rq->wa_tail - rq->head; 2578 if (ring_size < 0) 2579 ring_size += rq->ring->size; 2580 ring_size = rq->ring->size / ring_size; 2581 pr_debug("%s(%s): Using maximum of %d requests\n", 2582 __func__, engine->name, ring_size); 2583 2584 igt_spinner_end(&lo.spin); 2585 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2586 pr_err("Timed out waiting to flush %s\n", engine->name); 2587 i915_request_put(rq); 2588 goto err_wedged; 2589 } 2590 i915_request_put(rq); 2591 2592 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2593 err = -EIO; 2594 goto err_wedged; 2595 } 2596 2597 for_each_prime_number_from(count, 1, ring_size) { 2598 rq = spinner_create_request(&hi.spin, 2599 hi.ctx, engine, 2600 MI_ARB_CHECK); 2601 if (IS_ERR(rq)) 2602 goto err_wedged; 2603 i915_request_add(rq); 2604 if (!igt_wait_for_spinner(&hi.spin, rq)) 2605 goto err_wedged; 2606 2607 rq = spinner_create_request(&lo.spin, 2608 lo.ctx, engine, 2609 MI_ARB_CHECK); 2610 if (IS_ERR(rq)) 2611 goto err_wedged; 2612 i915_request_add(rq); 2613 2614 for (i = 0; i < count; i++) { 2615 rq = igt_request_alloc(lo.ctx, engine); 2616 if (IS_ERR(rq)) 2617 goto err_wedged; 2618 i915_request_add(rq); 2619 } 2620 2621 rq = igt_request_alloc(hi.ctx, engine); 2622 if (IS_ERR(rq)) 2623 goto err_wedged; 2624 2625 i915_request_get(rq); 2626 i915_request_add(rq); 2627 engine->schedule(rq, &attr); 2628 2629 igt_spinner_end(&hi.spin); 2630 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2631 struct drm_printer p = 2632 drm_info_printer(gt->i915->drm.dev); 2633 2634 pr_err("Failed to preempt over chain of %d\n", 2635 count); 2636 intel_engine_dump(engine, &p, 2637 "%s\n", engine->name); 2638 i915_request_put(rq); 2639 goto err_wedged; 2640 } 2641 igt_spinner_end(&lo.spin); 2642 i915_request_put(rq); 2643 2644 rq = igt_request_alloc(lo.ctx, engine); 2645 if (IS_ERR(rq)) 2646 goto err_wedged; 2647 2648 i915_request_get(rq); 2649 i915_request_add(rq); 2650 2651 if 
(i915_request_wait(rq, 0, HZ / 5) < 0) { 2652 struct drm_printer p = 2653 drm_info_printer(gt->i915->drm.dev); 2654 2655 pr_err("Failed to flush low priority chain of %d requests\n", 2656 count); 2657 intel_engine_dump(engine, &p, 2658 "%s\n", engine->name); 2659 2660 i915_request_put(rq); 2661 goto err_wedged; 2662 } 2663 i915_request_put(rq); 2664 } 2665 2666 if (igt_live_test_end(&t)) { 2667 err = -EIO; 2668 goto err_wedged; 2669 } 2670 } 2671 2672 err = 0; 2673 err_client_lo: 2674 preempt_client_fini(&lo); 2675 err_client_hi: 2676 preempt_client_fini(&hi); 2677 return err; 2678 2679 err_wedged: 2680 igt_spinner_end(&hi.spin); 2681 igt_spinner_end(&lo.spin); 2682 intel_gt_set_wedged(gt); 2683 err = -EIO; 2684 goto err_client_lo; 2685 } 2686 2687 static int create_gang(struct intel_engine_cs *engine, 2688 struct i915_request **prev) 2689 { 2690 struct drm_i915_gem_object *obj; 2691 struct intel_context *ce; 2692 struct i915_request *rq; 2693 struct i915_vma *vma; 2694 u32 *cs; 2695 int err; 2696 2697 ce = intel_context_create(engine); 2698 if (IS_ERR(ce)) 2699 return PTR_ERR(ce); 2700 2701 obj = i915_gem_object_create_internal(engine->i915, 4096); 2702 if (IS_ERR(obj)) { 2703 err = PTR_ERR(obj); 2704 goto err_ce; 2705 } 2706 2707 vma = i915_vma_instance(obj, ce->vm, NULL); 2708 if (IS_ERR(vma)) { 2709 err = PTR_ERR(vma); 2710 goto err_obj; 2711 } 2712 2713 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2714 if (err) 2715 goto err_obj; 2716 2717 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2718 if (IS_ERR(cs)) { 2719 err = PTR_ERR(cs); 2720 goto err_obj; 2721 } 2722 2723 /* Semaphore target: spin until zero */ 2724 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2725 2726 *cs++ = MI_SEMAPHORE_WAIT | 2727 MI_SEMAPHORE_POLL | 2728 MI_SEMAPHORE_SAD_EQ_SDD; 2729 *cs++ = 0; 2730 *cs++ = lower_32_bits(vma->node.start); 2731 *cs++ = upper_32_bits(vma->node.start); 2732 2733 if (*prev) { 2734 u64 offset = (*prev)->batch->node.start; 2735 2736 /* Terminate the spinner in the next lower priority batch. 
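* (each spinner polls the first dword of its own batch, so storing zero there releases it)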
*/ 2737 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2738 *cs++ = lower_32_bits(offset); 2739 *cs++ = upper_32_bits(offset); 2740 *cs++ = 0; 2741 } 2742 2743 *cs++ = MI_BATCH_BUFFER_END; 2744 i915_gem_object_flush_map(obj); 2745 i915_gem_object_unpin_map(obj); 2746 2747 rq = intel_context_create_request(ce); 2748 if (IS_ERR(rq)) { 2749 err = PTR_ERR(rq); 2750 goto err_obj; 2751 } 2752 2753 rq->batch = i915_vma_get(vma); 2754 i915_request_get(rq); 2755 2756 i915_vma_lock(vma); 2757 err = i915_request_await_object(rq, vma->obj, false); 2758 if (!err) 2759 err = i915_vma_move_to_active(vma, rq, 0); 2760 if (!err) 2761 err = rq->engine->emit_bb_start(rq, 2762 vma->node.start, 2763 PAGE_SIZE, 0); 2764 i915_vma_unlock(vma); 2765 i915_request_add(rq); 2766 if (err) 2767 goto err_rq; 2768 2769 i915_gem_object_put(obj); 2770 intel_context_put(ce); 2771 2772 rq->mock.link.next = &(*prev)->mock.link; 2773 *prev = rq; 2774 return 0; 2775 2776 err_rq: 2777 i915_vma_put(rq->batch); 2778 i915_request_put(rq); 2779 err_obj: 2780 i915_gem_object_put(obj); 2781 err_ce: 2782 intel_context_put(ce); 2783 return err; 2784 } 2785 2786 static int __live_preempt_ring(struct intel_engine_cs *engine, 2787 struct igt_spinner *spin, 2788 int queue_sz, int ring_sz) 2789 { 2790 struct intel_context *ce[2] = {}; 2791 struct i915_request *rq; 2792 struct igt_live_test t; 2793 int err = 0; 2794 int n; 2795 2796 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) 2797 return -EIO; 2798 2799 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2800 struct intel_context *tmp; 2801 2802 tmp = intel_context_create(engine); 2803 if (IS_ERR(tmp)) { 2804 err = PTR_ERR(tmp); 2805 goto err_ce; 2806 } 2807 2808 tmp->ring = __intel_context_ring_size(ring_sz); 2809 2810 err = intel_context_pin(tmp); 2811 if (err) { 2812 intel_context_put(tmp); 2813 goto err_ce; 2814 } 2815 2816 memset32(tmp->ring->vaddr, 2817 0xdeadbeef, /* trigger a hang if executed */ 2818 tmp->ring->vma->size / sizeof(u32)); 2819 2820 ce[n] = tmp; 2821 } 2822 2823 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); 2824 if (IS_ERR(rq)) { 2825 err = PTR_ERR(rq); 2826 goto err_ce; 2827 } 2828 2829 i915_request_get(rq); 2830 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2831 i915_request_add(rq); 2832 2833 if (!igt_wait_for_spinner(spin, rq)) { 2834 intel_gt_set_wedged(engine->gt); 2835 i915_request_put(rq); 2836 err = -ETIME; 2837 goto err_ce; 2838 } 2839 2840 /* Fill the ring, until we will cause a wrap */ 2841 n = 0; 2842 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { 2843 struct i915_request *tmp; 2844 2845 tmp = intel_context_create_request(ce[0]); 2846 if (IS_ERR(tmp)) { 2847 err = PTR_ERR(tmp); 2848 i915_request_put(rq); 2849 goto err_ce; 2850 } 2851 2852 i915_request_add(tmp); 2853 intel_engine_flush_submission(engine); 2854 n++; 2855 } 2856 intel_engine_flush_submission(engine); 2857 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 2858 engine->name, queue_sz, n, 2859 ce[0]->ring->size, 2860 ce[0]->ring->tail, 2861 ce[0]->ring->emit, 2862 rq->tail); 2863 i915_request_put(rq); 2864 2865 /* Create a second request to preempt the first ring */ 2866 rq = intel_context_create_request(ce[1]); 2867 if (IS_ERR(rq)) { 2868 err = PTR_ERR(rq); 2869 goto err_ce; 2870 } 2871 2872 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2873 i915_request_get(rq); 2874 i915_request_add(rq); 2875 2876 err = wait_for_submit(engine, rq, HZ / 2); 2877 i915_request_put(rq); 2878 if (err) { 2879 pr_err("%s: preemption request was not 
submitted\n", 2880 engine->name); 2881 err = -ETIME; 2882 } 2883 2884 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 2885 engine->name, 2886 ce[0]->ring->tail, ce[0]->ring->emit, 2887 ce[1]->ring->tail, ce[1]->ring->emit); 2888 2889 err_ce: 2890 intel_engine_flush_submission(engine); 2891 igt_spinner_end(spin); 2892 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2893 if (IS_ERR_OR_NULL(ce[n])) 2894 break; 2895 2896 intel_context_unpin(ce[n]); 2897 intel_context_put(ce[n]); 2898 } 2899 if (igt_live_test_end(&t)) 2900 err = -EIO; 2901 return err; 2902 } 2903 2904 static int live_preempt_ring(void *arg) 2905 { 2906 struct intel_gt *gt = arg; 2907 struct intel_engine_cs *engine; 2908 struct igt_spinner spin; 2909 enum intel_engine_id id; 2910 int err = 0; 2911 2912 /* 2913 * Check that we roll back large chunks of a ring in order to do a 2914 * preemption event. Similar to live_unlite_ring, but looking at 2915 * ring size rather than the impact of intel_ring_direction(). 2916 */ 2917 2918 if (igt_spinner_init(&spin, gt)) 2919 return -ENOMEM; 2920 2921 for_each_engine(engine, gt, id) { 2922 int n; 2923 2924 if (!intel_engine_has_preemption(engine)) 2925 continue; 2926 2927 if (!intel_engine_can_store_dword(engine)) 2928 continue; 2929 2930 st_engine_heartbeat_disable(engine); 2931 2932 for (n = 0; n <= 3; n++) { 2933 err = __live_preempt_ring(engine, &spin, 2934 n * SZ_4K / 4, SZ_4K); 2935 if (err) 2936 break; 2937 } 2938 2939 st_engine_heartbeat_enable(engine); 2940 if (err) 2941 break; 2942 } 2943 2944 igt_spinner_fini(&spin); 2945 return err; 2946 } 2947 2948 static int live_preempt_gang(void *arg) 2949 { 2950 struct intel_gt *gt = arg; 2951 struct intel_engine_cs *engine; 2952 enum intel_engine_id id; 2953 2954 /* 2955 * Build as long a chain of preempters as we can, with each 2956 * request higher priority than the last. Once we are ready, we release 2957 * the last batch which then percolates down the chain, each releasing 2958 * the next oldest in turn. The intent is to simply push as hard as we 2959 * can with the number of preemptions, trying to exceed narrow HW 2960 * limits. At a minimum, we insist that we can sort all the user 2961 * high priority levels into execution order. 2962 */ 2963 2964 for_each_engine(engine, gt, id) { 2965 struct i915_request *rq = NULL; 2966 struct igt_live_test t; 2967 IGT_TIMEOUT(end_time); 2968 int prio = 0; 2969 int err = 0; 2970 u32 *cs; 2971 2972 if (!intel_engine_has_preemption(engine)) 2973 continue; 2974 2975 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2976 return -EIO; 2977 2978 do { 2979 struct i915_sched_attr attr = { 2980 .priority = I915_USER_PRIORITY(prio++), 2981 }; 2982 2983 err = create_gang(engine, &rq); 2984 if (err) 2985 break; 2986 2987 /* Submit each spinner at increasing priority */ 2988 engine->schedule(rq, &attr); 2989 } while (prio <= I915_PRIORITY_MAX && 2990 !__igt_timeout(end_time, NULL)); 2991 pr_debug("%s: Preempt chain of %d requests\n", 2992 engine->name, prio); 2993 2994 /* 2995 * Such that the last spinner is the highest priority and 2996 * should execute first. When that spinner completes, 2997 * it will terminate the next lowest spinner until there 2998 * are no more spinners and the gang is complete.
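* We then wait on each request in turn, walking the mock.link chain from the last (highest priority) submission back down to the first.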
2999 */ 3000 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 3001 if (!IS_ERR(cs)) { 3002 *cs = 0; 3003 i915_gem_object_unpin_map(rq->batch->obj); 3004 } else { 3005 err = PTR_ERR(cs); 3006 intel_gt_set_wedged(gt); 3007 } 3008 3009 while (rq) { /* wait for each rq from highest to lowest prio */ 3010 struct i915_request *n = list_next_entry(rq, mock.link); 3011 3012 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 3013 struct drm_printer p = 3014 drm_info_printer(engine->i915->drm.dev); 3015 3016 pr_err("Failed to flush chain of %d requests, at %d\n", 3017 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 3018 intel_engine_dump(engine, &p, 3019 "%s\n", engine->name); 3020 3021 err = -ETIME; 3022 } 3023 3024 i915_vma_put(rq->batch); 3025 i915_request_put(rq); 3026 rq = n; 3027 } 3028 3029 if (igt_live_test_end(&t)) 3030 err = -EIO; 3031 if (err) 3032 return err; 3033 } 3034 3035 return 0; 3036 } 3037 3038 static struct i915_vma * 3039 create_gpr_user(struct intel_engine_cs *engine, 3040 struct i915_vma *result, 3041 unsigned int offset) 3042 { 3043 struct drm_i915_gem_object *obj; 3044 struct i915_vma *vma; 3045 u32 *cs; 3046 int err; 3047 int i; 3048 3049 obj = i915_gem_object_create_internal(engine->i915, 4096); 3050 if (IS_ERR(obj)) 3051 return ERR_CAST(obj); 3052 3053 vma = i915_vma_instance(obj, result->vm, NULL); 3054 if (IS_ERR(vma)) { 3055 i915_gem_object_put(obj); 3056 return vma; 3057 } 3058 3059 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3060 if (err) { 3061 i915_vma_put(vma); 3062 return ERR_PTR(err); 3063 } 3064 3065 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 3066 if (IS_ERR(cs)) { 3067 i915_vma_put(vma); 3068 return ERR_CAST(cs); 3069 } 3070 3071 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 3072 *cs++ = MI_LOAD_REGISTER_IMM(1); 3073 *cs++ = CS_GPR(engine, 0); 3074 *cs++ = 1; 3075 3076 for (i = 1; i < NUM_GPR; i++) { 3077 u64 addr; 3078 3079 /* 3080 * Perform: GPR[i]++ 3081 * 3082 * As we read and write into the context saved GPR[i], if 3083 * we restart this batch buffer from an earlier point, we 3084 * will repeat the increment and store a value > 1. 
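* After each store we also wait for the global semaphore to reach i, giving the preempting requests a window to interrupt the batch between steps.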
3085 */ 3086 *cs++ = MI_MATH(4); 3087 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 3088 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 3089 *cs++ = MI_MATH_ADD; 3090 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 3091 3092 addr = result->node.start + offset + i * sizeof(*cs); 3093 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 3094 *cs++ = CS_GPR(engine, 2 * i); 3095 *cs++ = lower_32_bits(addr); 3096 *cs++ = upper_32_bits(addr); 3097 3098 *cs++ = MI_SEMAPHORE_WAIT | 3099 MI_SEMAPHORE_POLL | 3100 MI_SEMAPHORE_SAD_GTE_SDD; 3101 *cs++ = i; 3102 *cs++ = lower_32_bits(result->node.start); 3103 *cs++ = upper_32_bits(result->node.start); 3104 } 3105 3106 *cs++ = MI_BATCH_BUFFER_END; 3107 i915_gem_object_flush_map(obj); 3108 i915_gem_object_unpin_map(obj); 3109 3110 return vma; 3111 } 3112 3113 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 3114 { 3115 struct drm_i915_gem_object *obj; 3116 struct i915_vma *vma; 3117 int err; 3118 3119 obj = i915_gem_object_create_internal(gt->i915, sz); 3120 if (IS_ERR(obj)) 3121 return ERR_CAST(obj); 3122 3123 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 3124 if (IS_ERR(vma)) { 3125 i915_gem_object_put(obj); 3126 return vma; 3127 } 3128 3129 err = i915_ggtt_pin(vma, NULL, 0, 0); 3130 if (err) { 3131 i915_vma_put(vma); 3132 return ERR_PTR(err); 3133 } 3134 3135 return vma; 3136 } 3137 3138 static struct i915_request * 3139 create_gpr_client(struct intel_engine_cs *engine, 3140 struct i915_vma *global, 3141 unsigned int offset) 3142 { 3143 struct i915_vma *batch, *vma; 3144 struct intel_context *ce; 3145 struct i915_request *rq; 3146 int err; 3147 3148 ce = intel_context_create(engine); 3149 if (IS_ERR(ce)) 3150 return ERR_CAST(ce); 3151 3152 vma = i915_vma_instance(global->obj, ce->vm, NULL); 3153 if (IS_ERR(vma)) { 3154 err = PTR_ERR(vma); 3155 goto out_ce; 3156 } 3157 3158 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3159 if (err) 3160 goto out_ce; 3161 3162 batch = create_gpr_user(engine, vma, offset); 3163 if (IS_ERR(batch)) { 3164 err = PTR_ERR(batch); 3165 goto out_vma; 3166 } 3167 3168 rq = intel_context_create_request(ce); 3169 if (IS_ERR(rq)) { 3170 err = PTR_ERR(rq); 3171 goto out_batch; 3172 } 3173 3174 i915_vma_lock(vma); 3175 err = i915_request_await_object(rq, vma->obj, false); 3176 if (!err) 3177 err = i915_vma_move_to_active(vma, rq, 0); 3178 i915_vma_unlock(vma); 3179 3180 i915_vma_lock(batch); 3181 if (!err) 3182 err = i915_request_await_object(rq, batch->obj, false); 3183 if (!err) 3184 err = i915_vma_move_to_active(batch, rq, 0); 3185 if (!err) 3186 err = rq->engine->emit_bb_start(rq, 3187 batch->node.start, 3188 PAGE_SIZE, 0); 3189 i915_vma_unlock(batch); 3190 i915_vma_unpin(batch); 3191 3192 if (!err) 3193 i915_request_get(rq); 3194 i915_request_add(rq); 3195 3196 out_batch: 3197 i915_vma_put(batch); 3198 out_vma: 3199 i915_vma_unpin(vma); 3200 out_ce: 3201 intel_context_put(ce); 3202 return err ? 
ERR_PTR(err) : rq; 3203 } 3204 3205 static int preempt_user(struct intel_engine_cs *engine, 3206 struct i915_vma *global, 3207 int id) 3208 { 3209 struct i915_sched_attr attr = { 3210 .priority = I915_PRIORITY_MAX 3211 }; 3212 struct i915_request *rq; 3213 int err = 0; 3214 u32 *cs; 3215 3216 rq = intel_engine_create_kernel_request(engine); 3217 if (IS_ERR(rq)) 3218 return PTR_ERR(rq); 3219 3220 cs = intel_ring_begin(rq, 4); 3221 if (IS_ERR(cs)) { 3222 i915_request_add(rq); 3223 return PTR_ERR(cs); 3224 } 3225 3226 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 3227 *cs++ = i915_ggtt_offset(global); 3228 *cs++ = 0; 3229 *cs++ = id; 3230 3231 intel_ring_advance(rq, cs); 3232 3233 i915_request_get(rq); 3234 i915_request_add(rq); 3235 3236 engine->schedule(rq, &attr); 3237 3238 if (i915_request_wait(rq, 0, HZ / 2) < 0) 3239 err = -ETIME; 3240 i915_request_put(rq); 3241 3242 return err; 3243 } 3244 3245 static int live_preempt_user(void *arg) 3246 { 3247 struct intel_gt *gt = arg; 3248 struct intel_engine_cs *engine; 3249 struct i915_vma *global; 3250 enum intel_engine_id id; 3251 u32 *result; 3252 int err = 0; 3253 3254 /* 3255 * In our other tests, we look at preemption in carefully 3256 * controlled conditions in the ringbuffer. Since most of the 3257 * time is spent in user batches, most of our preemptions naturally 3258 * occur there. We want to verify that when we preempt inside a batch 3259 * we continue on from the current instruction and do not roll back 3260 * to the start, or another earlier arbitration point. 3261 * 3262 * To verify this, we create a batch which is a mixture of 3263 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3264 * a few preempting contexts thrown into the mix, we look for any 3265 * repeated instructions (which show up as incorrect values). 
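* Each client writes into its own NUM_GPR-dword slice of the shared result page, so every incremented GPR should read back as exactly 1.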
3266 */ 3267 3268 global = create_global(gt, 4096); 3269 if (IS_ERR(global)) 3270 return PTR_ERR(global); 3271 3272 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); 3273 if (IS_ERR(result)) { 3274 i915_vma_unpin_and_release(&global, 0); 3275 return PTR_ERR(result); 3276 } 3277 3278 for_each_engine(engine, gt, id) { 3279 struct i915_request *client[3] = {}; 3280 struct igt_live_test t; 3281 int i; 3282 3283 if (!intel_engine_has_preemption(engine)) 3284 continue; 3285 3286 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) 3287 continue; /* we need per-context GPR */ 3288 3289 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3290 err = -EIO; 3291 break; 3292 } 3293 3294 memset(result, 0, 4096); 3295 3296 for (i = 0; i < ARRAY_SIZE(client); i++) { 3297 struct i915_request *rq; 3298 3299 rq = create_gpr_client(engine, global, 3300 NUM_GPR * i * sizeof(u32)); 3301 if (IS_ERR(rq)) { 3302 err = PTR_ERR(rq); 3303 goto end_test; 3304 } 3305 3306 client[i] = rq; 3307 } 3308 3309 /* Continuously preempt the set of 3 running contexts */ 3310 for (i = 1; i <= NUM_GPR; i++) { 3311 err = preempt_user(engine, global, i); 3312 if (err) 3313 goto end_test; 3314 } 3315 3316 if (READ_ONCE(result[0]) != NUM_GPR) { 3317 pr_err("%s: Failed to release semaphore\n", 3318 engine->name); 3319 err = -EIO; 3320 goto end_test; 3321 } 3322 3323 for (i = 0; i < ARRAY_SIZE(client); i++) { 3324 int gpr; 3325 3326 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3327 err = -ETIME; 3328 goto end_test; 3329 } 3330 3331 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3332 if (result[NUM_GPR * i + gpr] != 1) { 3333 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3334 engine->name, 3335 i, gpr, result[NUM_GPR * i + gpr]); 3336 err = -EINVAL; 3337 goto end_test; 3338 } 3339 } 3340 } 3341 3342 end_test: 3343 for (i = 0; i < ARRAY_SIZE(client); i++) { 3344 if (!client[i]) 3345 break; 3346 3347 i915_request_put(client[i]); 3348 } 3349 3350 /* Flush the semaphores on error */ 3351 smp_store_mb(result[0], -1); 3352 if (igt_live_test_end(&t)) 3353 err = -EIO; 3354 if (err) 3355 break; 3356 } 3357 3358 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3359 return err; 3360 } 3361 3362 static int live_preempt_timeout(void *arg) 3363 { 3364 struct intel_gt *gt = arg; 3365 struct i915_gem_context *ctx_hi, *ctx_lo; 3366 struct igt_spinner spin_lo; 3367 struct intel_engine_cs *engine; 3368 enum intel_engine_id id; 3369 int err = -ENOMEM; 3370 3371 /* 3372 * Check that we force preemption to occur by cancelling the previous 3373 * context if it refuses to yield the GPU. 
3374 */ 3375 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3376 return 0; 3377 3378 if (!intel_has_reset_engine(gt)) 3379 return 0; 3380 3381 if (igt_spinner_init(&spin_lo, gt)) 3382 return -ENOMEM; 3383 3384 ctx_hi = kernel_context(gt->i915); 3385 if (!ctx_hi) 3386 goto err_spin_lo; 3387 ctx_hi->sched.priority = 3388 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 3389 3390 ctx_lo = kernel_context(gt->i915); 3391 if (!ctx_lo) 3392 goto err_ctx_hi; 3393 ctx_lo->sched.priority = 3394 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 3395 3396 for_each_engine(engine, gt, id) { 3397 unsigned long saved_timeout; 3398 struct i915_request *rq; 3399 3400 if (!intel_engine_has_preemption(engine)) 3401 continue; 3402 3403 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3404 MI_NOOP); /* preemption disabled */ 3405 if (IS_ERR(rq)) { 3406 err = PTR_ERR(rq); 3407 goto err_ctx_lo; 3408 } 3409 3410 i915_request_add(rq); 3411 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3412 intel_gt_set_wedged(gt); 3413 err = -EIO; 3414 goto err_ctx_lo; 3415 } 3416 3417 rq = igt_request_alloc(ctx_hi, engine); 3418 if (IS_ERR(rq)) { 3419 igt_spinner_end(&spin_lo); 3420 err = PTR_ERR(rq); 3421 goto err_ctx_lo; 3422 } 3423 3424 /* Flush the previous CS ack before changing timeouts */ 3425 while (READ_ONCE(engine->execlists.pending[0])) 3426 cpu_relax(); 3427 3428 saved_timeout = engine->props.preempt_timeout_ms; 3429 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3430 3431 i915_request_get(rq); 3432 i915_request_add(rq); 3433 3434 intel_engine_flush_submission(engine); 3435 engine->props.preempt_timeout_ms = saved_timeout; 3436 3437 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3438 intel_gt_set_wedged(gt); 3439 i915_request_put(rq); 3440 err = -ETIME; 3441 goto err_ctx_lo; 3442 } 3443 3444 igt_spinner_end(&spin_lo); 3445 i915_request_put(rq); 3446 } 3447 3448 err = 0; 3449 err_ctx_lo: 3450 kernel_context_close(ctx_lo); 3451 err_ctx_hi: 3452 kernel_context_close(ctx_hi); 3453 err_spin_lo: 3454 igt_spinner_fini(&spin_lo); 3455 return err; 3456 } 3457 3458 static int random_range(struct rnd_state *rnd, int min, int max) 3459 { 3460 return i915_prandom_u32_max_state(max - min, rnd) + min; 3461 } 3462 3463 static int random_priority(struct rnd_state *rnd) 3464 { 3465 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3466 } 3467 3468 struct preempt_smoke { 3469 struct intel_gt *gt; 3470 struct i915_gem_context **contexts; 3471 struct intel_engine_cs *engine; 3472 struct drm_i915_gem_object *batch; 3473 unsigned int ncontext; 3474 struct rnd_state prng; 3475 unsigned long count; 3476 }; 3477 3478 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3479 { 3480 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3481 &smoke->prng)]; 3482 } 3483 3484 static int smoke_submit(struct preempt_smoke *smoke, 3485 struct i915_gem_context *ctx, int prio, 3486 struct drm_i915_gem_object *batch) 3487 { 3488 struct i915_request *rq; 3489 struct i915_vma *vma = NULL; 3490 int err = 0; 3491 3492 if (batch) { 3493 struct i915_address_space *vm; 3494 3495 vm = i915_gem_context_get_vm_rcu(ctx); 3496 vma = i915_vma_instance(batch, vm, NULL); 3497 i915_vm_put(vm); 3498 if (IS_ERR(vma)) 3499 return PTR_ERR(vma); 3500 3501 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3502 if (err) 3503 return err; 3504 } 3505 3506 ctx->sched.priority = prio; 3507 3508 rq = igt_request_alloc(ctx, smoke->engine); 3509 if (IS_ERR(rq)) { 3510 err = PTR_ERR(rq); 3511 goto unpin; 3512 } 3513 3514 if 
(vma) { 3515 i915_vma_lock(vma); 3516 err = i915_request_await_object(rq, vma->obj, false); 3517 if (!err) 3518 err = i915_vma_move_to_active(vma, rq, 0); 3519 if (!err) 3520 err = rq->engine->emit_bb_start(rq, 3521 vma->node.start, 3522 PAGE_SIZE, 0); 3523 i915_vma_unlock(vma); 3524 } 3525 3526 i915_request_add(rq); 3527 3528 unpin: 3529 if (vma) 3530 i915_vma_unpin(vma); 3531 3532 return err; 3533 } 3534 3535 static int smoke_crescendo_thread(void *arg) 3536 { 3537 struct preempt_smoke *smoke = arg; 3538 IGT_TIMEOUT(end_time); 3539 unsigned long count; 3540 3541 count = 0; 3542 do { 3543 struct i915_gem_context *ctx = smoke_context(smoke); 3544 int err; 3545 3546 err = smoke_submit(smoke, 3547 ctx, count % I915_PRIORITY_MAX, 3548 smoke->batch); 3549 if (err) 3550 return err; 3551 3552 count++; 3553 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3554 3555 smoke->count = count; 3556 return 0; 3557 } 3558 3559 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3560 #define BATCH BIT(0) 3561 { 3562 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3563 struct preempt_smoke arg[I915_NUM_ENGINES]; 3564 struct intel_engine_cs *engine; 3565 enum intel_engine_id id; 3566 unsigned long count; 3567 int err = 0; 3568 3569 for_each_engine(engine, smoke->gt, id) { 3570 arg[id] = *smoke; 3571 arg[id].engine = engine; 3572 if (!(flags & BATCH)) 3573 arg[id].batch = NULL; 3574 arg[id].count = 0; 3575 3576 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id], 3577 "igt/smoke:%d", id); 3578 if (IS_ERR(tsk[id])) { 3579 err = PTR_ERR(tsk[id]); 3580 break; 3581 } 3582 get_task_struct(tsk[id]); 3583 } 3584 3585 yield(); /* start all threads before we kthread_stop() */ 3586 3587 count = 0; 3588 for_each_engine(engine, smoke->gt, id) { 3589 int status; 3590 3591 if (IS_ERR_OR_NULL(tsk[id])) 3592 continue; 3593 3594 status = kthread_stop(tsk[id]); 3595 if (status && !err) 3596 err = status; 3597 3598 count += arg[id].count; 3599 3600 put_task_struct(tsk[id]); 3601 } 3602 3603 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3604 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3605 return err; 3606 } 3607 3608 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3609 { 3610 enum intel_engine_id id; 3611 IGT_TIMEOUT(end_time); 3612 unsigned long count; 3613 3614 count = 0; 3615 do { 3616 for_each_engine(smoke->engine, smoke->gt, id) { 3617 struct i915_gem_context *ctx = smoke_context(smoke); 3618 int err; 3619 3620 err = smoke_submit(smoke, 3621 ctx, random_priority(&smoke->prng), 3622 flags & BATCH ?
smoke->batch : NULL); 3623 if (err) 3624 return err; 3625 3626 count++; 3627 } 3628 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3629 3630 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3631 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3632 return 0; 3633 } 3634 3635 static int live_preempt_smoke(void *arg) 3636 { 3637 struct preempt_smoke smoke = { 3638 .gt = arg, 3639 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3640 .ncontext = 256, 3641 }; 3642 const unsigned int phase[] = { 0, BATCH }; 3643 struct igt_live_test t; 3644 int err = -ENOMEM; 3645 u32 *cs; 3646 int n; 3647 3648 smoke.contexts = kmalloc_array(smoke.ncontext, 3649 sizeof(*smoke.contexts), 3650 GFP_KERNEL); 3651 if (!smoke.contexts) 3652 return -ENOMEM; 3653 3654 smoke.batch = 3655 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3656 if (IS_ERR(smoke.batch)) { 3657 err = PTR_ERR(smoke.batch); 3658 goto err_free; 3659 } 3660 3661 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3662 if (IS_ERR(cs)) { 3663 err = PTR_ERR(cs); 3664 goto err_batch; 3665 } 3666 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3667 cs[n] = MI_ARB_CHECK; 3668 cs[n] = MI_BATCH_BUFFER_END; 3669 i915_gem_object_flush_map(smoke.batch); 3670 i915_gem_object_unpin_map(smoke.batch); 3671 3672 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3673 err = -EIO; 3674 goto err_batch; 3675 } 3676 3677 for (n = 0; n < smoke.ncontext; n++) { 3678 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3679 if (!smoke.contexts[n]) 3680 goto err_ctx; 3681 } 3682 3683 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3684 err = smoke_crescendo(&smoke, phase[n]); 3685 if (err) 3686 goto err_ctx; 3687 3688 err = smoke_random(&smoke, phase[n]); 3689 if (err) 3690 goto err_ctx; 3691 } 3692 3693 err_ctx: 3694 if (igt_live_test_end(&t)) 3695 err = -EIO; 3696 3697 for (n = 0; n < smoke.ncontext; n++) { 3698 if (!smoke.contexts[n]) 3699 break; 3700 kernel_context_close(smoke.contexts[n]); 3701 } 3702 3703 err_batch: 3704 i915_gem_object_put(smoke.batch); 3705 err_free: 3706 kfree(smoke.contexts); 3707 3708 return err; 3709 } 3710 3711 static int nop_virtual_engine(struct intel_gt *gt, 3712 struct intel_engine_cs **siblings, 3713 unsigned int nsibling, 3714 unsigned int nctx, 3715 unsigned int flags) 3716 #define CHAIN BIT(0) 3717 { 3718 IGT_TIMEOUT(end_time); 3719 struct i915_request *request[16] = {}; 3720 struct intel_context *ve[16]; 3721 unsigned long n, prime, nc; 3722 struct igt_live_test t; 3723 ktime_t times[2] = {}; 3724 int err; 3725 3726 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3727 3728 for (n = 0; n < nctx; n++) { 3729 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3730 if (IS_ERR(ve[n])) { 3731 err = PTR_ERR(ve[n]); 3732 nctx = n; 3733 goto out; 3734 } 3735 3736 err = intel_context_pin(ve[n]); 3737 if (err) { 3738 intel_context_put(ve[n]); 3739 nctx = n; 3740 goto out; 3741 } 3742 } 3743 3744 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3745 if (err) 3746 goto out; 3747 3748 for_each_prime_number_from(prime, 1, 8192) { 3749 times[1] = ktime_get_raw(); 3750 3751 if (flags & CHAIN) { 3752 for (nc = 0; nc < nctx; nc++) { 3753 for (n = 0; n < prime; n++) { 3754 struct i915_request *rq; 3755 3756 rq = i915_request_create(ve[nc]); 3757 if (IS_ERR(rq)) { 3758 err = PTR_ERR(rq); 3759 goto out; 3760 } 3761 3762 if (request[nc]) 3763 i915_request_put(request[nc]); 3764 request[nc] = i915_request_get(rq); 3765 
i915_request_add(rq); 3766 } 3767 } 3768 } else { 3769 for (n = 0; n < prime; n++) { 3770 for (nc = 0; nc < nctx; nc++) { 3771 struct i915_request *rq; 3772 3773 rq = i915_request_create(ve[nc]); 3774 if (IS_ERR(rq)) { 3775 err = PTR_ERR(rq); 3776 goto out; 3777 } 3778 3779 if (request[nc]) 3780 i915_request_put(request[nc]); 3781 request[nc] = i915_request_get(rq); 3782 i915_request_add(rq); 3783 } 3784 } 3785 } 3786 3787 for (nc = 0; nc < nctx; nc++) { 3788 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3789 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3790 __func__, ve[0]->engine->name, 3791 request[nc]->fence.context, 3792 request[nc]->fence.seqno); 3793 3794 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3795 __func__, ve[0]->engine->name, 3796 request[nc]->fence.context, 3797 request[nc]->fence.seqno); 3798 GEM_TRACE_DUMP(); 3799 intel_gt_set_wedged(gt); 3800 break; 3801 } 3802 } 3803 3804 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3805 if (prime == 1) 3806 times[0] = times[1]; 3807 3808 for (nc = 0; nc < nctx; nc++) { 3809 i915_request_put(request[nc]); 3810 request[nc] = NULL; 3811 } 3812 3813 if (__igt_timeout(end_time, NULL)) 3814 break; 3815 } 3816 3817 err = igt_live_test_end(&t); 3818 if (err) 3819 goto out; 3820 3821 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3822 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3823 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3824 3825 out: 3826 if (igt_flush_test(gt->i915)) 3827 err = -EIO; 3828 3829 for (nc = 0; nc < nctx; nc++) { 3830 i915_request_put(request[nc]); 3831 intel_context_unpin(ve[nc]); 3832 intel_context_put(ve[nc]); 3833 } 3834 return err; 3835 } 3836 3837 static unsigned int 3838 __select_siblings(struct intel_gt *gt, 3839 unsigned int class, 3840 struct intel_engine_cs **siblings, 3841 bool (*filter)(const struct intel_engine_cs *)) 3842 { 3843 unsigned int n = 0; 3844 unsigned int inst; 3845 3846 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3847 if (!gt->engine_class[class][inst]) 3848 continue; 3849 3850 if (filter && !filter(gt->engine_class[class][inst])) 3851 continue; 3852 3853 siblings[n++] = gt->engine_class[class][inst]; 3854 } 3855 3856 return n; 3857 } 3858 3859 static unsigned int 3860 select_siblings(struct intel_gt *gt, 3861 unsigned int class, 3862 struct intel_engine_cs **siblings) 3863 { 3864 return __select_siblings(gt, class, siblings, NULL); 3865 } 3866 3867 static int live_virtual_engine(void *arg) 3868 { 3869 struct intel_gt *gt = arg; 3870 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3871 struct intel_engine_cs *engine; 3872 enum intel_engine_id id; 3873 unsigned int class; 3874 int err; 3875 3876 if (intel_uc_uses_guc_submission(>->uc)) 3877 return 0; 3878 3879 for_each_engine(engine, gt, id) { 3880 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3881 if (err) { 3882 pr_err("Failed to wrap engine %s: err=%d\n", 3883 engine->name, err); 3884 return err; 3885 } 3886 } 3887 3888 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3889 int nsibling, n; 3890 3891 nsibling = select_siblings(gt, class, siblings); 3892 if (nsibling < 2) 3893 continue; 3894 3895 for (n = 1; n <= nsibling + 1; n++) { 3896 err = nop_virtual_engine(gt, siblings, nsibling, 3897 n, 0); 3898 if (err) 3899 return err; 3900 } 3901 3902 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3903 if (err) 3904 return err; 3905 } 3906 3907 return 0; 3908 } 3909 3910 static int mask_virtual_engine(struct intel_gt *gt, 3911 struct intel_engine_cs **siblings, 
3912 unsigned int nsibling) 3913 { 3914 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3915 struct intel_context *ve; 3916 struct igt_live_test t; 3917 unsigned int n; 3918 int err; 3919 3920 /* 3921 * Check that by setting the execution mask on a request, we can 3922 * restrict it to our desired engine within the virtual engine. 3923 */ 3924 3925 ve = intel_execlists_create_virtual(siblings, nsibling); 3926 if (IS_ERR(ve)) { 3927 err = PTR_ERR(ve); 3928 goto out_close; 3929 } 3930 3931 err = intel_context_pin(ve); 3932 if (err) 3933 goto out_put; 3934 3935 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3936 if (err) 3937 goto out_unpin; 3938 3939 for (n = 0; n < nsibling; n++) { 3940 request[n] = i915_request_create(ve); 3941 if (IS_ERR(request[n])) { 3942 err = PTR_ERR(request[n]); 3943 nsibling = n; 3944 goto out; 3945 } 3946 3947 /* Reverse order as it's more likely to be unnatural */ 3948 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3949 3950 i915_request_get(request[n]); 3951 i915_request_add(request[n]); 3952 } 3953 3954 for (n = 0; n < nsibling; n++) { 3955 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3956 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3957 __func__, ve->engine->name, 3958 request[n]->fence.context, 3959 request[n]->fence.seqno); 3960 3961 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3962 __func__, ve->engine->name, 3963 request[n]->fence.context, 3964 request[n]->fence.seqno); 3965 GEM_TRACE_DUMP(); 3966 intel_gt_set_wedged(gt); 3967 err = -EIO; 3968 goto out; 3969 } 3970 3971 if (request[n]->engine != siblings[nsibling - n - 1]) { 3972 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3973 request[n]->engine->name, 3974 siblings[nsibling - n - 1]->name); 3975 err = -EINVAL; 3976 goto out; 3977 } 3978 } 3979 3980 err = igt_live_test_end(&t); 3981 out: 3982 if (igt_flush_test(gt->i915)) 3983 err = -EIO; 3984 3985 for (n = 0; n < nsibling; n++) 3986 i915_request_put(request[n]); 3987 3988 out_unpin: 3989 intel_context_unpin(ve); 3990 out_put: 3991 intel_context_put(ve); 3992 out_close: 3993 return err; 3994 } 3995 3996 static int live_virtual_mask(void *arg) 3997 { 3998 struct intel_gt *gt = arg; 3999 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4000 unsigned int class; 4001 int err; 4002 4003 if (intel_uc_uses_guc_submission(>->uc)) 4004 return 0; 4005 4006 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4007 unsigned int nsibling; 4008 4009 nsibling = select_siblings(gt, class, siblings); 4010 if (nsibling < 2) 4011 continue; 4012 4013 err = mask_virtual_engine(gt, siblings, nsibling); 4014 if (err) 4015 return err; 4016 } 4017 4018 return 0; 4019 } 4020 4021 static int slicein_virtual_engine(struct intel_gt *gt, 4022 struct intel_engine_cs **siblings, 4023 unsigned int nsibling) 4024 { 4025 const long timeout = slice_timeout(siblings[0]); 4026 struct intel_context *ce; 4027 struct i915_request *rq; 4028 struct igt_spinner spin; 4029 unsigned int n; 4030 int err = 0; 4031 4032 /* 4033 * Virtual requests must take part in timeslicing on the target engines. 
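* With a spinner already running on every sibling, the virtual request can only complete within slice_timeout() if it is granted a timeslice.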
4034 */ 4035 4036 if (igt_spinner_init(&spin, gt)) 4037 return -ENOMEM; 4038 4039 for (n = 0; n < nsibling; n++) { 4040 ce = intel_context_create(siblings[n]); 4041 if (IS_ERR(ce)) { 4042 err = PTR_ERR(ce); 4043 goto out; 4044 } 4045 4046 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4047 intel_context_put(ce); 4048 if (IS_ERR(rq)) { 4049 err = PTR_ERR(rq); 4050 goto out; 4051 } 4052 4053 i915_request_add(rq); 4054 } 4055 4056 ce = intel_execlists_create_virtual(siblings, nsibling); 4057 if (IS_ERR(ce)) { 4058 err = PTR_ERR(ce); 4059 goto out; 4060 } 4061 4062 rq = intel_context_create_request(ce); 4063 intel_context_put(ce); 4064 if (IS_ERR(rq)) { 4065 err = PTR_ERR(rq); 4066 goto out; 4067 } 4068 4069 i915_request_get(rq); 4070 i915_request_add(rq); 4071 if (i915_request_wait(rq, 0, timeout) < 0) { 4072 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", 4073 __func__, rq->engine->name); 4074 GEM_TRACE_DUMP(); 4075 intel_gt_set_wedged(gt); 4076 err = -EIO; 4077 } 4078 i915_request_put(rq); 4079 4080 out: 4081 igt_spinner_end(&spin); 4082 if (igt_flush_test(gt->i915)) 4083 err = -EIO; 4084 igt_spinner_fini(&spin); 4085 return err; 4086 } 4087 4088 static int sliceout_virtual_engine(struct intel_gt *gt, 4089 struct intel_engine_cs **siblings, 4090 unsigned int nsibling) 4091 { 4092 const long timeout = slice_timeout(siblings[0]); 4093 struct intel_context *ce; 4094 struct i915_request *rq; 4095 struct igt_spinner spin; 4096 unsigned int n; 4097 int err = 0; 4098 4099 /* 4100 * Virtual requests must allow others a fair timeslice. 4101 */ 4102 4103 if (igt_spinner_init(&spin, gt)) 4104 return -ENOMEM; 4105 4106 /* XXX We do not handle oversubscription and fairness with normal rq */ 4107 for (n = 0; n < nsibling; n++) { 4108 ce = intel_execlists_create_virtual(siblings, nsibling); 4109 if (IS_ERR(ce)) { 4110 err = PTR_ERR(ce); 4111 goto out; 4112 } 4113 4114 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4115 intel_context_put(ce); 4116 if (IS_ERR(rq)) { 4117 err = PTR_ERR(rq); 4118 goto out; 4119 } 4120 4121 i915_request_add(rq); 4122 } 4123 4124 for (n = 0; !err && n < nsibling; n++) { 4125 ce = intel_context_create(siblings[n]); 4126 if (IS_ERR(ce)) { 4127 err = PTR_ERR(ce); 4128 goto out; 4129 } 4130 4131 rq = intel_context_create_request(ce); 4132 intel_context_put(ce); 4133 if (IS_ERR(rq)) { 4134 err = PTR_ERR(rq); 4135 goto out; 4136 } 4137 4138 i915_request_get(rq); 4139 i915_request_add(rq); 4140 if (i915_request_wait(rq, 0, timeout) < 0) { 4141 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", 4142 __func__, siblings[n]->name); 4143 GEM_TRACE_DUMP(); 4144 intel_gt_set_wedged(gt); 4145 err = -EIO; 4146 } 4147 i915_request_put(rq); 4148 } 4149 4150 out: 4151 igt_spinner_end(&spin); 4152 if (igt_flush_test(gt->i915)) 4153 err = -EIO; 4154 igt_spinner_fini(&spin); 4155 return err; 4156 } 4157 4158 static int live_virtual_slice(void *arg) 4159 { 4160 struct intel_gt *gt = arg; 4161 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4162 unsigned int class; 4163 int err; 4164 4165 if (intel_uc_uses_guc_submission(>->uc)) 4166 return 0; 4167 4168 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4169 unsigned int nsibling; 4170 4171 nsibling = __select_siblings(gt, class, siblings, 4172 intel_engine_has_timeslices); 4173 if (nsibling < 2) 4174 continue; 4175 4176 err = slicein_virtual_engine(gt, siblings, nsibling); 4177 if (err) 4178 return err; 4179 4180 err = sliceout_virtual_engine(gt, siblings, nsibling); 4181 if (err) 4182 
return err; 4183 } 4184 4185 return 0; 4186 } 4187 4188 static int preserved_virtual_engine(struct intel_gt *gt, 4189 struct intel_engine_cs **siblings, 4190 unsigned int nsibling) 4191 { 4192 struct i915_request *last = NULL; 4193 struct intel_context *ve; 4194 struct i915_vma *scratch; 4195 struct igt_live_test t; 4196 unsigned int n; 4197 int err = 0; 4198 u32 *cs; 4199 4200 scratch = __vm_create_scratch_for_read(&siblings[0]->gt->ggtt->vm, 4201 PAGE_SIZE); 4202 if (IS_ERR(scratch)) 4203 return PTR_ERR(scratch); 4204 4205 err = i915_vma_sync(scratch); 4206 if (err) 4207 goto out_scratch; 4208 4209 ve = intel_execlists_create_virtual(siblings, nsibling); 4210 if (IS_ERR(ve)) { 4211 err = PTR_ERR(ve); 4212 goto out_scratch; 4213 } 4214 4215 err = intel_context_pin(ve); 4216 if (err) 4217 goto out_put; 4218 4219 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 4220 if (err) 4221 goto out_unpin; 4222 4223 for (n = 0; n < NUM_GPR_DW; n++) { 4224 struct intel_engine_cs *engine = siblings[n % nsibling]; 4225 struct i915_request *rq; 4226 4227 rq = i915_request_create(ve); 4228 if (IS_ERR(rq)) { 4229 err = PTR_ERR(rq); 4230 goto out_end; 4231 } 4232 4233 i915_request_put(last); 4234 last = i915_request_get(rq); 4235 4236 cs = intel_ring_begin(rq, 8); 4237 if (IS_ERR(cs)) { 4238 i915_request_add(rq); 4239 err = PTR_ERR(cs); 4240 goto out_end; 4241 } 4242 4243 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4244 *cs++ = CS_GPR(engine, n); 4245 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4246 *cs++ = 0; 4247 4248 *cs++ = MI_LOAD_REGISTER_IMM(1); 4249 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 4250 *cs++ = n + 1; 4251 4252 *cs++ = MI_NOOP; 4253 intel_ring_advance(rq, cs); 4254 4255 /* Restrict this request to run on a particular engine */ 4256 rq->execution_mask = engine->mask; 4257 i915_request_add(rq); 4258 } 4259 4260 if (i915_request_wait(last, 0, HZ / 5) < 0) { 4261 err = -ETIME; 4262 goto out_end; 4263 } 4264 4265 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4266 if (IS_ERR(cs)) { 4267 err = PTR_ERR(cs); 4268 goto out_end; 4269 } 4270 4271 for (n = 0; n < NUM_GPR_DW; n++) { 4272 if (cs[n] != n) { 4273 pr_err("Incorrect value[%d] found for GPR[%d]\n", 4274 cs[n], n); 4275 err = -EINVAL; 4276 break; 4277 } 4278 } 4279 4280 i915_gem_object_unpin_map(scratch->obj); 4281 4282 out_end: 4283 if (igt_live_test_end(&t)) 4284 err = -EIO; 4285 i915_request_put(last); 4286 out_unpin: 4287 intel_context_unpin(ve); 4288 out_put: 4289 intel_context_put(ve); 4290 out_scratch: 4291 i915_vma_unpin_and_release(&scratch, 0); 4292 return err; 4293 } 4294 4295 static int live_virtual_preserved(void *arg) 4296 { 4297 struct intel_gt *gt = arg; 4298 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4299 unsigned int class; 4300 4301 /* 4302 * Check that the context image retains non-privileged (user) registers 4303 * from one engine to the next. For this we check that the CS_GPR 4304 * are preserved. 4305 */ 4306 4307 if (intel_uc_uses_guc_submission(>->uc)) 4308 return 0; 4309 4310 /* As we use CS_GPR we cannot run before they existed on all engines. 
*/ 4311 if (INTEL_GEN(gt->i915) < 9) 4312 return 0; 4313 4314 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4315 int nsibling, err; 4316 4317 nsibling = select_siblings(gt, class, siblings); 4318 if (nsibling < 2) 4319 continue; 4320 4321 err = preserved_virtual_engine(gt, siblings, nsibling); 4322 if (err) 4323 return err; 4324 } 4325 4326 return 0; 4327 } 4328 4329 static int bond_virtual_engine(struct intel_gt *gt, 4330 unsigned int class, 4331 struct intel_engine_cs **siblings, 4332 unsigned int nsibling, 4333 unsigned int flags) 4334 #define BOND_SCHEDULE BIT(0) 4335 { 4336 struct intel_engine_cs *master; 4337 struct i915_request *rq[16]; 4338 enum intel_engine_id id; 4339 struct igt_spinner spin; 4340 unsigned long n; 4341 int err; 4342 4343 /* 4344 * A set of bonded requests is intended to be run concurrently 4345 * across a number of engines. We use one request per-engine 4346 * and a magic fence to schedule each of the bonded requests 4347 * at the same time. A consequence of our current scheduler is that 4348 * we only move requests to the HW ready queue when the request 4349 * becomes ready, that is when all of its prerequisite fences have 4350 * been signaled. As one of those fences is the master submit fence, 4351 * there is a delay on all secondary fences as the HW may be 4352 * currently busy. Equally, as all the requests are independent, 4353 * they may have other fences that delay individual request 4354 * submission to HW. Ergo, we do not guarantee that all requests are 4355 * immediately submitted to HW at the same time, just that if the 4356 * rules are abided by, they are ready at the same time as the 4357 * first is submitted. Userspace can embed semaphores in its batch 4358 * to ensure parallel execution of its phases as it requires. 4359 * Though naturally it gets requested that perhaps the scheduler should 4360 * take care of parallel execution, even across preemption events on 4361 * different HW. (The proper answer is of course "lalalala".) 4362 * 4363 * With the submit-fence, we have identified three possible phases 4364 * of synchronisation depending on the master fence: queued (not 4365 * ready), executing, and signaled. The first two are quite simple 4366 * and checked below. However, the signaled master fence handling is 4367 * contentious. Currently we do not distinguish between a signaled 4368 * fence and an expired fence, as once signaled it does not convey 4369 * any information about the previous execution. It may even be freed 4370 * and hence checking later it may not exist at all. Ergo we currently 4371 * do not apply the bonding constraint for an already signaled fence, 4372 * as our expectation is that it should not constrain the secondaries 4373 * and is outside of the scope of the bonded request API (i.e. all 4374 * userspace requests are meant to be running in parallel). As 4375 * it imposes no constraint, and is effectively a no-op, we do not 4376 * check below as normal execution flows are checked extensively above. 4377 * 4378 * XXX Is the degenerate handling of signaled submit fences the 4379 * expected behaviour for userspace?
4380 */ 4381 4382 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 4383 4384 if (igt_spinner_init(&spin, gt)) 4385 return -ENOMEM; 4386 4387 err = 0; 4388 rq[0] = ERR_PTR(-ENOMEM); 4389 for_each_engine(master, gt, id) { 4390 struct i915_sw_fence fence = {}; 4391 struct intel_context *ce; 4392 4393 if (master->class == class) 4394 continue; 4395 4396 ce = intel_context_create(master); 4397 if (IS_ERR(ce)) { 4398 err = PTR_ERR(ce); 4399 goto out; 4400 } 4401 4402 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 4403 4404 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 4405 intel_context_put(ce); 4406 if (IS_ERR(rq[0])) { 4407 err = PTR_ERR(rq[0]); 4408 goto out; 4409 } 4410 i915_request_get(rq[0]); 4411 4412 if (flags & BOND_SCHEDULE) { 4413 onstack_fence_init(&fence); 4414 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4415 &fence, 4416 GFP_KERNEL); 4417 } 4418 4419 i915_request_add(rq[0]); 4420 if (err < 0) 4421 goto out; 4422 4423 if (!(flags & BOND_SCHEDULE) && 4424 !igt_wait_for_spinner(&spin, rq[0])) { 4425 err = -EIO; 4426 goto out; 4427 } 4428 4429 for (n = 0; n < nsibling; n++) { 4430 struct intel_context *ve; 4431 4432 ve = intel_execlists_create_virtual(siblings, nsibling); 4433 if (IS_ERR(ve)) { 4434 err = PTR_ERR(ve); 4435 onstack_fence_fini(&fence); 4436 goto out; 4437 } 4438 4439 err = intel_virtual_engine_attach_bond(ve->engine, 4440 master, 4441 siblings[n]); 4442 if (err) { 4443 intel_context_put(ve); 4444 onstack_fence_fini(&fence); 4445 goto out; 4446 } 4447 4448 err = intel_context_pin(ve); 4449 intel_context_put(ve); 4450 if (err) { 4451 onstack_fence_fini(&fence); 4452 goto out; 4453 } 4454 4455 rq[n + 1] = i915_request_create(ve); 4456 intel_context_unpin(ve); 4457 if (IS_ERR(rq[n + 1])) { 4458 err = PTR_ERR(rq[n + 1]); 4459 onstack_fence_fini(&fence); 4460 goto out; 4461 } 4462 i915_request_get(rq[n + 1]); 4463 4464 err = i915_request_await_execution(rq[n + 1], 4465 &rq[0]->fence, 4466 ve->engine->bond_execute); 4467 i915_request_add(rq[n + 1]); 4468 if (err < 0) { 4469 onstack_fence_fini(&fence); 4470 goto out; 4471 } 4472 } 4473 onstack_fence_fini(&fence); 4474 intel_engine_flush_submission(master); 4475 igt_spinner_end(&spin); 4476 4477 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4478 pr_err("Master request did not execute (on %s)!\n", 4479 rq[0]->engine->name); 4480 err = -EIO; 4481 goto out; 4482 } 4483 4484 for (n = 0; n < nsibling; n++) { 4485 if (i915_request_wait(rq[n + 1], 0, 4486 MAX_SCHEDULE_TIMEOUT) < 0) { 4487 err = -EIO; 4488 goto out; 4489 } 4490 4491 if (rq[n + 1]->engine != siblings[n]) { 4492 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4493 siblings[n]->name, 4494 rq[n + 1]->engine->name, 4495 rq[0]->engine->name); 4496 err = -EINVAL; 4497 goto out; 4498 } 4499 } 4500 4501 for (n = 0; !IS_ERR(rq[n]); n++) 4502 i915_request_put(rq[n]); 4503 rq[0] = ERR_PTR(-ENOMEM); 4504 } 4505 4506 out: 4507 for (n = 0; !IS_ERR(rq[n]); n++) 4508 i915_request_put(rq[n]); 4509 if (igt_flush_test(gt->i915)) 4510 err = -EIO; 4511 4512 igt_spinner_fini(&spin); 4513 return err; 4514 } 4515 4516 static int live_virtual_bond(void *arg) 4517 { 4518 static const struct phase { 4519 const char *name; 4520 unsigned int flags; 4521 } phases[] = { 4522 { "", 0 }, 4523 { "schedule", BOND_SCHEDULE }, 4524 { }, 4525 }; 4526 struct intel_gt *gt = arg; 4527 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4528 unsigned int class; 4529 int err; 4530 4531 if (intel_uc_uses_guc_submission(>->uc)) 4532 
return 0; 4533 4534 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4535 const struct phase *p; 4536 int nsibling; 4537 4538 nsibling = select_siblings(gt, class, siblings); 4539 if (nsibling < 2) 4540 continue; 4541 4542 for (p = phases; p->name; p++) { 4543 err = bond_virtual_engine(gt, 4544 class, siblings, nsibling, 4545 p->flags); 4546 if (err) { 4547 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 4548 __func__, p->name, class, nsibling, err); 4549 return err; 4550 } 4551 } 4552 } 4553 4554 return 0; 4555 } 4556 4557 static int reset_virtual_engine(struct intel_gt *gt, 4558 struct intel_engine_cs **siblings, 4559 unsigned int nsibling) 4560 { 4561 struct intel_engine_cs *engine; 4562 struct intel_context *ve; 4563 struct igt_spinner spin; 4564 struct i915_request *rq; 4565 unsigned int n; 4566 int err = 0; 4567 4568 /* 4569 * In order to support offline error capture for fast preempt reset, 4570 * we need to decouple the guilty request and ensure that it and its 4571 * descendents are not executed while the capture is in progress. 4572 */ 4573 4574 if (igt_spinner_init(&spin, gt)) 4575 return -ENOMEM; 4576 4577 ve = intel_execlists_create_virtual(siblings, nsibling); 4578 if (IS_ERR(ve)) { 4579 err = PTR_ERR(ve); 4580 goto out_spin; 4581 } 4582 4583 for (n = 0; n < nsibling; n++) 4584 st_engine_heartbeat_disable(siblings[n]); 4585 4586 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4587 if (IS_ERR(rq)) { 4588 err = PTR_ERR(rq); 4589 goto out_heartbeat; 4590 } 4591 i915_request_add(rq); 4592 4593 if (!igt_wait_for_spinner(&spin, rq)) { 4594 intel_gt_set_wedged(gt); 4595 err = -ETIME; 4596 goto out_heartbeat; 4597 } 4598 4599 engine = rq->engine; 4600 GEM_BUG_ON(engine == ve->engine); 4601 4602 /* Take ownership of the reset and tasklet */ 4603 local_bh_disable(); 4604 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 4605 >->reset.flags)) { 4606 local_bh_enable(); 4607 intel_gt_set_wedged(gt); 4608 err = -EBUSY; 4609 goto out_heartbeat; 4610 } 4611 tasklet_disable(&engine->execlists.tasklet); 4612 4613 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 4614 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 4615 4616 /* Fake a preemption event; failed of course */ 4617 spin_lock_irq(&engine->active.lock); 4618 __unwind_incomplete_requests(engine); 4619 spin_unlock_irq(&engine->active.lock); 4620 GEM_BUG_ON(rq->engine != engine); 4621 4622 /* Reset the engine while keeping our active request on hold */ 4623 execlists_hold(engine, rq); 4624 GEM_BUG_ON(!i915_request_on_hold(rq)); 4625 4626 __intel_engine_reset_bh(engine, NULL); 4627 GEM_BUG_ON(rq->fence.error != -EIO); 4628 4629 /* Release our grasp on the engine, letting CS flow again */ 4630 tasklet_enable(&engine->execlists.tasklet); 4631 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 4632 local_bh_enable(); 4633 4634 /* Check that we do not resubmit the held request */ 4635 i915_request_get(rq); 4636 if (!i915_request_wait(rq, 0, HZ / 5)) { 4637 pr_err("%s: on hold request completed!\n", 4638 engine->name); 4639 intel_gt_set_wedged(gt); 4640 err = -EIO; 4641 goto out_rq; 4642 } 4643 GEM_BUG_ON(!i915_request_on_hold(rq)); 4644 4645 /* But is resubmitted on release */ 4646 execlists_unhold(engine, rq); 4647 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4648 pr_err("%s: held request did not complete!\n", 4649 engine->name); 4650 intel_gt_set_wedged(gt); 4651 err = -ETIME; 4652 } 4653 4654 out_rq: 4655 i915_request_put(rq); 4656 out_heartbeat: 4657 for (n = 0; n < 
nsibling; n++) 4658 st_engine_heartbeat_enable(siblings[n]); 4659 4660 intel_context_put(ve); 4661 out_spin: 4662 igt_spinner_fini(&spin); 4663 return err; 4664 } 4665 4666 static int live_virtual_reset(void *arg) 4667 { 4668 struct intel_gt *gt = arg; 4669 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4670 unsigned int class; 4671 4672 /* 4673 * Check that we handle a reset event within a virtual engine. 4674 * Only the physical engine is reset, but we have to check the flow 4675 * of the virtual requests around the reset, and make sure it is not 4676 * forgotten. 4677 */ 4678 4679 if (intel_uc_uses_guc_submission(>->uc)) 4680 return 0; 4681 4682 if (!intel_has_reset_engine(gt)) 4683 return 0; 4684 4685 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4686 int nsibling, err; 4687 4688 nsibling = select_siblings(gt, class, siblings); 4689 if (nsibling < 2) 4690 continue; 4691 4692 err = reset_virtual_engine(gt, siblings, nsibling); 4693 if (err) 4694 return err; 4695 } 4696 4697 return 0; 4698 } 4699 4700 int intel_execlists_live_selftests(struct drm_i915_private *i915) 4701 { 4702 static const struct i915_subtest tests[] = { 4703 SUBTEST(live_sanitycheck), 4704 SUBTEST(live_unlite_switch), 4705 SUBTEST(live_unlite_preempt), 4706 SUBTEST(live_unlite_ring), 4707 SUBTEST(live_pin_rewind), 4708 SUBTEST(live_hold_reset), 4709 SUBTEST(live_error_interrupt), 4710 SUBTEST(live_timeslice_preempt), 4711 SUBTEST(live_timeslice_rewind), 4712 SUBTEST(live_timeslice_queue), 4713 SUBTEST(live_timeslice_nopreempt), 4714 SUBTEST(live_busywait_preempt), 4715 SUBTEST(live_preempt), 4716 SUBTEST(live_late_preempt), 4717 SUBTEST(live_nopreempt), 4718 SUBTEST(live_preempt_cancel), 4719 SUBTEST(live_suppress_self_preempt), 4720 SUBTEST(live_chain_preempt), 4721 SUBTEST(live_preempt_ring), 4722 SUBTEST(live_preempt_gang), 4723 SUBTEST(live_preempt_timeout), 4724 SUBTEST(live_preempt_user), 4725 SUBTEST(live_preempt_smoke), 4726 SUBTEST(live_virtual_engine), 4727 SUBTEST(live_virtual_mask), 4728 SUBTEST(live_virtual_preserved), 4729 SUBTEST(live_virtual_slice), 4730 SUBTEST(live_virtual_bond), 4731 SUBTEST(live_virtual_reset), 4732 }; 4733 4734 if (!HAS_EXECLISTS(i915)) 4735 return 0; 4736 4737 if (intel_gt_is_wedged(&i915->gt)) 4738 return 0; 4739 4740 return intel_gt_live_subtests(tests, &i915->gt); 4741 } 4742