/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */

static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

static bool is_active(struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return true;

	if (i915_request_on_hold(rq))
		return true;

	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
		return true;

	return false;
}

static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	timeout += jiffies;
	do {
		bool done = time_after(jiffies, timeout);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}

static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
200 */ 201 202 if (igt_spinner_init(&spin, gt)) 203 return err; 204 205 err = 0; 206 for_each_engine(engine, gt, id) { 207 struct intel_context *ce[2] = {}; 208 struct i915_request *rq[2]; 209 struct igt_live_test t; 210 int n; 211 212 if (prio && !intel_engine_has_preemption(engine)) 213 continue; 214 215 if (!intel_engine_can_store_dword(engine)) 216 continue; 217 218 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 219 err = -EIO; 220 break; 221 } 222 st_engine_heartbeat_disable(engine); 223 224 for (n = 0; n < ARRAY_SIZE(ce); n++) { 225 struct intel_context *tmp; 226 227 tmp = intel_context_create(engine); 228 if (IS_ERR(tmp)) { 229 err = PTR_ERR(tmp); 230 goto err_ce; 231 } 232 233 err = intel_context_pin(tmp); 234 if (err) { 235 intel_context_put(tmp); 236 goto err_ce; 237 } 238 239 /* 240 * Setup the pair of contexts such that if we 241 * lite-restore using the RING_TAIL from ce[1] it 242 * will execute garbage from ce[0]->ring. 243 */ 244 memset(tmp->ring->vaddr, 245 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 246 tmp->ring->vma->size); 247 248 ce[n] = tmp; 249 } 250 GEM_BUG_ON(!ce[1]->ring->size); 251 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 252 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); 253 254 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 255 if (IS_ERR(rq[0])) { 256 err = PTR_ERR(rq[0]); 257 goto err_ce; 258 } 259 260 i915_request_get(rq[0]); 261 i915_request_add(rq[0]); 262 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 263 264 if (!igt_wait_for_spinner(&spin, rq[0])) { 265 i915_request_put(rq[0]); 266 goto err_ce; 267 } 268 269 rq[1] = i915_request_create(ce[1]); 270 if (IS_ERR(rq[1])) { 271 err = PTR_ERR(rq[1]); 272 i915_request_put(rq[0]); 273 goto err_ce; 274 } 275 276 if (!prio) { 277 /* 278 * Ensure we do the switch to ce[1] on completion. 279 * 280 * rq[0] is already submitted, so this should reduce 281 * to a no-op (a wait on a request on the same engine 282 * uses the submit fence, not the completion fence), 283 * but it will install a dependency on rq[1] for rq[0] 284 * that will prevent the pair being reordered by 285 * timeslicing. 
286 */ 287 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 288 } 289 290 i915_request_get(rq[1]); 291 i915_request_add(rq[1]); 292 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 293 i915_request_put(rq[0]); 294 295 if (prio) { 296 struct i915_sched_attr attr = { 297 .priority = prio, 298 }; 299 300 /* Alternatively preempt the spinner with ce[1] */ 301 engine->schedule(rq[1], &attr); 302 } 303 304 /* And switch back to ce[0] for good measure */ 305 rq[0] = i915_request_create(ce[0]); 306 if (IS_ERR(rq[0])) { 307 err = PTR_ERR(rq[0]); 308 i915_request_put(rq[1]); 309 goto err_ce; 310 } 311 312 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 313 i915_request_get(rq[0]); 314 i915_request_add(rq[0]); 315 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 316 i915_request_put(rq[1]); 317 i915_request_put(rq[0]); 318 319 err_ce: 320 intel_engine_flush_submission(engine); 321 igt_spinner_end(&spin); 322 for (n = 0; n < ARRAY_SIZE(ce); n++) { 323 if (IS_ERR_OR_NULL(ce[n])) 324 break; 325 326 intel_context_unpin(ce[n]); 327 intel_context_put(ce[n]); 328 } 329 330 st_engine_heartbeat_enable(engine); 331 if (igt_live_test_end(&t)) 332 err = -EIO; 333 if (err) 334 break; 335 } 336 337 igt_spinner_fini(&spin); 338 return err; 339 } 340 341 static int live_unlite_switch(void *arg) 342 { 343 return live_unlite_restore(arg, 0); 344 } 345 346 static int live_unlite_preempt(void *arg) 347 { 348 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 349 } 350 351 static int live_unlite_ring(void *arg) 352 { 353 struct intel_gt *gt = arg; 354 struct intel_engine_cs *engine; 355 struct igt_spinner spin; 356 enum intel_engine_id id; 357 int err = 0; 358 359 /* 360 * Setup a preemption event that will cause almost the entire ring 361 * to be unwound, potentially fooling our intel_ring_direction() 362 * into emitting a forward lite-restore instead of the rollback. 
363 */ 364 365 if (igt_spinner_init(&spin, gt)) 366 return -ENOMEM; 367 368 for_each_engine(engine, gt, id) { 369 struct intel_context *ce[2] = {}; 370 struct i915_request *rq; 371 struct igt_live_test t; 372 int n; 373 374 if (!intel_engine_has_preemption(engine)) 375 continue; 376 377 if (!intel_engine_can_store_dword(engine)) 378 continue; 379 380 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 381 err = -EIO; 382 break; 383 } 384 st_engine_heartbeat_disable(engine); 385 386 for (n = 0; n < ARRAY_SIZE(ce); n++) { 387 struct intel_context *tmp; 388 389 tmp = intel_context_create(engine); 390 if (IS_ERR(tmp)) { 391 err = PTR_ERR(tmp); 392 goto err_ce; 393 } 394 395 err = intel_context_pin(tmp); 396 if (err) { 397 intel_context_put(tmp); 398 goto err_ce; 399 } 400 401 memset32(tmp->ring->vaddr, 402 0xdeadbeef, /* trigger a hang if executed */ 403 tmp->ring->vma->size / sizeof(u32)); 404 405 ce[n] = tmp; 406 } 407 408 /* Create max prio spinner, followed by N low prio nops */ 409 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 410 if (IS_ERR(rq)) { 411 err = PTR_ERR(rq); 412 goto err_ce; 413 } 414 415 i915_request_get(rq); 416 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 417 i915_request_add(rq); 418 419 if (!igt_wait_for_spinner(&spin, rq)) { 420 intel_gt_set_wedged(gt); 421 i915_request_put(rq); 422 err = -ETIME; 423 goto err_ce; 424 } 425 426 /* Fill the ring, until we will cause a wrap */ 427 n = 0; 428 while (intel_ring_direction(ce[0]->ring, 429 rq->wa_tail, 430 ce[0]->ring->tail) <= 0) { 431 struct i915_request *tmp; 432 433 tmp = intel_context_create_request(ce[0]); 434 if (IS_ERR(tmp)) { 435 err = PTR_ERR(tmp); 436 i915_request_put(rq); 437 goto err_ce; 438 } 439 440 i915_request_add(tmp); 441 intel_engine_flush_submission(engine); 442 n++; 443 } 444 intel_engine_flush_submission(engine); 445 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 446 engine->name, n, 447 ce[0]->ring->size, 448 ce[0]->ring->tail, 449 ce[0]->ring->emit, 450 rq->tail); 451 GEM_BUG_ON(intel_ring_direction(ce[0]->ring, 452 rq->tail, 453 ce[0]->ring->tail) <= 0); 454 i915_request_put(rq); 455 456 /* Create a second ring to preempt the first ring after rq[0] */ 457 rq = intel_context_create_request(ce[1]); 458 if (IS_ERR(rq)) { 459 err = PTR_ERR(rq); 460 goto err_ce; 461 } 462 463 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 464 i915_request_get(rq); 465 i915_request_add(rq); 466 467 err = wait_for_submit(engine, rq, HZ / 2); 468 i915_request_put(rq); 469 if (err) { 470 pr_err("%s: preemption request was not submitted\n", 471 engine->name); 472 err = -ETIME; 473 } 474 475 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 476 engine->name, 477 ce[0]->ring->tail, ce[0]->ring->emit, 478 ce[1]->ring->tail, ce[1]->ring->emit); 479 480 err_ce: 481 intel_engine_flush_submission(engine); 482 igt_spinner_end(&spin); 483 for (n = 0; n < ARRAY_SIZE(ce); n++) { 484 if (IS_ERR_OR_NULL(ce[n])) 485 break; 486 487 intel_context_unpin(ce[n]); 488 intel_context_put(ce[n]); 489 } 490 st_engine_heartbeat_enable(engine); 491 if (igt_live_test_end(&t)) 492 err = -EIO; 493 if (err) 494 break; 495 } 496 497 igt_spinner_fini(&spin); 498 return err; 499 } 500 501 static int live_pin_rewind(void *arg) 502 { 503 struct intel_gt *gt = arg; 504 struct intel_engine_cs *engine; 505 enum intel_engine_id id; 506 int err = 0; 507 508 /* 509 * We have to be careful not to trust intel_ring too much, for example 510 * ring->head is updated upon 
retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
591 */ 592 593 if (!intel_has_reset_engine(gt)) 594 return 0; 595 596 if (igt_spinner_init(&spin, gt)) 597 return -ENOMEM; 598 599 for_each_engine(engine, gt, id) { 600 struct intel_context *ce; 601 struct i915_request *rq; 602 603 ce = intel_context_create(engine); 604 if (IS_ERR(ce)) { 605 err = PTR_ERR(ce); 606 break; 607 } 608 609 st_engine_heartbeat_disable(engine); 610 611 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 612 if (IS_ERR(rq)) { 613 err = PTR_ERR(rq); 614 goto out; 615 } 616 i915_request_add(rq); 617 618 if (!igt_wait_for_spinner(&spin, rq)) { 619 intel_gt_set_wedged(gt); 620 err = -ETIME; 621 goto out; 622 } 623 624 /* We have our request executing, now remove it and reset */ 625 626 if (test_and_set_bit(I915_RESET_ENGINE + id, 627 >->reset.flags)) { 628 intel_gt_set_wedged(gt); 629 err = -EBUSY; 630 goto out; 631 } 632 tasklet_disable(&engine->execlists.tasklet); 633 634 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 635 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 636 637 i915_request_get(rq); 638 execlists_hold(engine, rq); 639 GEM_BUG_ON(!i915_request_on_hold(rq)); 640 641 intel_engine_reset(engine, NULL); 642 GEM_BUG_ON(rq->fence.error != -EIO); 643 644 tasklet_enable(&engine->execlists.tasklet); 645 clear_and_wake_up_bit(I915_RESET_ENGINE + id, 646 >->reset.flags); 647 648 /* Check that we do not resubmit the held request */ 649 if (!i915_request_wait(rq, 0, HZ / 5)) { 650 pr_err("%s: on hold request completed!\n", 651 engine->name); 652 i915_request_put(rq); 653 err = -EIO; 654 goto out; 655 } 656 GEM_BUG_ON(!i915_request_on_hold(rq)); 657 658 /* But is resubmitted on release */ 659 execlists_unhold(engine, rq); 660 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 661 pr_err("%s: held request did not complete!\n", 662 engine->name); 663 intel_gt_set_wedged(gt); 664 err = -ETIME; 665 } 666 i915_request_put(rq); 667 668 out: 669 st_engine_heartbeat_enable(engine); 670 intel_context_put(ce); 671 if (err) 672 break; 673 } 674 675 igt_spinner_fini(&spin); 676 return err; 677 } 678 679 static const char *error_repr(int err) 680 { 681 return err ? "bad" : "good"; 682 } 683 684 static int live_error_interrupt(void *arg) 685 { 686 static const struct error_phase { 687 enum { GOOD = 0, BAD = -EIO } error[2]; 688 } phases[] = { 689 { { BAD, GOOD } }, 690 { { BAD, BAD } }, 691 { { BAD, GOOD } }, 692 { { GOOD, GOOD } }, /* sentinel */ 693 }; 694 struct intel_gt *gt = arg; 695 struct intel_engine_cs *engine; 696 enum intel_engine_id id; 697 698 /* 699 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning 700 * of invalid commands in user batches that will cause a GPU hang. 701 * This is a faster mechanism than using hangcheck/heartbeats, but 702 * only detects problems the HW knows about -- it will not warn when 703 * we kill the HW! 704 * 705 * To verify our detection and reset, we throw some invalid commands 706 * at the HW and wait for the interrupt. 
707 */ 708 709 if (!intel_has_reset_engine(gt)) 710 return 0; 711 712 for_each_engine(engine, gt, id) { 713 const struct error_phase *p; 714 int err = 0; 715 716 st_engine_heartbeat_disable(engine); 717 718 for (p = phases; p->error[0] != GOOD; p++) { 719 struct i915_request *client[ARRAY_SIZE(phases->error)]; 720 u32 *cs; 721 int i; 722 723 memset(client, 0, sizeof(*client)); 724 for (i = 0; i < ARRAY_SIZE(client); i++) { 725 struct intel_context *ce; 726 struct i915_request *rq; 727 728 ce = intel_context_create(engine); 729 if (IS_ERR(ce)) { 730 err = PTR_ERR(ce); 731 goto out; 732 } 733 734 rq = intel_context_create_request(ce); 735 intel_context_put(ce); 736 if (IS_ERR(rq)) { 737 err = PTR_ERR(rq); 738 goto out; 739 } 740 741 if (rq->engine->emit_init_breadcrumb) { 742 err = rq->engine->emit_init_breadcrumb(rq); 743 if (err) { 744 i915_request_add(rq); 745 goto out; 746 } 747 } 748 749 cs = intel_ring_begin(rq, 2); 750 if (IS_ERR(cs)) { 751 i915_request_add(rq); 752 err = PTR_ERR(cs); 753 goto out; 754 } 755 756 if (p->error[i]) { 757 *cs++ = 0xdeadbeef; 758 *cs++ = 0xdeadbeef; 759 } else { 760 *cs++ = MI_NOOP; 761 *cs++ = MI_NOOP; 762 } 763 764 client[i] = i915_request_get(rq); 765 i915_request_add(rq); 766 } 767 768 err = wait_for_submit(engine, client[0], HZ / 2); 769 if (err) { 770 pr_err("%s: first request did not start within time!\n", 771 engine->name); 772 err = -ETIME; 773 goto out; 774 } 775 776 for (i = 0; i < ARRAY_SIZE(client); i++) { 777 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 778 pr_debug("%s: %s request incomplete!\n", 779 engine->name, 780 error_repr(p->error[i])); 781 782 if (!i915_request_started(client[i])) { 783 pr_err("%s: %s request not started!\n", 784 engine->name, 785 error_repr(p->error[i])); 786 err = -ETIME; 787 goto out; 788 } 789 790 /* Kick the tasklet to process the error */ 791 intel_engine_flush_submission(engine); 792 if (client[i]->fence.error != p->error[i]) { 793 pr_err("%s: %s request (%s) with wrong error code: %d\n", 794 engine->name, 795 error_repr(p->error[i]), 796 i915_request_completed(client[i]) ? 
"completed" : "running", 797 client[i]->fence.error); 798 err = -EINVAL; 799 goto out; 800 } 801 } 802 803 out: 804 for (i = 0; i < ARRAY_SIZE(client); i++) 805 if (client[i]) 806 i915_request_put(client[i]); 807 if (err) { 808 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 809 engine->name, p - phases, 810 p->error[0], p->error[1]); 811 break; 812 } 813 } 814 815 st_engine_heartbeat_enable(engine); 816 if (err) { 817 intel_gt_set_wedged(gt); 818 return err; 819 } 820 } 821 822 return 0; 823 } 824 825 static int 826 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 827 { 828 u32 *cs; 829 830 cs = intel_ring_begin(rq, 10); 831 if (IS_ERR(cs)) 832 return PTR_ERR(cs); 833 834 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 835 836 *cs++ = MI_SEMAPHORE_WAIT | 837 MI_SEMAPHORE_GLOBAL_GTT | 838 MI_SEMAPHORE_POLL | 839 MI_SEMAPHORE_SAD_NEQ_SDD; 840 *cs++ = 0; 841 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 842 *cs++ = 0; 843 844 if (idx > 0) { 845 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 846 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 847 *cs++ = 0; 848 *cs++ = 1; 849 } else { 850 *cs++ = MI_NOOP; 851 *cs++ = MI_NOOP; 852 *cs++ = MI_NOOP; 853 *cs++ = MI_NOOP; 854 } 855 856 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 857 858 intel_ring_advance(rq, cs); 859 return 0; 860 } 861 862 static struct i915_request * 863 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 864 { 865 struct intel_context *ce; 866 struct i915_request *rq; 867 int err; 868 869 ce = intel_context_create(engine); 870 if (IS_ERR(ce)) 871 return ERR_CAST(ce); 872 873 rq = intel_context_create_request(ce); 874 if (IS_ERR(rq)) 875 goto out_ce; 876 877 err = 0; 878 if (rq->engine->emit_init_breadcrumb) 879 err = rq->engine->emit_init_breadcrumb(rq); 880 if (err == 0) 881 err = emit_semaphore_chain(rq, vma, idx); 882 if (err == 0) 883 i915_request_get(rq); 884 i915_request_add(rq); 885 if (err) 886 rq = ERR_PTR(err); 887 888 out_ce: 889 intel_context_put(ce); 890 return rq; 891 } 892 893 static int 894 release_queue(struct intel_engine_cs *engine, 895 struct i915_vma *vma, 896 int idx, int prio) 897 { 898 struct i915_sched_attr attr = { 899 .priority = prio, 900 }; 901 struct i915_request *rq; 902 u32 *cs; 903 904 rq = intel_engine_create_kernel_request(engine); 905 if (IS_ERR(rq)) 906 return PTR_ERR(rq); 907 908 cs = intel_ring_begin(rq, 4); 909 if (IS_ERR(cs)) { 910 i915_request_add(rq); 911 return PTR_ERR(cs); 912 } 913 914 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 915 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 916 *cs++ = 0; 917 *cs++ = 1; 918 919 intel_ring_advance(rq, cs); 920 921 i915_request_get(rq); 922 i915_request_add(rq); 923 924 local_bh_disable(); 925 engine->schedule(rq, &attr); 926 local_bh_enable(); /* kick tasklet */ 927 928 i915_request_put(rq); 929 930 return 0; 931 } 932 933 static int 934 slice_semaphore_queue(struct intel_engine_cs *outer, 935 struct i915_vma *vma, 936 int count) 937 { 938 struct intel_engine_cs *engine; 939 struct i915_request *head; 940 enum intel_engine_id id; 941 int err, i, n = 0; 942 943 head = semaphore_queue(outer, vma, n++); 944 if (IS_ERR(head)) 945 return PTR_ERR(head); 946 947 for_each_engine(engine, outer->gt, id) { 948 for (i = 0; i < count; i++) { 949 struct i915_request *rq; 950 951 rq = semaphore_queue(engine, vma, n++); 952 if (IS_ERR(rq)) { 953 err = PTR_ERR(rq); 954 goto out; 955 } 956 957 i915_request_put(rq); 958 } 959 } 960 961 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); 962 if (err) 963 goto out; 964 
965 if (i915_request_wait(head, 0, 966 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) { 967 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 968 count, n); 969 GEM_TRACE_DUMP(); 970 intel_gt_set_wedged(outer->gt); 971 err = -EIO; 972 } 973 974 out: 975 i915_request_put(head); 976 return err; 977 } 978 979 static int live_timeslice_preempt(void *arg) 980 { 981 struct intel_gt *gt = arg; 982 struct drm_i915_gem_object *obj; 983 struct intel_engine_cs *engine; 984 enum intel_engine_id id; 985 struct i915_vma *vma; 986 void *vaddr; 987 int err = 0; 988 989 /* 990 * If a request takes too long, we would like to give other users 991 * a fair go on the GPU. In particular, users may create batches 992 * that wait upon external input, where that input may even be 993 * supplied by another GPU job. To avoid blocking forever, we 994 * need to preempt the current task and replace it with another 995 * ready task. 996 */ 997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 998 return 0; 999 1000 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1001 if (IS_ERR(obj)) 1002 return PTR_ERR(obj); 1003 1004 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1005 if (IS_ERR(vma)) { 1006 err = PTR_ERR(vma); 1007 goto err_obj; 1008 } 1009 1010 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1011 if (IS_ERR(vaddr)) { 1012 err = PTR_ERR(vaddr); 1013 goto err_obj; 1014 } 1015 1016 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1017 if (err) 1018 goto err_map; 1019 1020 err = i915_vma_sync(vma); 1021 if (err) 1022 goto err_pin; 1023 1024 for_each_engine(engine, gt, id) { 1025 if (!intel_engine_has_preemption(engine)) 1026 continue; 1027 1028 memset(vaddr, 0, PAGE_SIZE); 1029 1030 st_engine_heartbeat_disable(engine); 1031 err = slice_semaphore_queue(engine, vma, 5); 1032 st_engine_heartbeat_enable(engine); 1033 if (err) 1034 goto err_pin; 1035 1036 if (igt_flush_test(gt->i915)) { 1037 err = -EIO; 1038 goto err_pin; 1039 } 1040 } 1041 1042 err_pin: 1043 i915_vma_unpin(vma); 1044 err_map: 1045 i915_gem_object_unpin_map(obj); 1046 err_obj: 1047 i915_gem_object_put(obj); 1048 return err; 1049 } 1050 1051 static struct i915_request * 1052 create_rewinder(struct intel_context *ce, 1053 struct i915_request *wait, 1054 void *slot, int idx) 1055 { 1056 const u32 offset = 1057 i915_ggtt_offset(ce->engine->status_page.vma) + 1058 offset_in_page(slot); 1059 struct i915_request *rq; 1060 u32 *cs; 1061 int err; 1062 1063 rq = intel_context_create_request(ce); 1064 if (IS_ERR(rq)) 1065 return rq; 1066 1067 if (wait) { 1068 err = i915_request_await_dma_fence(rq, &wait->fence); 1069 if (err) 1070 goto err; 1071 } 1072 1073 cs = intel_ring_begin(rq, 14); 1074 if (IS_ERR(cs)) { 1075 err = PTR_ERR(cs); 1076 goto err; 1077 } 1078 1079 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1080 *cs++ = MI_NOOP; 1081 1082 *cs++ = MI_SEMAPHORE_WAIT | 1083 MI_SEMAPHORE_GLOBAL_GTT | 1084 MI_SEMAPHORE_POLL | 1085 MI_SEMAPHORE_SAD_GTE_SDD; 1086 *cs++ = idx; 1087 *cs++ = offset; 1088 *cs++ = 0; 1089 1090 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 1091 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 1092 *cs++ = offset + idx * sizeof(u32); 1093 *cs++ = 0; 1094 1095 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1096 *cs++ = offset; 1097 *cs++ = 0; 1098 *cs++ = idx + 1; 1099 1100 intel_ring_advance(rq, cs); 1101 1102 rq->sched.attr.priority = I915_PRIORITY_MASK; 1103 err = 0; 1104 err: 1105 i915_request_get(rq); 1106 i915_request_add(rq); 1107 if (err) { 1108 i915_request_put(rq); 1109 
return ERR_PTR(err); 1110 } 1111 1112 return rq; 1113 } 1114 1115 static int live_timeslice_rewind(void *arg) 1116 { 1117 struct intel_gt *gt = arg; 1118 struct intel_engine_cs *engine; 1119 enum intel_engine_id id; 1120 1121 /* 1122 * The usual presumption on timeslice expiration is that we replace 1123 * the active context with another. However, given a chain of 1124 * dependencies we may end up with replacing the context with itself, 1125 * but only a few of those requests, forcing us to rewind the 1126 * RING_TAIL of the original request. 1127 */ 1128 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1129 return 0; 1130 1131 for_each_engine(engine, gt, id) { 1132 enum { A1, A2, B1 }; 1133 enum { X = 1, Z, Y }; 1134 struct i915_request *rq[3] = {}; 1135 struct intel_context *ce; 1136 unsigned long timeslice; 1137 int i, err = 0; 1138 u32 *slot; 1139 1140 if (!intel_engine_has_timeslices(engine)) 1141 continue; 1142 1143 /* 1144 * A:rq1 -- semaphore wait, timestamp X 1145 * A:rq2 -- write timestamp Y 1146 * 1147 * B:rq1 [await A:rq1] -- write timestamp Z 1148 * 1149 * Force timeslice, release semaphore. 1150 * 1151 * Expect execution/evaluation order XZY 1152 */ 1153 1154 st_engine_heartbeat_disable(engine); 1155 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1156 1157 slot = memset32(engine->status_page.addr + 1000, 0, 4); 1158 1159 ce = intel_context_create(engine); 1160 if (IS_ERR(ce)) { 1161 err = PTR_ERR(ce); 1162 goto err; 1163 } 1164 1165 rq[A1] = create_rewinder(ce, NULL, slot, X); 1166 if (IS_ERR(rq[A1])) { 1167 intel_context_put(ce); 1168 goto err; 1169 } 1170 1171 rq[A2] = create_rewinder(ce, NULL, slot, Y); 1172 intel_context_put(ce); 1173 if (IS_ERR(rq[A2])) 1174 goto err; 1175 1176 err = wait_for_submit(engine, rq[A2], HZ / 2); 1177 if (err) { 1178 pr_err("%s: failed to submit first context\n", 1179 engine->name); 1180 goto err; 1181 } 1182 1183 ce = intel_context_create(engine); 1184 if (IS_ERR(ce)) { 1185 err = PTR_ERR(ce); 1186 goto err; 1187 } 1188 1189 rq[B1] = create_rewinder(ce, rq[A1], slot, Z); 1190 intel_context_put(ce); 1191 if (IS_ERR(rq[2])) 1192 goto err; 1193 1194 err = wait_for_submit(engine, rq[B1], HZ / 2); 1195 if (err) { 1196 pr_err("%s: failed to submit second context\n", 1197 engine->name); 1198 goto err; 1199 } 1200 1201 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1202 ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); 1203 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ 1204 /* Wait for the timeslice to kick in */ 1205 del_timer(&engine->execlists.timer); 1206 tasklet_hi_schedule(&engine->execlists.tasklet); 1207 intel_engine_flush_submission(engine); 1208 } 1209 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1210 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1211 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1212 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1213 1214 /* Release the hounds! 
*/ 1215 slot[0] = 1; 1216 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1217 1218 for (i = 1; i <= 3; i++) { 1219 unsigned long timeout = jiffies + HZ / 2; 1220 1221 while (!READ_ONCE(slot[i]) && 1222 time_before(jiffies, timeout)) 1223 ; 1224 1225 if (!time_before(jiffies, timeout)) { 1226 pr_err("%s: rq[%d] timed out\n", 1227 engine->name, i - 1); 1228 err = -ETIME; 1229 goto err; 1230 } 1231 1232 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1233 } 1234 1235 /* XZY: XZ < XY */ 1236 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1237 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1238 engine->name, 1239 slot[Z] - slot[X], 1240 slot[Y] - slot[X]); 1241 err = -EINVAL; 1242 } 1243 1244 err: 1245 memset32(&slot[0], -1, 4); 1246 wmb(); 1247 1248 engine->props.timeslice_duration_ms = timeslice; 1249 st_engine_heartbeat_enable(engine); 1250 for (i = 0; i < 3; i++) 1251 i915_request_put(rq[i]); 1252 if (igt_flush_test(gt->i915)) 1253 err = -EIO; 1254 if (err) 1255 return err; 1256 } 1257 1258 return 0; 1259 } 1260 1261 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1262 { 1263 struct i915_request *rq; 1264 1265 rq = intel_engine_create_kernel_request(engine); 1266 if (IS_ERR(rq)) 1267 return rq; 1268 1269 i915_request_get(rq); 1270 i915_request_add(rq); 1271 1272 return rq; 1273 } 1274 1275 static long slice_timeout(struct intel_engine_cs *engine) 1276 { 1277 long timeout; 1278 1279 /* Enough time for a timeslice to kick in, and kick out */ 1280 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); 1281 1282 /* Enough time for the nop request to complete */ 1283 timeout += HZ / 5; 1284 1285 return timeout + 1; 1286 } 1287 1288 static int live_timeslice_queue(void *arg) 1289 { 1290 struct intel_gt *gt = arg; 1291 struct drm_i915_gem_object *obj; 1292 struct intel_engine_cs *engine; 1293 enum intel_engine_id id; 1294 struct i915_vma *vma; 1295 void *vaddr; 1296 int err = 0; 1297 1298 /* 1299 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1300 * timeslicing between them disabled, we *do* enable timeslicing 1301 * if the queue demands it. (Normally, we do not submit if 1302 * ELSP[1] is already occupied, so must rely on timeslicing to 1303 * eject ELSP[0] in favour of the queue.) 
1304 */ 1305 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1306 return 0; 1307 1308 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1309 if (IS_ERR(obj)) 1310 return PTR_ERR(obj); 1311 1312 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1313 if (IS_ERR(vma)) { 1314 err = PTR_ERR(vma); 1315 goto err_obj; 1316 } 1317 1318 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1319 if (IS_ERR(vaddr)) { 1320 err = PTR_ERR(vaddr); 1321 goto err_obj; 1322 } 1323 1324 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1325 if (err) 1326 goto err_map; 1327 1328 err = i915_vma_sync(vma); 1329 if (err) 1330 goto err_pin; 1331 1332 for_each_engine(engine, gt, id) { 1333 struct i915_sched_attr attr = { 1334 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1335 }; 1336 struct i915_request *rq, *nop; 1337 1338 if (!intel_engine_has_preemption(engine)) 1339 continue; 1340 1341 st_engine_heartbeat_disable(engine); 1342 memset(vaddr, 0, PAGE_SIZE); 1343 1344 /* ELSP[0]: semaphore wait */ 1345 rq = semaphore_queue(engine, vma, 0); 1346 if (IS_ERR(rq)) { 1347 err = PTR_ERR(rq); 1348 goto err_heartbeat; 1349 } 1350 engine->schedule(rq, &attr); 1351 err = wait_for_submit(engine, rq, HZ / 2); 1352 if (err) { 1353 pr_err("%s: Timed out trying to submit semaphores\n", 1354 engine->name); 1355 goto err_rq; 1356 } 1357 1358 /* ELSP[1]: nop request */ 1359 nop = nop_request(engine); 1360 if (IS_ERR(nop)) { 1361 err = PTR_ERR(nop); 1362 goto err_rq; 1363 } 1364 err = wait_for_submit(engine, nop, HZ / 2); 1365 i915_request_put(nop); 1366 if (err) { 1367 pr_err("%s: Timed out trying to submit nop\n", 1368 engine->name); 1369 goto err_rq; 1370 } 1371 1372 GEM_BUG_ON(i915_request_completed(rq)); 1373 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1374 1375 /* Queue: semaphore signal, matching priority as semaphore */ 1376 err = release_queue(engine, vma, 1, effective_prio(rq)); 1377 if (err) 1378 goto err_rq; 1379 1380 /* Wait until we ack the release_queue and start timeslicing */ 1381 do { 1382 cond_resched(); 1383 intel_engine_flush_submission(engine); 1384 } while (READ_ONCE(engine->execlists.pending[0])); 1385 1386 /* Timeslice every jiffy, so within 2 we should signal */ 1387 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { 1388 struct drm_printer p = 1389 drm_info_printer(gt->i915->drm.dev); 1390 1391 pr_err("%s: Failed to timeslice into queue\n", 1392 engine->name); 1393 intel_engine_dump(engine, &p, 1394 "%s\n", engine->name); 1395 1396 memset(vaddr, 0xff, PAGE_SIZE); 1397 err = -EIO; 1398 } 1399 err_rq: 1400 i915_request_put(rq); 1401 err_heartbeat: 1402 st_engine_heartbeat_enable(engine); 1403 if (err) 1404 break; 1405 } 1406 1407 err_pin: 1408 i915_vma_unpin(vma); 1409 err_map: 1410 i915_gem_object_unpin_map(obj); 1411 err_obj: 1412 i915_gem_object_put(obj); 1413 return err; 1414 } 1415 1416 static int live_timeslice_nopreempt(void *arg) 1417 { 1418 struct intel_gt *gt = arg; 1419 struct intel_engine_cs *engine; 1420 enum intel_engine_id id; 1421 struct igt_spinner spin; 1422 int err = 0; 1423 1424 /* 1425 * We should not timeslice into a request that is marked with 1426 * I915_REQUEST_NOPREEMPT. 
1427 */ 1428 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1429 return 0; 1430 1431 if (igt_spinner_init(&spin, gt)) 1432 return -ENOMEM; 1433 1434 for_each_engine(engine, gt, id) { 1435 struct intel_context *ce; 1436 struct i915_request *rq; 1437 unsigned long timeslice; 1438 1439 if (!intel_engine_has_preemption(engine)) 1440 continue; 1441 1442 ce = intel_context_create(engine); 1443 if (IS_ERR(ce)) { 1444 err = PTR_ERR(ce); 1445 break; 1446 } 1447 1448 st_engine_heartbeat_disable(engine); 1449 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1450 1451 /* Create an unpreemptible spinner */ 1452 1453 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 1454 intel_context_put(ce); 1455 if (IS_ERR(rq)) { 1456 err = PTR_ERR(rq); 1457 goto out_heartbeat; 1458 } 1459 1460 i915_request_get(rq); 1461 i915_request_add(rq); 1462 1463 if (!igt_wait_for_spinner(&spin, rq)) { 1464 i915_request_put(rq); 1465 err = -ETIME; 1466 goto out_spin; 1467 } 1468 1469 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 1470 i915_request_put(rq); 1471 1472 /* Followed by a maximum priority barrier (heartbeat) */ 1473 1474 ce = intel_context_create(engine); 1475 if (IS_ERR(ce)) { 1476 err = PTR_ERR(ce); 1477 goto out_spin; 1478 } 1479 1480 rq = intel_context_create_request(ce); 1481 intel_context_put(ce); 1482 if (IS_ERR(rq)) { 1483 err = PTR_ERR(rq); 1484 goto out_spin; 1485 } 1486 1487 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 1488 i915_request_get(rq); 1489 i915_request_add(rq); 1490 1491 /* 1492 * Wait until the barrier is in ELSP, and we know timeslicing 1493 * will have been activated. 1494 */ 1495 if (wait_for_submit(engine, rq, HZ / 2)) { 1496 i915_request_put(rq); 1497 err = -ETIME; 1498 goto out_spin; 1499 } 1500 1501 /* 1502 * Since the ELSP[0] request is unpreemptible, it should not 1503 * allow the maximum priority barrier through. Wait long 1504 * enough to see if it is timesliced in by mistake. 1505 */ 1506 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { 1507 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", 1508 engine->name); 1509 err = -EINVAL; 1510 } 1511 i915_request_put(rq); 1512 1513 out_spin: 1514 igt_spinner_end(&spin); 1515 out_heartbeat: 1516 xchg(&engine->props.timeslice_duration_ms, timeslice); 1517 st_engine_heartbeat_enable(engine); 1518 if (err) 1519 break; 1520 1521 if (igt_flush_test(gt->i915)) { 1522 err = -EIO; 1523 break; 1524 } 1525 } 1526 1527 igt_spinner_fini(&spin); 1528 return err; 1529 } 1530 1531 static int live_busywait_preempt(void *arg) 1532 { 1533 struct intel_gt *gt = arg; 1534 struct i915_gem_context *ctx_hi, *ctx_lo; 1535 struct intel_engine_cs *engine; 1536 struct drm_i915_gem_object *obj; 1537 struct i915_vma *vma; 1538 enum intel_engine_id id; 1539 int err = -ENOMEM; 1540 u32 *map; 1541 1542 /* 1543 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1544 * preempt the busywaits used to synchronise between rings. 
1545 */ 1546 1547 ctx_hi = kernel_context(gt->i915); 1548 if (!ctx_hi) 1549 return -ENOMEM; 1550 ctx_hi->sched.priority = 1551 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1552 1553 ctx_lo = kernel_context(gt->i915); 1554 if (!ctx_lo) 1555 goto err_ctx_hi; 1556 ctx_lo->sched.priority = 1557 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1558 1559 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1560 if (IS_ERR(obj)) { 1561 err = PTR_ERR(obj); 1562 goto err_ctx_lo; 1563 } 1564 1565 map = i915_gem_object_pin_map(obj, I915_MAP_WC); 1566 if (IS_ERR(map)) { 1567 err = PTR_ERR(map); 1568 goto err_obj; 1569 } 1570 1571 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1572 if (IS_ERR(vma)) { 1573 err = PTR_ERR(vma); 1574 goto err_map; 1575 } 1576 1577 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1578 if (err) 1579 goto err_map; 1580 1581 err = i915_vma_sync(vma); 1582 if (err) 1583 goto err_vma; 1584 1585 for_each_engine(engine, gt, id) { 1586 struct i915_request *lo, *hi; 1587 struct igt_live_test t; 1588 u32 *cs; 1589 1590 if (!intel_engine_has_preemption(engine)) 1591 continue; 1592 1593 if (!intel_engine_can_store_dword(engine)) 1594 continue; 1595 1596 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1597 err = -EIO; 1598 goto err_vma; 1599 } 1600 1601 /* 1602 * We create two requests. The low priority request 1603 * busywaits on a semaphore (inside the ringbuffer where 1604 * is should be preemptible) and the high priority requests 1605 * uses a MI_STORE_DWORD_IMM to update the semaphore value 1606 * allowing the first request to complete. If preemption 1607 * fails, we hang instead. 1608 */ 1609 1610 lo = igt_request_alloc(ctx_lo, engine); 1611 if (IS_ERR(lo)) { 1612 err = PTR_ERR(lo); 1613 goto err_vma; 1614 } 1615 1616 cs = intel_ring_begin(lo, 8); 1617 if (IS_ERR(cs)) { 1618 err = PTR_ERR(cs); 1619 i915_request_add(lo); 1620 goto err_vma; 1621 } 1622 1623 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1624 *cs++ = i915_ggtt_offset(vma); 1625 *cs++ = 0; 1626 *cs++ = 1; 1627 1628 /* XXX Do we need a flush + invalidate here? 
*/ 1629 1630 *cs++ = MI_SEMAPHORE_WAIT | 1631 MI_SEMAPHORE_GLOBAL_GTT | 1632 MI_SEMAPHORE_POLL | 1633 MI_SEMAPHORE_SAD_EQ_SDD; 1634 *cs++ = 0; 1635 *cs++ = i915_ggtt_offset(vma); 1636 *cs++ = 0; 1637 1638 intel_ring_advance(lo, cs); 1639 1640 i915_request_get(lo); 1641 i915_request_add(lo); 1642 1643 if (wait_for(READ_ONCE(*map), 10)) { 1644 i915_request_put(lo); 1645 err = -ETIMEDOUT; 1646 goto err_vma; 1647 } 1648 1649 /* Low priority request should be busywaiting now */ 1650 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1651 i915_request_put(lo); 1652 pr_err("%s: Busywaiting request did not!\n", 1653 engine->name); 1654 err = -EIO; 1655 goto err_vma; 1656 } 1657 1658 hi = igt_request_alloc(ctx_hi, engine); 1659 if (IS_ERR(hi)) { 1660 err = PTR_ERR(hi); 1661 i915_request_put(lo); 1662 goto err_vma; 1663 } 1664 1665 cs = intel_ring_begin(hi, 4); 1666 if (IS_ERR(cs)) { 1667 err = PTR_ERR(cs); 1668 i915_request_add(hi); 1669 i915_request_put(lo); 1670 goto err_vma; 1671 } 1672 1673 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1674 *cs++ = i915_ggtt_offset(vma); 1675 *cs++ = 0; 1676 *cs++ = 0; 1677 1678 intel_ring_advance(hi, cs); 1679 i915_request_add(hi); 1680 1681 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1682 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1683 1684 pr_err("%s: Failed to preempt semaphore busywait!\n", 1685 engine->name); 1686 1687 intel_engine_dump(engine, &p, "%s\n", engine->name); 1688 GEM_TRACE_DUMP(); 1689 1690 i915_request_put(lo); 1691 intel_gt_set_wedged(gt); 1692 err = -EIO; 1693 goto err_vma; 1694 } 1695 GEM_BUG_ON(READ_ONCE(*map)); 1696 i915_request_put(lo); 1697 1698 if (igt_live_test_end(&t)) { 1699 err = -EIO; 1700 goto err_vma; 1701 } 1702 } 1703 1704 err = 0; 1705 err_vma: 1706 i915_vma_unpin(vma); 1707 err_map: 1708 i915_gem_object_unpin_map(obj); 1709 err_obj: 1710 i915_gem_object_put(obj); 1711 err_ctx_lo: 1712 kernel_context_close(ctx_lo); 1713 err_ctx_hi: 1714 kernel_context_close(ctx_hi); 1715 return err; 1716 } 1717 1718 static struct i915_request * 1719 spinner_create_request(struct igt_spinner *spin, 1720 struct i915_gem_context *ctx, 1721 struct intel_engine_cs *engine, 1722 u32 arb) 1723 { 1724 struct intel_context *ce; 1725 struct i915_request *rq; 1726 1727 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1728 if (IS_ERR(ce)) 1729 return ERR_CAST(ce); 1730 1731 rq = igt_spinner_create_request(spin, ce, arb); 1732 intel_context_put(ce); 1733 return rq; 1734 } 1735 1736 static int live_preempt(void *arg) 1737 { 1738 struct intel_gt *gt = arg; 1739 struct i915_gem_context *ctx_hi, *ctx_lo; 1740 struct igt_spinner spin_hi, spin_lo; 1741 struct intel_engine_cs *engine; 1742 enum intel_engine_id id; 1743 int err = -ENOMEM; 1744 1745 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1746 return 0; 1747 1748 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1749 pr_err("Logical preemption supported, but not exposed\n"); 1750 1751 if (igt_spinner_init(&spin_hi, gt)) 1752 return -ENOMEM; 1753 1754 if (igt_spinner_init(&spin_lo, gt)) 1755 goto err_spin_hi; 1756 1757 ctx_hi = kernel_context(gt->i915); 1758 if (!ctx_hi) 1759 goto err_spin_lo; 1760 ctx_hi->sched.priority = 1761 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1762 1763 ctx_lo = kernel_context(gt->i915); 1764 if (!ctx_lo) 1765 goto err_ctx_hi; 1766 ctx_lo->sched.priority = 1767 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1768 1769 for_each_engine(engine, gt, id) { 1770 struct igt_live_test t; 1771 struct i915_request *rq; 1772 1773 if 
(!intel_engine_has_preemption(engine)) 1774 continue; 1775 1776 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1777 err = -EIO; 1778 goto err_ctx_lo; 1779 } 1780 1781 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1782 MI_ARB_CHECK); 1783 if (IS_ERR(rq)) { 1784 err = PTR_ERR(rq); 1785 goto err_ctx_lo; 1786 } 1787 1788 i915_request_add(rq); 1789 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1790 GEM_TRACE("lo spinner failed to start\n"); 1791 GEM_TRACE_DUMP(); 1792 intel_gt_set_wedged(gt); 1793 err = -EIO; 1794 goto err_ctx_lo; 1795 } 1796 1797 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1798 MI_ARB_CHECK); 1799 if (IS_ERR(rq)) { 1800 igt_spinner_end(&spin_lo); 1801 err = PTR_ERR(rq); 1802 goto err_ctx_lo; 1803 } 1804 1805 i915_request_add(rq); 1806 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1807 GEM_TRACE("hi spinner failed to start\n"); 1808 GEM_TRACE_DUMP(); 1809 intel_gt_set_wedged(gt); 1810 err = -EIO; 1811 goto err_ctx_lo; 1812 } 1813 1814 igt_spinner_end(&spin_hi); 1815 igt_spinner_end(&spin_lo); 1816 1817 if (igt_live_test_end(&t)) { 1818 err = -EIO; 1819 goto err_ctx_lo; 1820 } 1821 } 1822 1823 err = 0; 1824 err_ctx_lo: 1825 kernel_context_close(ctx_lo); 1826 err_ctx_hi: 1827 kernel_context_close(ctx_hi); 1828 err_spin_lo: 1829 igt_spinner_fini(&spin_lo); 1830 err_spin_hi: 1831 igt_spinner_fini(&spin_hi); 1832 return err; 1833 } 1834 1835 static int live_late_preempt(void *arg) 1836 { 1837 struct intel_gt *gt = arg; 1838 struct i915_gem_context *ctx_hi, *ctx_lo; 1839 struct igt_spinner spin_hi, spin_lo; 1840 struct intel_engine_cs *engine; 1841 struct i915_sched_attr attr = {}; 1842 enum intel_engine_id id; 1843 int err = -ENOMEM; 1844 1845 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1846 return 0; 1847 1848 if (igt_spinner_init(&spin_hi, gt)) 1849 return -ENOMEM; 1850 1851 if (igt_spinner_init(&spin_lo, gt)) 1852 goto err_spin_hi; 1853 1854 ctx_hi = kernel_context(gt->i915); 1855 if (!ctx_hi) 1856 goto err_spin_lo; 1857 1858 ctx_lo = kernel_context(gt->i915); 1859 if (!ctx_lo) 1860 goto err_ctx_hi; 1861 1862 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1863 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1864 1865 for_each_engine(engine, gt, id) { 1866 struct igt_live_test t; 1867 struct i915_request *rq; 1868 1869 if (!intel_engine_has_preemption(engine)) 1870 continue; 1871 1872 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1873 err = -EIO; 1874 goto err_ctx_lo; 1875 } 1876 1877 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1878 MI_ARB_CHECK); 1879 if (IS_ERR(rq)) { 1880 err = PTR_ERR(rq); 1881 goto err_ctx_lo; 1882 } 1883 1884 i915_request_add(rq); 1885 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1886 pr_err("First context failed to start\n"); 1887 goto err_wedged; 1888 } 1889 1890 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1891 MI_NOOP); 1892 if (IS_ERR(rq)) { 1893 igt_spinner_end(&spin_lo); 1894 err = PTR_ERR(rq); 1895 goto err_ctx_lo; 1896 } 1897 1898 i915_request_add(rq); 1899 if (igt_wait_for_spinner(&spin_hi, rq)) { 1900 pr_err("Second context overtook first?\n"); 1901 goto err_wedged; 1902 } 1903 1904 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1905 engine->schedule(rq, &attr); 1906 1907 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1908 pr_err("High priority context failed to preempt the low priority context\n"); 1909 GEM_TRACE_DUMP(); 1910 goto err_wedged; 1911 } 1912 1913 igt_spinner_end(&spin_hi); 1914 igt_spinner_end(&spin_lo); 1915 1916 if (igt_live_test_end(&t)) { 1917 err = -EIO; 1918 goto err_ctx_lo; 1919 } 1920 } 1921 1922 err = 0; 1923 err_ctx_lo: 1924 kernel_context_close(ctx_lo); 1925 err_ctx_hi: 1926 kernel_context_close(ctx_hi); 1927 err_spin_lo: 1928 igt_spinner_fini(&spin_lo); 1929 err_spin_hi: 1930 igt_spinner_fini(&spin_hi); 1931 return err; 1932 1933 err_wedged: 1934 igt_spinner_end(&spin_hi); 1935 igt_spinner_end(&spin_lo); 1936 intel_gt_set_wedged(gt); 1937 err = -EIO; 1938 goto err_ctx_lo; 1939 } 1940 1941 struct preempt_client { 1942 struct igt_spinner spin; 1943 struct i915_gem_context *ctx; 1944 }; 1945 1946 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1947 { 1948 c->ctx = kernel_context(gt->i915); 1949 if (!c->ctx) 1950 return -ENOMEM; 1951 1952 if (igt_spinner_init(&c->spin, gt)) 1953 goto err_ctx; 1954 1955 return 0; 1956 1957 err_ctx: 1958 kernel_context_close(c->ctx); 1959 return -ENOMEM; 1960 } 1961 1962 static void preempt_client_fini(struct preempt_client *c) 1963 { 1964 igt_spinner_fini(&c->spin); 1965 kernel_context_close(c->ctx); 1966 } 1967 1968 static int live_nopreempt(void *arg) 1969 { 1970 struct intel_gt *gt = arg; 1971 struct intel_engine_cs *engine; 1972 struct preempt_client a, b; 1973 enum intel_engine_id id; 1974 int err = -ENOMEM; 1975 1976 /* 1977 * Verify that we can disable preemption for an individual request 1978 * that may be being observed and not want to be interrupted. 1979 */ 1980 1981 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1982 return 0; 1983 1984 if (preempt_client_init(gt, &a)) 1985 return -ENOMEM; 1986 if (preempt_client_init(gt, &b)) 1987 goto err_client_a; 1988 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1989 1990 for_each_engine(engine, gt, id) { 1991 struct i915_request *rq_a, *rq_b; 1992 1993 if (!intel_engine_has_preemption(engine)) 1994 continue; 1995 1996 engine->execlists.preempt_hang.count = 0; 1997 1998 rq_a = spinner_create_request(&a.spin, 1999 a.ctx, engine, 2000 MI_ARB_CHECK); 2001 if (IS_ERR(rq_a)) { 2002 err = PTR_ERR(rq_a); 2003 goto err_client_b; 2004 } 2005 2006 /* Low priority client, but unpreemptable! 
*/ 2007 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 2008 2009 i915_request_add(rq_a); 2010 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2011 pr_err("First client failed to start\n"); 2012 goto err_wedged; 2013 } 2014 2015 rq_b = spinner_create_request(&b.spin, 2016 b.ctx, engine, 2017 MI_ARB_CHECK); 2018 if (IS_ERR(rq_b)) { 2019 err = PTR_ERR(rq_b); 2020 goto err_client_b; 2021 } 2022 2023 i915_request_add(rq_b); 2024 2025 /* B is much more important than A! (But A is unpreemptable.) */ 2026 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 2027 2028 /* Wait long enough for preemption and timeslicing */ 2029 if (igt_wait_for_spinner(&b.spin, rq_b)) { 2030 pr_err("Second client started too early!\n"); 2031 goto err_wedged; 2032 } 2033 2034 igt_spinner_end(&a.spin); 2035 2036 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2037 pr_err("Second client failed to start\n"); 2038 goto err_wedged; 2039 } 2040 2041 igt_spinner_end(&b.spin); 2042 2043 if (engine->execlists.preempt_hang.count) { 2044 pr_err("Preemption recorded x%d; should have been suppressed!\n", 2045 engine->execlists.preempt_hang.count); 2046 err = -EINVAL; 2047 goto err_wedged; 2048 } 2049 2050 if (igt_flush_test(gt->i915)) 2051 goto err_wedged; 2052 } 2053 2054 err = 0; 2055 err_client_b: 2056 preempt_client_fini(&b); 2057 err_client_a: 2058 preempt_client_fini(&a); 2059 return err; 2060 2061 err_wedged: 2062 igt_spinner_end(&b.spin); 2063 igt_spinner_end(&a.spin); 2064 intel_gt_set_wedged(gt); 2065 err = -EIO; 2066 goto err_client_b; 2067 } 2068 2069 struct live_preempt_cancel { 2070 struct intel_engine_cs *engine; 2071 struct preempt_client a, b; 2072 }; 2073 2074 static int __cancel_active0(struct live_preempt_cancel *arg) 2075 { 2076 struct i915_request *rq; 2077 struct igt_live_test t; 2078 int err; 2079 2080 /* Preempt cancel of ELSP0 */ 2081 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2082 if (igt_live_test_begin(&t, arg->engine->i915, 2083 __func__, arg->engine->name)) 2084 return -EIO; 2085 2086 rq = spinner_create_request(&arg->a.spin, 2087 arg->a.ctx, arg->engine, 2088 MI_ARB_CHECK); 2089 if (IS_ERR(rq)) 2090 return PTR_ERR(rq); 2091 2092 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2093 i915_request_get(rq); 2094 i915_request_add(rq); 2095 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2096 err = -EIO; 2097 goto out; 2098 } 2099 2100 intel_context_set_banned(rq->context); 2101 err = intel_engine_pulse(arg->engine); 2102 if (err) 2103 goto out; 2104 2105 err = wait_for_reset(arg->engine, rq, HZ / 2); 2106 if (err) { 2107 pr_err("Cancelled inflight0 request did not reset\n"); 2108 goto out; 2109 } 2110 2111 out: 2112 i915_request_put(rq); 2113 if (igt_live_test_end(&t)) 2114 err = -EIO; 2115 return err; 2116 } 2117 2118 static int __cancel_active1(struct live_preempt_cancel *arg) 2119 { 2120 struct i915_request *rq[2] = {}; 2121 struct igt_live_test t; 2122 int err; 2123 2124 /* Preempt cancel of ELSP1 */ 2125 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2126 if (igt_live_test_begin(&t, arg->engine->i915, 2127 __func__, arg->engine->name)) 2128 return -EIO; 2129 2130 rq[0] = spinner_create_request(&arg->a.spin, 2131 arg->a.ctx, arg->engine, 2132 MI_NOOP); /* no preemption */ 2133 if (IS_ERR(rq[0])) 2134 return PTR_ERR(rq[0]); 2135 2136 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2137 i915_request_get(rq[0]); 2138 i915_request_add(rq[0]); 2139 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2140 err = -EIO; 2141 goto out; 2142 } 2143 2144 rq[1] = spinner_create_request(&arg->b.spin, 2145 
arg->b.ctx, arg->engine, 2146 MI_ARB_CHECK); 2147 if (IS_ERR(rq[1])) { 2148 err = PTR_ERR(rq[1]); 2149 goto out; 2150 } 2151 2152 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2153 i915_request_get(rq[1]); 2154 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2155 i915_request_add(rq[1]); 2156 if (err) 2157 goto out; 2158 2159 intel_context_set_banned(rq[1]->context); 2160 err = intel_engine_pulse(arg->engine); 2161 if (err) 2162 goto out; 2163 2164 igt_spinner_end(&arg->a.spin); 2165 err = wait_for_reset(arg->engine, rq[1], HZ / 2); 2166 if (err) 2167 goto out; 2168 2169 if (rq[0]->fence.error != 0) { 2170 pr_err("Normal inflight0 request did not complete\n"); 2171 err = -EINVAL; 2172 goto out; 2173 } 2174 2175 if (rq[1]->fence.error != -EIO) { 2176 pr_err("Cancelled inflight1 request did not report -EIO\n"); 2177 err = -EINVAL; 2178 goto out; 2179 } 2180 2181 out: 2182 i915_request_put(rq[1]); 2183 i915_request_put(rq[0]); 2184 if (igt_live_test_end(&t)) 2185 err = -EIO; 2186 return err; 2187 } 2188 2189 static int __cancel_queued(struct live_preempt_cancel *arg) 2190 { 2191 struct i915_request *rq[3] = {}; 2192 struct igt_live_test t; 2193 int err; 2194 2195 /* Full ELSP and one in the wings */ 2196 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2197 if (igt_live_test_begin(&t, arg->engine->i915, 2198 __func__, arg->engine->name)) 2199 return -EIO; 2200 2201 rq[0] = spinner_create_request(&arg->a.spin, 2202 arg->a.ctx, arg->engine, 2203 MI_ARB_CHECK); 2204 if (IS_ERR(rq[0])) 2205 return PTR_ERR(rq[0]); 2206 2207 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2208 i915_request_get(rq[0]); 2209 i915_request_add(rq[0]); 2210 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2211 err = -EIO; 2212 goto out; 2213 } 2214 2215 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 2216 if (IS_ERR(rq[1])) { 2217 err = PTR_ERR(rq[1]); 2218 goto out; 2219 } 2220 2221 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2222 i915_request_get(rq[1]); 2223 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2224 i915_request_add(rq[1]); 2225 if (err) 2226 goto out; 2227 2228 rq[2] = spinner_create_request(&arg->b.spin, 2229 arg->a.ctx, arg->engine, 2230 MI_ARB_CHECK); 2231 if (IS_ERR(rq[2])) { 2232 err = PTR_ERR(rq[2]); 2233 goto out; 2234 } 2235 2236 i915_request_get(rq[2]); 2237 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 2238 i915_request_add(rq[2]); 2239 if (err) 2240 goto out; 2241 2242 intel_context_set_banned(rq[2]->context); 2243 err = intel_engine_pulse(arg->engine); 2244 if (err) 2245 goto out; 2246 2247 err = wait_for_reset(arg->engine, rq[2], HZ / 2); 2248 if (err) 2249 goto out; 2250 2251 if (rq[0]->fence.error != -EIO) { 2252 pr_err("Cancelled inflight0 request did not report -EIO\n"); 2253 err = -EINVAL; 2254 goto out; 2255 } 2256 2257 if (rq[1]->fence.error != 0) { 2258 pr_err("Normal inflight1 request did not complete\n"); 2259 err = -EINVAL; 2260 goto out; 2261 } 2262 2263 if (rq[2]->fence.error != -EIO) { 2264 pr_err("Cancelled queued request did not report -EIO\n"); 2265 err = -EINVAL; 2266 goto out; 2267 } 2268 2269 out: 2270 i915_request_put(rq[2]); 2271 i915_request_put(rq[1]); 2272 i915_request_put(rq[0]); 2273 if (igt_live_test_end(&t)) 2274 err = -EIO; 2275 return err; 2276 } 2277 2278 static int __cancel_hostile(struct live_preempt_cancel *arg) 2279 { 2280 struct i915_request *rq; 2281 int err; 2282 2283 /* Preempt cancel non-preemptible spinner in ELSP0 */ 2284 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2285 return 0; 2286 2287 
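	/*
	 * With preemption disabled by the spinner, the banned context can
	 * only be removed by the preempt-timeout forcing an engine reset,
	 * so skip if per-engine resets are unavailable.
	 */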
if (!intel_has_reset_engine(arg->engine->gt)) 2288 return 0; 2289 2290 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2291 rq = spinner_create_request(&arg->a.spin, 2292 arg->a.ctx, arg->engine, 2293 MI_NOOP); /* preemption disabled */ 2294 if (IS_ERR(rq)) 2295 return PTR_ERR(rq); 2296 2297 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2298 i915_request_get(rq); 2299 i915_request_add(rq); 2300 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2301 err = -EIO; 2302 goto out; 2303 } 2304 2305 intel_context_set_banned(rq->context); 2306 err = intel_engine_pulse(arg->engine); /* force reset */ 2307 if (err) 2308 goto out; 2309 2310 err = wait_for_reset(arg->engine, rq, HZ / 2); 2311 if (err) { 2312 pr_err("Cancelled inflight0 request did not reset\n"); 2313 goto out; 2314 } 2315 2316 out: 2317 i915_request_put(rq); 2318 if (igt_flush_test(arg->engine->i915)) 2319 err = -EIO; 2320 return err; 2321 } 2322 2323 static int live_preempt_cancel(void *arg) 2324 { 2325 struct intel_gt *gt = arg; 2326 struct live_preempt_cancel data; 2327 enum intel_engine_id id; 2328 int err = -ENOMEM; 2329 2330 /* 2331 * To cancel an inflight context, we need to first remove it from the 2332 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 2333 */ 2334 2335 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2336 return 0; 2337 2338 if (preempt_client_init(gt, &data.a)) 2339 return -ENOMEM; 2340 if (preempt_client_init(gt, &data.b)) 2341 goto err_client_a; 2342 2343 for_each_engine(data.engine, gt, id) { 2344 if (!intel_engine_has_preemption(data.engine)) 2345 continue; 2346 2347 err = __cancel_active0(&data); 2348 if (err) 2349 goto err_wedged; 2350 2351 err = __cancel_active1(&data); 2352 if (err) 2353 goto err_wedged; 2354 2355 err = __cancel_queued(&data); 2356 if (err) 2357 goto err_wedged; 2358 2359 err = __cancel_hostile(&data); 2360 if (err) 2361 goto err_wedged; 2362 } 2363 2364 err = 0; 2365 err_client_b: 2366 preempt_client_fini(&data.b); 2367 err_client_a: 2368 preempt_client_fini(&data.a); 2369 return err; 2370 2371 err_wedged: 2372 GEM_TRACE_DUMP(); 2373 igt_spinner_end(&data.b.spin); 2374 igt_spinner_end(&data.a.spin); 2375 intel_gt_set_wedged(gt); 2376 goto err_client_b; 2377 } 2378 2379 static int live_suppress_self_preempt(void *arg) 2380 { 2381 struct intel_gt *gt = arg; 2382 struct intel_engine_cs *engine; 2383 struct i915_sched_attr attr = { 2384 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 2385 }; 2386 struct preempt_client a, b; 2387 enum intel_engine_id id; 2388 int err = -ENOMEM; 2389 2390 /* 2391 * Verify that if a preemption request does not cause a change in 2392 * the current execution order, the preempt-to-idle injection is 2393 * skipped and that we do not accidentally apply it after the CS 2394 * completion event. 
2395 */ 2396 2397 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2398 return 0; 2399 2400 if (intel_uc_uses_guc_submission(&gt->uc)) 2401 return 0; /* presume black box */ 2402 2403 if (intel_vgpu_active(gt->i915)) 2404 return 0; /* GVT forces single port & request submission */ 2405 2406 if (preempt_client_init(gt, &a)) 2407 return -ENOMEM; 2408 if (preempt_client_init(gt, &b)) 2409 goto err_client_a; 2410 2411 for_each_engine(engine, gt, id) { 2412 struct i915_request *rq_a, *rq_b; 2413 int depth; 2414 2415 if (!intel_engine_has_preemption(engine)) 2416 continue; 2417 2418 if (igt_flush_test(gt->i915)) 2419 goto err_wedged; 2420 2421 st_engine_heartbeat_disable(engine); 2422 engine->execlists.preempt_hang.count = 0; 2423 2424 rq_a = spinner_create_request(&a.spin, 2425 a.ctx, engine, 2426 MI_NOOP); 2427 if (IS_ERR(rq_a)) { 2428 err = PTR_ERR(rq_a); 2429 st_engine_heartbeat_enable(engine); 2430 goto err_client_b; 2431 } 2432 2433 i915_request_add(rq_a); 2434 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2435 pr_err("First client failed to start\n"); 2436 st_engine_heartbeat_enable(engine); 2437 goto err_wedged; 2438 } 2439 2440 /* Keep postponing the timer to avoid premature slicing */ 2441 mod_timer(&engine->execlists.timer, jiffies + HZ); 2442 for (depth = 0; depth < 8; depth++) { 2443 rq_b = spinner_create_request(&b.spin, 2444 b.ctx, engine, 2445 MI_NOOP); 2446 if (IS_ERR(rq_b)) { 2447 err = PTR_ERR(rq_b); 2448 st_engine_heartbeat_enable(engine); 2449 goto err_client_b; 2450 } 2451 i915_request_add(rq_b); 2452 2453 GEM_BUG_ON(i915_request_completed(rq_a)); 2454 engine->schedule(rq_a, &attr); 2455 igt_spinner_end(&a.spin); 2456 2457 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2458 pr_err("Second client failed to start\n"); 2459 st_engine_heartbeat_enable(engine); 2460 goto err_wedged; 2461 } 2462 2463 swap(a, b); 2464 rq_a = rq_b; 2465 } 2466 igt_spinner_end(&a.spin); 2467 2468 if (engine->execlists.preempt_hang.count) { 2469 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2470 engine->name, 2471 engine->execlists.preempt_hang.count, 2472 depth); 2473 st_engine_heartbeat_enable(engine); 2474 err = -EINVAL; 2475 goto err_client_b; 2476 } 2477 2478 st_engine_heartbeat_enable(engine); 2479 if (igt_flush_test(gt->i915)) 2480 goto err_wedged; 2481 } 2482 2483 err = 0; 2484 err_client_b: 2485 preempt_client_fini(&b); 2486 err_client_a: 2487 preempt_client_fini(&a); 2488 return err; 2489 2490 err_wedged: 2491 igt_spinner_end(&b.spin); 2492 igt_spinner_end(&a.spin); 2493 intel_gt_set_wedged(gt); 2494 err = -EIO; 2495 goto err_client_b; 2496 } 2497 2498 static int live_chain_preempt(void *arg) 2499 { 2500 struct intel_gt *gt = arg; 2501 struct intel_engine_cs *engine; 2502 struct preempt_client hi, lo; 2503 enum intel_engine_id id; 2504 int err = -ENOMEM; 2505 2506 /* 2507 * Build a chain AB...BA between two contexts (A, B) and request 2508 * preemption of the last request. It should then complete before 2509 * the previously submitted spinner in B.
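 *
 * Each iteration submits a hi spinner, a lo spinner, <count> nop
 * requests on the lo context and finally one hi request that is
 * bumped to maximum priority. That last request must overtake the
 * entire lo chain and complete within HZ / 5; afterwards the lo
 * chain itself is flushed before the next iteration.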
2510 */ 2511 2512 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2513 return 0; 2514 2515 if (preempt_client_init(gt, &hi)) 2516 return -ENOMEM; 2517 2518 if (preempt_client_init(gt, &lo)) 2519 goto err_client_hi; 2520 2521 for_each_engine(engine, gt, id) { 2522 struct i915_sched_attr attr = { 2523 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2524 }; 2525 struct igt_live_test t; 2526 struct i915_request *rq; 2527 int ring_size, count, i; 2528 2529 if (!intel_engine_has_preemption(engine)) 2530 continue; 2531 2532 rq = spinner_create_request(&lo.spin, 2533 lo.ctx, engine, 2534 MI_ARB_CHECK); 2535 if (IS_ERR(rq)) 2536 goto err_wedged; 2537 2538 i915_request_get(rq); 2539 i915_request_add(rq); 2540 2541 ring_size = rq->wa_tail - rq->head; 2542 if (ring_size < 0) 2543 ring_size += rq->ring->size; 2544 ring_size = rq->ring->size / ring_size; 2545 pr_debug("%s(%s): Using maximum of %d requests\n", 2546 __func__, engine->name, ring_size); 2547 2548 igt_spinner_end(&lo.spin); 2549 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2550 pr_err("Timed out waiting to flush %s\n", engine->name); 2551 i915_request_put(rq); 2552 goto err_wedged; 2553 } 2554 i915_request_put(rq); 2555 2556 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2557 err = -EIO; 2558 goto err_wedged; 2559 } 2560 2561 for_each_prime_number_from(count, 1, ring_size) { 2562 rq = spinner_create_request(&hi.spin, 2563 hi.ctx, engine, 2564 MI_ARB_CHECK); 2565 if (IS_ERR(rq)) 2566 goto err_wedged; 2567 i915_request_add(rq); 2568 if (!igt_wait_for_spinner(&hi.spin, rq)) 2569 goto err_wedged; 2570 2571 rq = spinner_create_request(&lo.spin, 2572 lo.ctx, engine, 2573 MI_ARB_CHECK); 2574 if (IS_ERR(rq)) 2575 goto err_wedged; 2576 i915_request_add(rq); 2577 2578 for (i = 0; i < count; i++) { 2579 rq = igt_request_alloc(lo.ctx, engine); 2580 if (IS_ERR(rq)) 2581 goto err_wedged; 2582 i915_request_add(rq); 2583 } 2584 2585 rq = igt_request_alloc(hi.ctx, engine); 2586 if (IS_ERR(rq)) 2587 goto err_wedged; 2588 2589 i915_request_get(rq); 2590 i915_request_add(rq); 2591 engine->schedule(rq, &attr); 2592 2593 igt_spinner_end(&hi.spin); 2594 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2595 struct drm_printer p = 2596 drm_info_printer(gt->i915->drm.dev); 2597 2598 pr_err("Failed to preempt over chain of %d\n", 2599 count); 2600 intel_engine_dump(engine, &p, 2601 "%s\n", engine->name); 2602 i915_request_put(rq); 2603 goto err_wedged; 2604 } 2605 igt_spinner_end(&lo.spin); 2606 i915_request_put(rq); 2607 2608 rq = igt_request_alloc(lo.ctx, engine); 2609 if (IS_ERR(rq)) 2610 goto err_wedged; 2611 2612 i915_request_get(rq); 2613 i915_request_add(rq); 2614 2615 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2616 struct drm_printer p = 2617 drm_info_printer(gt->i915->drm.dev); 2618 2619 pr_err("Failed to flush low priority chain of %d requests\n", 2620 count); 2621 intel_engine_dump(engine, &p, 2622 "%s\n", engine->name); 2623 2624 i915_request_put(rq); 2625 goto err_wedged; 2626 } 2627 i915_request_put(rq); 2628 } 2629 2630 if (igt_live_test_end(&t)) { 2631 err = -EIO; 2632 goto err_wedged; 2633 } 2634 } 2635 2636 err = 0; 2637 err_client_lo: 2638 preempt_client_fini(&lo); 2639 err_client_hi: 2640 preempt_client_fini(&hi); 2641 return err; 2642 2643 err_wedged: 2644 igt_spinner_end(&hi.spin); 2645 igt_spinner_end(&lo.spin); 2646 intel_gt_set_wedged(gt); 2647 err = -EIO; 2648 goto err_client_lo; 2649 } 2650 2651 static int create_gang(struct intel_engine_cs *engine, 2652 struct i915_request **prev) 2653 { 2654 struct drm_i915_gem_object *obj; 
2655 struct intel_context *ce; 2656 struct i915_request *rq; 2657 struct i915_vma *vma; 2658 u32 *cs; 2659 int err; 2660 2661 ce = intel_context_create(engine); 2662 if (IS_ERR(ce)) 2663 return PTR_ERR(ce); 2664 2665 obj = i915_gem_object_create_internal(engine->i915, 4096); 2666 if (IS_ERR(obj)) { 2667 err = PTR_ERR(obj); 2668 goto err_ce; 2669 } 2670 2671 vma = i915_vma_instance(obj, ce->vm, NULL); 2672 if (IS_ERR(vma)) { 2673 err = PTR_ERR(vma); 2674 goto err_obj; 2675 } 2676 2677 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2678 if (err) 2679 goto err_obj; 2680 2681 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2682 if (IS_ERR(cs)) 2683 goto err_obj; 2684 2685 /* Semaphore target: spin until zero */ 2686 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2687 2688 *cs++ = MI_SEMAPHORE_WAIT | 2689 MI_SEMAPHORE_POLL | 2690 MI_SEMAPHORE_SAD_EQ_SDD; 2691 *cs++ = 0; 2692 *cs++ = lower_32_bits(vma->node.start); 2693 *cs++ = upper_32_bits(vma->node.start); 2694 2695 if (*prev) { 2696 u64 offset = (*prev)->batch->node.start; 2697 2698 /* Terminate the spinner in the next lower priority batch. */ 2699 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2700 *cs++ = lower_32_bits(offset); 2701 *cs++ = upper_32_bits(offset); 2702 *cs++ = 0; 2703 } 2704 2705 *cs++ = MI_BATCH_BUFFER_END; 2706 i915_gem_object_flush_map(obj); 2707 i915_gem_object_unpin_map(obj); 2708 2709 rq = intel_context_create_request(ce); 2710 if (IS_ERR(rq)) 2711 goto err_obj; 2712 2713 rq->batch = i915_vma_get(vma); 2714 i915_request_get(rq); 2715 2716 i915_vma_lock(vma); 2717 err = i915_request_await_object(rq, vma->obj, false); 2718 if (!err) 2719 err = i915_vma_move_to_active(vma, rq, 0); 2720 if (!err) 2721 err = rq->engine->emit_bb_start(rq, 2722 vma->node.start, 2723 PAGE_SIZE, 0); 2724 i915_vma_unlock(vma); 2725 i915_request_add(rq); 2726 if (err) 2727 goto err_rq; 2728 2729 i915_gem_object_put(obj); 2730 intel_context_put(ce); 2731 2732 rq->mock.link.next = &(*prev)->mock.link; 2733 *prev = rq; 2734 return 0; 2735 2736 err_rq: 2737 i915_vma_put(rq->batch); 2738 i915_request_put(rq); 2739 err_obj: 2740 i915_gem_object_put(obj); 2741 err_ce: 2742 intel_context_put(ce); 2743 return err; 2744 } 2745 2746 static int __live_preempt_ring(struct intel_engine_cs *engine, 2747 struct igt_spinner *spin, 2748 int queue_sz, int ring_sz) 2749 { 2750 struct intel_context *ce[2] = {}; 2751 struct i915_request *rq; 2752 struct igt_live_test t; 2753 int err = 0; 2754 int n; 2755 2756 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) 2757 return -EIO; 2758 2759 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2760 struct intel_context *tmp; 2761 2762 tmp = intel_context_create(engine); 2763 if (IS_ERR(tmp)) { 2764 err = PTR_ERR(tmp); 2765 goto err_ce; 2766 } 2767 2768 tmp->ring = __intel_context_ring_size(ring_sz); 2769 2770 err = intel_context_pin(tmp); 2771 if (err) { 2772 intel_context_put(tmp); 2773 goto err_ce; 2774 } 2775 2776 memset32(tmp->ring->vaddr, 2777 0xdeadbeef, /* trigger a hang if executed */ 2778 tmp->ring->vma->size / sizeof(u32)); 2779 2780 ce[n] = tmp; 2781 } 2782 2783 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); 2784 if (IS_ERR(rq)) { 2785 err = PTR_ERR(rq); 2786 goto err_ce; 2787 } 2788 2789 i915_request_get(rq); 2790 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2791 i915_request_add(rq); 2792 2793 if (!igt_wait_for_spinner(spin, rq)) { 2794 intel_gt_set_wedged(engine->gt); 2795 i915_request_put(rq); 2796 err = -ETIME; 2797 goto err_ce; 2798 } 2799 2800 /* Fill the ring, until we will cause a wrap */ 2801 n = 0; 2802 while 
(ce[0]->ring->tail - rq->wa_tail <= queue_sz) { 2803 struct i915_request *tmp; 2804 2805 tmp = intel_context_create_request(ce[0]); 2806 if (IS_ERR(tmp)) { 2807 err = PTR_ERR(tmp); 2808 i915_request_put(rq); 2809 goto err_ce; 2810 } 2811 2812 i915_request_add(tmp); 2813 intel_engine_flush_submission(engine); 2814 n++; 2815 } 2816 intel_engine_flush_submission(engine); 2817 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 2818 engine->name, queue_sz, n, 2819 ce[0]->ring->size, 2820 ce[0]->ring->tail, 2821 ce[0]->ring->emit, 2822 rq->tail); 2823 i915_request_put(rq); 2824 2825 /* Create a second request to preempt the first ring */ 2826 rq = intel_context_create_request(ce[1]); 2827 if (IS_ERR(rq)) { 2828 err = PTR_ERR(rq); 2829 goto err_ce; 2830 } 2831 2832 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2833 i915_request_get(rq); 2834 i915_request_add(rq); 2835 2836 err = wait_for_submit(engine, rq, HZ / 2); 2837 i915_request_put(rq); 2838 if (err) { 2839 pr_err("%s: preemption request was not submitted\n", 2840 engine->name); 2841 err = -ETIME; 2842 } 2843 2844 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 2845 engine->name, 2846 ce[0]->ring->tail, ce[0]->ring->emit, 2847 ce[1]->ring->tail, ce[1]->ring->emit); 2848 2849 err_ce: 2850 intel_engine_flush_submission(engine); 2851 igt_spinner_end(spin); 2852 for (n = 0; n < ARRAY_SIZE(ce); n++) { 2853 if (IS_ERR_OR_NULL(ce[n])) 2854 break; 2855 2856 intel_context_unpin(ce[n]); 2857 intel_context_put(ce[n]); 2858 } 2859 if (igt_live_test_end(&t)) 2860 err = -EIO; 2861 return err; 2862 } 2863 2864 static int live_preempt_ring(void *arg) 2865 { 2866 struct intel_gt *gt = arg; 2867 struct intel_engine_cs *engine; 2868 struct igt_spinner spin; 2869 enum intel_engine_id id; 2870 int err = 0; 2871 2872 /* 2873 * Check that we roll back large chunks of a ring in order to do a 2874 * preemption event. Similar to live_unlite_ring, but looking at 2875 * ring size rather than the impact of intel_ring_direction(). 2876 */ 2877 2878 if (igt_spinner_init(&spin, gt)) 2879 return -ENOMEM; 2880 2881 for_each_engine(engine, gt, id) { 2882 int n; 2883 2884 if (!intel_engine_has_preemption(engine)) 2885 continue; 2886 2887 if (!intel_engine_can_store_dword(engine)) 2888 continue; 2889 2890 st_engine_heartbeat_disable(engine); 2891 2892 for (n = 0; n <= 3; n++) { 2893 err = __live_preempt_ring(engine, &spin, 2894 n * SZ_4K / 4, SZ_4K); 2895 if (err) 2896 break; 2897 } 2898 2899 st_engine_heartbeat_enable(engine); 2900 if (err) 2901 break; 2902 } 2903 2904 igt_spinner_fini(&spin); 2905 return err; 2906 } 2907 2908 static int live_preempt_gang(void *arg) 2909 { 2910 struct intel_gt *gt = arg; 2911 struct intel_engine_cs *engine; 2912 enum intel_engine_id id; 2913 2914 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2915 return 0; 2916 2917 /* 2918 * Build as long a chain of preempters as we can, with each 2919 * request higher priority than the last. Once we are ready, we release 2920 * the last batch which then percolates down the chain, each releasing 2921 * the next oldest in turn. The intent is to simply push as hard as we 2922 * can with the number of preemptions, trying to exceed narrow HW 2923 * limits. At a minimum, we insist that we can sort all the user 2924 * high priority levels into execution order.
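 *
 * Each member of the gang (see create_gang) spins on a semaphore in
 * its own batch and, once released, clears the semaphore of the
 * previously submitted, lower priority batch. Releasing the final,
 * highest priority batch from the CPU should therefore unwind the
 * whole gang in strictly descending priority order.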
2925 */ 2926 2927 for_each_engine(engine, gt, id) { 2928 struct i915_request *rq = NULL; 2929 struct igt_live_test t; 2930 IGT_TIMEOUT(end_time); 2931 int prio = 0; 2932 int err = 0; 2933 u32 *cs; 2934 2935 if (!intel_engine_has_preemption(engine)) 2936 continue; 2937 2938 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2939 return -EIO; 2940 2941 do { 2942 struct i915_sched_attr attr = { 2943 .priority = I915_USER_PRIORITY(prio++), 2944 }; 2945 2946 err = create_gang(engine, &rq); 2947 if (err) 2948 break; 2949 2950 /* Submit each spinner at increasing priority */ 2951 engine->schedule(rq, &attr); 2952 } while (prio <= I915_PRIORITY_MAX && 2953 !__igt_timeout(end_time, NULL)); 2954 pr_debug("%s: Preempt chain of %d requests\n", 2955 engine->name, prio); 2956 2957 /* 2958 * Such that the last spinner is the highest priority and 2959 * should execute first. When that spinner completes, 2960 * it will terminate the next lowest spinner until there 2961 * are no more spinners and the gang is complete. 2962 */ 2963 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2964 if (!IS_ERR(cs)) { 2965 *cs = 0; 2966 i915_gem_object_unpin_map(rq->batch->obj); 2967 } else { 2968 err = PTR_ERR(cs); 2969 intel_gt_set_wedged(gt); 2970 } 2971 2972 while (rq) { /* wait for each rq from highest to lowest prio */ 2973 struct i915_request *n = list_next_entry(rq, mock.link); 2974 2975 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2976 struct drm_printer p = 2977 drm_info_printer(engine->i915->drm.dev); 2978 2979 pr_err("Failed to flush chain of %d requests, at %d\n", 2980 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2981 intel_engine_dump(engine, &p, 2982 "%s\n", engine->name); 2983 2984 err = -ETIME; 2985 } 2986 2987 i915_vma_put(rq->batch); 2988 i915_request_put(rq); 2989 rq = n; 2990 } 2991 2992 if (igt_live_test_end(&t)) 2993 err = -EIO; 2994 if (err) 2995 return err; 2996 } 2997 2998 return 0; 2999 } 3000 3001 static struct i915_vma * 3002 create_gpr_user(struct intel_engine_cs *engine, 3003 struct i915_vma *result, 3004 unsigned int offset) 3005 { 3006 struct drm_i915_gem_object *obj; 3007 struct i915_vma *vma; 3008 u32 *cs; 3009 int err; 3010 int i; 3011 3012 obj = i915_gem_object_create_internal(engine->i915, 4096); 3013 if (IS_ERR(obj)) 3014 return ERR_CAST(obj); 3015 3016 vma = i915_vma_instance(obj, result->vm, NULL); 3017 if (IS_ERR(vma)) { 3018 i915_gem_object_put(obj); 3019 return vma; 3020 } 3021 3022 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3023 if (err) { 3024 i915_vma_put(vma); 3025 return ERR_PTR(err); 3026 } 3027 3028 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 3029 if (IS_ERR(cs)) { 3030 i915_vma_put(vma); 3031 return ERR_CAST(cs); 3032 } 3033 3034 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 3035 *cs++ = MI_LOAD_REGISTER_IMM(1); 3036 *cs++ = CS_GPR(engine, 0); 3037 *cs++ = 1; 3038 3039 for (i = 1; i < NUM_GPR; i++) { 3040 u64 addr; 3041 3042 /* 3043 * Perform: GPR[i]++ 3044 * 3045 * As we read and write into the context saved GPR[i], if 3046 * we restart this batch buffer from an earlier point, we 3047 * will repeat the increment and store a value > 1. 
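 *
 * After each increment, the updated GPR is written out to the result
 * buffer and the batch then sits in a semaphore wait until result[0]
 * reaches i, leaving a window between every step in which the
 * preempting contexts may interrupt this batch.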
3048 */ 3049 *cs++ = MI_MATH(4); 3050 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 3051 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 3052 *cs++ = MI_MATH_ADD; 3053 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 3054 3055 addr = result->node.start + offset + i * sizeof(*cs); 3056 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 3057 *cs++ = CS_GPR(engine, 2 * i); 3058 *cs++ = lower_32_bits(addr); 3059 *cs++ = upper_32_bits(addr); 3060 3061 *cs++ = MI_SEMAPHORE_WAIT | 3062 MI_SEMAPHORE_POLL | 3063 MI_SEMAPHORE_SAD_GTE_SDD; 3064 *cs++ = i; 3065 *cs++ = lower_32_bits(result->node.start); 3066 *cs++ = upper_32_bits(result->node.start); 3067 } 3068 3069 *cs++ = MI_BATCH_BUFFER_END; 3070 i915_gem_object_flush_map(obj); 3071 i915_gem_object_unpin_map(obj); 3072 3073 return vma; 3074 } 3075 3076 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 3077 { 3078 struct drm_i915_gem_object *obj; 3079 struct i915_vma *vma; 3080 int err; 3081 3082 obj = i915_gem_object_create_internal(gt->i915, sz); 3083 if (IS_ERR(obj)) 3084 return ERR_CAST(obj); 3085 3086 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 3087 if (IS_ERR(vma)) { 3088 i915_gem_object_put(obj); 3089 return vma; 3090 } 3091 3092 err = i915_ggtt_pin(vma, NULL, 0, 0); 3093 if (err) { 3094 i915_vma_put(vma); 3095 return ERR_PTR(err); 3096 } 3097 3098 return vma; 3099 } 3100 3101 static struct i915_request * 3102 create_gpr_client(struct intel_engine_cs *engine, 3103 struct i915_vma *global, 3104 unsigned int offset) 3105 { 3106 struct i915_vma *batch, *vma; 3107 struct intel_context *ce; 3108 struct i915_request *rq; 3109 int err; 3110 3111 ce = intel_context_create(engine); 3112 if (IS_ERR(ce)) 3113 return ERR_CAST(ce); 3114 3115 vma = i915_vma_instance(global->obj, ce->vm, NULL); 3116 if (IS_ERR(vma)) { 3117 err = PTR_ERR(vma); 3118 goto out_ce; 3119 } 3120 3121 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3122 if (err) 3123 goto out_ce; 3124 3125 batch = create_gpr_user(engine, vma, offset); 3126 if (IS_ERR(batch)) { 3127 err = PTR_ERR(batch); 3128 goto out_vma; 3129 } 3130 3131 rq = intel_context_create_request(ce); 3132 if (IS_ERR(rq)) { 3133 err = PTR_ERR(rq); 3134 goto out_batch; 3135 } 3136 3137 i915_vma_lock(vma); 3138 err = i915_request_await_object(rq, vma->obj, false); 3139 if (!err) 3140 err = i915_vma_move_to_active(vma, rq, 0); 3141 i915_vma_unlock(vma); 3142 3143 i915_vma_lock(batch); 3144 if (!err) 3145 err = i915_request_await_object(rq, batch->obj, false); 3146 if (!err) 3147 err = i915_vma_move_to_active(batch, rq, 0); 3148 if (!err) 3149 err = rq->engine->emit_bb_start(rq, 3150 batch->node.start, 3151 PAGE_SIZE, 0); 3152 i915_vma_unlock(batch); 3153 i915_vma_unpin(batch); 3154 3155 if (!err) 3156 i915_request_get(rq); 3157 i915_request_add(rq); 3158 3159 out_batch: 3160 i915_vma_put(batch); 3161 out_vma: 3162 i915_vma_unpin(vma); 3163 out_ce: 3164 intel_context_put(ce); 3165 return err ? 
ERR_PTR(err) : rq; 3166 } 3167 3168 static int preempt_user(struct intel_engine_cs *engine, 3169 struct i915_vma *global, 3170 int id) 3171 { 3172 struct i915_sched_attr attr = { 3173 .priority = I915_PRIORITY_MAX 3174 }; 3175 struct i915_request *rq; 3176 int err = 0; 3177 u32 *cs; 3178 3179 rq = intel_engine_create_kernel_request(engine); 3180 if (IS_ERR(rq)) 3181 return PTR_ERR(rq); 3182 3183 cs = intel_ring_begin(rq, 4); 3184 if (IS_ERR(cs)) { 3185 i915_request_add(rq); 3186 return PTR_ERR(cs); 3187 } 3188 3189 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 3190 *cs++ = i915_ggtt_offset(global); 3191 *cs++ = 0; 3192 *cs++ = id; 3193 3194 intel_ring_advance(rq, cs); 3195 3196 i915_request_get(rq); 3197 i915_request_add(rq); 3198 3199 engine->schedule(rq, &attr); 3200 3201 if (i915_request_wait(rq, 0, HZ / 2) < 0) 3202 err = -ETIME; 3203 i915_request_put(rq); 3204 3205 return err; 3206 } 3207 3208 static int live_preempt_user(void *arg) 3209 { 3210 struct intel_gt *gt = arg; 3211 struct intel_engine_cs *engine; 3212 struct i915_vma *global; 3213 enum intel_engine_id id; 3214 u32 *result; 3215 int err = 0; 3216 3217 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3218 return 0; 3219 3220 /* 3221 * In our other tests, we look at preemption in carefully 3222 * controlled conditions in the ringbuffer. Since most of the 3223 * time is spent in user batches, most of our preemptions naturally 3224 * occur there. We want to verify that when we preempt inside a batch 3225 * we continue on from the current instruction and do not roll back 3226 * to the start, or another earlier arbitration point. 3227 * 3228 * To verify this, we create a batch which is a mixture of 3229 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3230 * a few preempting contexts thrown into the mix, we look for any 3231 * repeated instructions (which show up as incorrect values). 
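 *
 * Each of the three clients writes its GPR values into its own slice
 * of the shared result page, while preempt_user() steps result[0]
 * from 1 to NUM_GPR to release the semaphores one stage at a time.
 * Any increment that is replayed after a preemption shows up as a
 * stored value greater than 1 and fails the test.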
3232 */ 3233 3234 global = create_global(gt, 4096); 3235 if (IS_ERR(global)) 3236 return PTR_ERR(global); 3237 3238 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); 3239 if (IS_ERR(result)) { 3240 i915_vma_unpin_and_release(&global, 0); 3241 return PTR_ERR(result); 3242 } 3243 3244 for_each_engine(engine, gt, id) { 3245 struct i915_request *client[3] = {}; 3246 struct igt_live_test t; 3247 int i; 3248 3249 if (!intel_engine_has_preemption(engine)) 3250 continue; 3251 3252 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) 3253 continue; /* we need per-context GPR */ 3254 3255 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3256 err = -EIO; 3257 break; 3258 } 3259 3260 memset(result, 0, 4096); 3261 3262 for (i = 0; i < ARRAY_SIZE(client); i++) { 3263 struct i915_request *rq; 3264 3265 rq = create_gpr_client(engine, global, 3266 NUM_GPR * i * sizeof(u32)); 3267 if (IS_ERR(rq)) 3268 goto end_test; 3269 3270 client[i] = rq; 3271 } 3272 3273 /* Continuously preempt the set of 3 running contexts */ 3274 for (i = 1; i <= NUM_GPR; i++) { 3275 err = preempt_user(engine, global, i); 3276 if (err) 3277 goto end_test; 3278 } 3279 3280 if (READ_ONCE(result[0]) != NUM_GPR) { 3281 pr_err("%s: Failed to release semaphore\n", 3282 engine->name); 3283 err = -EIO; 3284 goto end_test; 3285 } 3286 3287 for (i = 0; i < ARRAY_SIZE(client); i++) { 3288 int gpr; 3289 3290 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3291 err = -ETIME; 3292 goto end_test; 3293 } 3294 3295 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3296 if (result[NUM_GPR * i + gpr] != 1) { 3297 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3298 engine->name, 3299 i, gpr, result[NUM_GPR * i + gpr]); 3300 err = -EINVAL; 3301 goto end_test; 3302 } 3303 } 3304 } 3305 3306 end_test: 3307 for (i = 0; i < ARRAY_SIZE(client); i++) { 3308 if (!client[i]) 3309 break; 3310 3311 i915_request_put(client[i]); 3312 } 3313 3314 /* Flush the semaphores on error */ 3315 smp_store_mb(result[0], -1); 3316 if (igt_live_test_end(&t)) 3317 err = -EIO; 3318 if (err) 3319 break; 3320 } 3321 3322 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3323 return err; 3324 } 3325 3326 static int live_preempt_timeout(void *arg) 3327 { 3328 struct intel_gt *gt = arg; 3329 struct i915_gem_context *ctx_hi, *ctx_lo; 3330 struct igt_spinner spin_lo; 3331 struct intel_engine_cs *engine; 3332 enum intel_engine_id id; 3333 int err = -ENOMEM; 3334 3335 /* 3336 * Check that we force preemption to occur by cancelling the previous 3337 * context if it refuses to yield the GPU. 
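 *
 * The low priority spinner is created with MI_NOOP rather than
 * MI_ARB_CHECK, i.e. without an arbitration point, so it cannot yield
 * voluntarily. With preempt_timeout_ms temporarily dropped to 1, the
 * high priority request must instead be granted the engine via a
 * forced reset and complete within HZ / 10.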
3338 */ 3339 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3340 return 0; 3341 3342 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3343 return 0; 3344 3345 if (!intel_has_reset_engine(gt)) 3346 return 0; 3347 3348 if (igt_spinner_init(&spin_lo, gt)) 3349 return -ENOMEM; 3350 3351 ctx_hi = kernel_context(gt->i915); 3352 if (!ctx_hi) 3353 goto err_spin_lo; 3354 ctx_hi->sched.priority = 3355 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 3356 3357 ctx_lo = kernel_context(gt->i915); 3358 if (!ctx_lo) 3359 goto err_ctx_hi; 3360 ctx_lo->sched.priority = 3361 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 3362 3363 for_each_engine(engine, gt, id) { 3364 unsigned long saved_timeout; 3365 struct i915_request *rq; 3366 3367 if (!intel_engine_has_preemption(engine)) 3368 continue; 3369 3370 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3371 MI_NOOP); /* preemption disabled */ 3372 if (IS_ERR(rq)) { 3373 err = PTR_ERR(rq); 3374 goto err_ctx_lo; 3375 } 3376 3377 i915_request_add(rq); 3378 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3379 intel_gt_set_wedged(gt); 3380 err = -EIO; 3381 goto err_ctx_lo; 3382 } 3383 3384 rq = igt_request_alloc(ctx_hi, engine); 3385 if (IS_ERR(rq)) { 3386 igt_spinner_end(&spin_lo); 3387 err = PTR_ERR(rq); 3388 goto err_ctx_lo; 3389 } 3390 3391 /* Flush the previous CS ack before changing timeouts */ 3392 while (READ_ONCE(engine->execlists.pending[0])) 3393 cpu_relax(); 3394 3395 saved_timeout = engine->props.preempt_timeout_ms; 3396 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3397 3398 i915_request_get(rq); 3399 i915_request_add(rq); 3400 3401 intel_engine_flush_submission(engine); 3402 engine->props.preempt_timeout_ms = saved_timeout; 3403 3404 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3405 intel_gt_set_wedged(gt); 3406 i915_request_put(rq); 3407 err = -ETIME; 3408 goto err_ctx_lo; 3409 } 3410 3411 igt_spinner_end(&spin_lo); 3412 i915_request_put(rq); 3413 } 3414 3415 err = 0; 3416 err_ctx_lo: 3417 kernel_context_close(ctx_lo); 3418 err_ctx_hi: 3419 kernel_context_close(ctx_hi); 3420 err_spin_lo: 3421 igt_spinner_fini(&spin_lo); 3422 return err; 3423 } 3424 3425 static int random_range(struct rnd_state *rnd, int min, int max) 3426 { 3427 return i915_prandom_u32_max_state(max - min, rnd) + min; 3428 } 3429 3430 static int random_priority(struct rnd_state *rnd) 3431 { 3432 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3433 } 3434 3435 struct preempt_smoke { 3436 struct intel_gt *gt; 3437 struct i915_gem_context **contexts; 3438 struct intel_engine_cs *engine; 3439 struct drm_i915_gem_object *batch; 3440 unsigned int ncontext; 3441 struct rnd_state prng; 3442 unsigned long count; 3443 }; 3444 3445 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3446 { 3447 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3448 &smoke->prng)]; 3449 } 3450 3451 static int smoke_submit(struct preempt_smoke *smoke, 3452 struct i915_gem_context *ctx, int prio, 3453 struct drm_i915_gem_object *batch) 3454 { 3455 struct i915_request *rq; 3456 struct i915_vma *vma = NULL; 3457 int err = 0; 3458 3459 if (batch) { 3460 struct i915_address_space *vm; 3461 3462 vm = i915_gem_context_get_vm_rcu(ctx); 3463 vma = i915_vma_instance(batch, vm, NULL); 3464 i915_vm_put(vm); 3465 if (IS_ERR(vma)) 3466 return PTR_ERR(vma); 3467 3468 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3469 if (err) 3470 return err; 3471 } 3472 3473 ctx->sched.priority = prio; 3474 3475 rq = igt_request_alloc(ctx, smoke->engine); 3476 if 
(IS_ERR(rq)) { 3477 err = PTR_ERR(rq); 3478 goto unpin; 3479 } 3480 3481 if (vma) { 3482 i915_vma_lock(vma); 3483 err = i915_request_await_object(rq, vma->obj, false); 3484 if (!err) 3485 err = i915_vma_move_to_active(vma, rq, 0); 3486 if (!err) 3487 err = rq->engine->emit_bb_start(rq, 3488 vma->node.start, 3489 PAGE_SIZE, 0); 3490 i915_vma_unlock(vma); 3491 } 3492 3493 i915_request_add(rq); 3494 3495 unpin: 3496 if (vma) 3497 i915_vma_unpin(vma); 3498 3499 return err; 3500 } 3501 3502 static int smoke_crescendo_thread(void *arg) 3503 { 3504 struct preempt_smoke *smoke = arg; 3505 IGT_TIMEOUT(end_time); 3506 unsigned long count; 3507 3508 count = 0; 3509 do { 3510 struct i915_gem_context *ctx = smoke_context(smoke); 3511 int err; 3512 3513 err = smoke_submit(smoke, 3514 ctx, count % I915_PRIORITY_MAX, 3515 smoke->batch); 3516 if (err) 3517 return err; 3518 3519 count++; 3520 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3521 3522 smoke->count = count; 3523 return 0; 3524 } 3525 3526 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3527 #define BATCH BIT(0) 3528 { 3529 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3530 struct preempt_smoke arg[I915_NUM_ENGINES]; 3531 struct intel_engine_cs *engine; 3532 enum intel_engine_id id; 3533 unsigned long count; 3534 int err = 0; 3535 3536 for_each_engine(engine, smoke->gt, id) { 3537 arg[id] = *smoke; 3538 arg[id].engine = engine; 3539 if (!(flags & BATCH)) 3540 arg[id].batch = NULL; 3541 arg[id].count = 0; 3542 3543 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 3544 "igt/smoke:%d", id); 3545 if (IS_ERR(tsk[id])) { 3546 err = PTR_ERR(tsk[id]); 3547 break; 3548 } 3549 get_task_struct(tsk[id]); 3550 } 3551 3552 yield(); /* start all threads before we kthread_stop() */ 3553 3554 count = 0; 3555 for_each_engine(engine, smoke->gt, id) { 3556 int status; 3557 3558 if (IS_ERR_OR_NULL(tsk[id])) 3559 continue; 3560 3561 status = kthread_stop(tsk[id]); 3562 if (status && !err) 3563 err = status; 3564 3565 count += arg[id].count; 3566 3567 put_task_struct(tsk[id]); 3568 } 3569 3570 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3571 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3572 return 0; 3573 } 3574 3575 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3576 { 3577 enum intel_engine_id id; 3578 IGT_TIMEOUT(end_time); 3579 unsigned long count; 3580 3581 count = 0; 3582 do { 3583 for_each_engine(smoke->engine, smoke->gt, id) { 3584 struct i915_gem_context *ctx = smoke_context(smoke); 3585 int err; 3586 3587 err = smoke_submit(smoke, 3588 ctx, random_priority(&smoke->prng), 3589 flags & BATCH ? 
smoke->batch : NULL); 3590 if (err) 3591 return err; 3592 3593 count++; 3594 } 3595 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3596 3597 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3598 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3599 return 0; 3600 } 3601 3602 static int live_preempt_smoke(void *arg) 3603 { 3604 struct preempt_smoke smoke = { 3605 .gt = arg, 3606 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3607 .ncontext = 256, 3608 }; 3609 const unsigned int phase[] = { 0, BATCH }; 3610 struct igt_live_test t; 3611 int err = -ENOMEM; 3612 u32 *cs; 3613 int n; 3614 3615 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3616 return 0; 3617 3618 smoke.contexts = kmalloc_array(smoke.ncontext, 3619 sizeof(*smoke.contexts), 3620 GFP_KERNEL); 3621 if (!smoke.contexts) 3622 return -ENOMEM; 3623 3624 smoke.batch = 3625 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3626 if (IS_ERR(smoke.batch)) { 3627 err = PTR_ERR(smoke.batch); 3628 goto err_free; 3629 } 3630 3631 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3632 if (IS_ERR(cs)) { 3633 err = PTR_ERR(cs); 3634 goto err_batch; 3635 } 3636 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3637 cs[n] = MI_ARB_CHECK; 3638 cs[n] = MI_BATCH_BUFFER_END; 3639 i915_gem_object_flush_map(smoke.batch); 3640 i915_gem_object_unpin_map(smoke.batch); 3641 3642 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3643 err = -EIO; 3644 goto err_batch; 3645 } 3646 3647 for (n = 0; n < smoke.ncontext; n++) { 3648 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3649 if (!smoke.contexts[n]) 3650 goto err_ctx; 3651 } 3652 3653 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3654 err = smoke_crescendo(&smoke, phase[n]); 3655 if (err) 3656 goto err_ctx; 3657 3658 err = smoke_random(&smoke, phase[n]); 3659 if (err) 3660 goto err_ctx; 3661 } 3662 3663 err_ctx: 3664 if (igt_live_test_end(&t)) 3665 err = -EIO; 3666 3667 for (n = 0; n < smoke.ncontext; n++) { 3668 if (!smoke.contexts[n]) 3669 break; 3670 kernel_context_close(smoke.contexts[n]); 3671 } 3672 3673 err_batch: 3674 i915_gem_object_put(smoke.batch); 3675 err_free: 3676 kfree(smoke.contexts); 3677 3678 return err; 3679 } 3680 3681 static int nop_virtual_engine(struct intel_gt *gt, 3682 struct intel_engine_cs **siblings, 3683 unsigned int nsibling, 3684 unsigned int nctx, 3685 unsigned int flags) 3686 #define CHAIN BIT(0) 3687 { 3688 IGT_TIMEOUT(end_time); 3689 struct i915_request *request[16] = {}; 3690 struct intel_context *ve[16]; 3691 unsigned long n, prime, nc; 3692 struct igt_live_test t; 3693 ktime_t times[2] = {}; 3694 int err; 3695 3696 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3697 3698 for (n = 0; n < nctx; n++) { 3699 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3700 if (IS_ERR(ve[n])) { 3701 err = PTR_ERR(ve[n]); 3702 nctx = n; 3703 goto out; 3704 } 3705 3706 err = intel_context_pin(ve[n]); 3707 if (err) { 3708 intel_context_put(ve[n]); 3709 nctx = n; 3710 goto out; 3711 } 3712 } 3713 3714 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3715 if (err) 3716 goto out; 3717 3718 for_each_prime_number_from(prime, 1, 8192) { 3719 times[1] = ktime_get_raw(); 3720 3721 if (flags & CHAIN) { 3722 for (nc = 0; nc < nctx; nc++) { 3723 for (n = 0; n < prime; n++) { 3724 struct i915_request *rq; 3725 3726 rq = i915_request_create(ve[nc]); 3727 if (IS_ERR(rq)) { 3728 err = PTR_ERR(rq); 3729 goto out; 3730 } 3731 3732 if (request[nc]) 3733 
i915_request_put(request[nc]); 3734 request[nc] = i915_request_get(rq); 3735 i915_request_add(rq); 3736 } 3737 } 3738 } else { 3739 for (n = 0; n < prime; n++) { 3740 for (nc = 0; nc < nctx; nc++) { 3741 struct i915_request *rq; 3742 3743 rq = i915_request_create(ve[nc]); 3744 if (IS_ERR(rq)) { 3745 err = PTR_ERR(rq); 3746 goto out; 3747 } 3748 3749 if (request[nc]) 3750 i915_request_put(request[nc]); 3751 request[nc] = i915_request_get(rq); 3752 i915_request_add(rq); 3753 } 3754 } 3755 } 3756 3757 for (nc = 0; nc < nctx; nc++) { 3758 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3759 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3760 __func__, ve[0]->engine->name, 3761 request[nc]->fence.context, 3762 request[nc]->fence.seqno); 3763 3764 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3765 __func__, ve[0]->engine->name, 3766 request[nc]->fence.context, 3767 request[nc]->fence.seqno); 3768 GEM_TRACE_DUMP(); 3769 intel_gt_set_wedged(gt); 3770 break; 3771 } 3772 } 3773 3774 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3775 if (prime == 1) 3776 times[0] = times[1]; 3777 3778 for (nc = 0; nc < nctx; nc++) { 3779 i915_request_put(request[nc]); 3780 request[nc] = NULL; 3781 } 3782 3783 if (__igt_timeout(end_time, NULL)) 3784 break; 3785 } 3786 3787 err = igt_live_test_end(&t); 3788 if (err) 3789 goto out; 3790 3791 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3792 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3793 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3794 3795 out: 3796 if (igt_flush_test(gt->i915)) 3797 err = -EIO; 3798 3799 for (nc = 0; nc < nctx; nc++) { 3800 i915_request_put(request[nc]); 3801 intel_context_unpin(ve[nc]); 3802 intel_context_put(ve[nc]); 3803 } 3804 return err; 3805 } 3806 3807 static unsigned int 3808 __select_siblings(struct intel_gt *gt, 3809 unsigned int class, 3810 struct intel_engine_cs **siblings, 3811 bool (*filter)(const struct intel_engine_cs *)) 3812 { 3813 unsigned int n = 0; 3814 unsigned int inst; 3815 3816 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3817 if (!gt->engine_class[class][inst]) 3818 continue; 3819 3820 if (filter && !filter(gt->engine_class[class][inst])) 3821 continue; 3822 3823 siblings[n++] = gt->engine_class[class][inst]; 3824 } 3825 3826 return n; 3827 } 3828 3829 static unsigned int 3830 select_siblings(struct intel_gt *gt, 3831 unsigned int class, 3832 struct intel_engine_cs **siblings) 3833 { 3834 return __select_siblings(gt, class, siblings, NULL); 3835 } 3836 3837 static int live_virtual_engine(void *arg) 3838 { 3839 struct intel_gt *gt = arg; 3840 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3841 struct intel_engine_cs *engine; 3842 enum intel_engine_id id; 3843 unsigned int class; 3844 int err; 3845 3846 if (intel_uc_uses_guc_submission(>->uc)) 3847 return 0; 3848 3849 for_each_engine(engine, gt, id) { 3850 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3851 if (err) { 3852 pr_err("Failed to wrap engine %s: err=%d\n", 3853 engine->name, err); 3854 return err; 3855 } 3856 } 3857 3858 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3859 int nsibling, n; 3860 3861 nsibling = select_siblings(gt, class, siblings); 3862 if (nsibling < 2) 3863 continue; 3864 3865 for (n = 1; n <= nsibling + 1; n++) { 3866 err = nop_virtual_engine(gt, siblings, nsibling, 3867 n, 0); 3868 if (err) 3869 return err; 3870 } 3871 3872 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3873 if (err) 3874 return err; 3875 } 3876 3877 return 0; 3878 } 3879 3880 static int 
mask_virtual_engine(struct intel_gt *gt, 3881 struct intel_engine_cs **siblings, 3882 unsigned int nsibling) 3883 { 3884 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3885 struct intel_context *ve; 3886 struct igt_live_test t; 3887 unsigned int n; 3888 int err; 3889 3890 /* 3891 * Check that by setting the execution mask on a request, we can 3892 * restrict it to our desired engine within the virtual engine. 3893 */ 3894 3895 ve = intel_execlists_create_virtual(siblings, nsibling); 3896 if (IS_ERR(ve)) { 3897 err = PTR_ERR(ve); 3898 goto out_close; 3899 } 3900 3901 err = intel_context_pin(ve); 3902 if (err) 3903 goto out_put; 3904 3905 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3906 if (err) 3907 goto out_unpin; 3908 3909 for (n = 0; n < nsibling; n++) { 3910 request[n] = i915_request_create(ve); 3911 if (IS_ERR(request[n])) { 3912 err = PTR_ERR(request[n]); 3913 nsibling = n; 3914 goto out; 3915 } 3916 3917 /* Reverse order as it's more likely to be unnatural */ 3918 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3919 3920 i915_request_get(request[n]); 3921 i915_request_add(request[n]); 3922 } 3923 3924 for (n = 0; n < nsibling; n++) { 3925 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3926 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3927 __func__, ve->engine->name, 3928 request[n]->fence.context, 3929 request[n]->fence.seqno); 3930 3931 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3932 __func__, ve->engine->name, 3933 request[n]->fence.context, 3934 request[n]->fence.seqno); 3935 GEM_TRACE_DUMP(); 3936 intel_gt_set_wedged(gt); 3937 err = -EIO; 3938 goto out; 3939 } 3940 3941 if (request[n]->engine != siblings[nsibling - n - 1]) { 3942 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3943 request[n]->engine->name, 3944 siblings[nsibling - n - 1]->name); 3945 err = -EINVAL; 3946 goto out; 3947 } 3948 } 3949 3950 err = igt_live_test_end(&t); 3951 out: 3952 if (igt_flush_test(gt->i915)) 3953 err = -EIO; 3954 3955 for (n = 0; n < nsibling; n++) 3956 i915_request_put(request[n]); 3957 3958 out_unpin: 3959 intel_context_unpin(ve); 3960 out_put: 3961 intel_context_put(ve); 3962 out_close: 3963 return err; 3964 } 3965 3966 static int live_virtual_mask(void *arg) 3967 { 3968 struct intel_gt *gt = arg; 3969 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3970 unsigned int class; 3971 int err; 3972 3973 if (intel_uc_uses_guc_submission(>->uc)) 3974 return 0; 3975 3976 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3977 unsigned int nsibling; 3978 3979 nsibling = select_siblings(gt, class, siblings); 3980 if (nsibling < 2) 3981 continue; 3982 3983 err = mask_virtual_engine(gt, siblings, nsibling); 3984 if (err) 3985 return err; 3986 } 3987 3988 return 0; 3989 } 3990 3991 static int slicein_virtual_engine(struct intel_gt *gt, 3992 struct intel_engine_cs **siblings, 3993 unsigned int nsibling) 3994 { 3995 const long timeout = slice_timeout(siblings[0]); 3996 struct intel_context *ce; 3997 struct i915_request *rq; 3998 struct igt_spinner spin; 3999 unsigned int n; 4000 int err = 0; 4001 4002 /* 4003 * Virtual requests must take part in timeslicing on the target engines. 
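 *
 * We first occupy every sibling with a spinner submitted directly to
 * that physical engine, then expect a single request submitted to the
 * virtual engine to be granted a timeslice and complete within the
 * slice_timeout() of the first sibling.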
4004 */ 4005 4006 if (igt_spinner_init(&spin, gt)) 4007 return -ENOMEM; 4008 4009 for (n = 0; n < nsibling; n++) { 4010 ce = intel_context_create(siblings[n]); 4011 if (IS_ERR(ce)) { 4012 err = PTR_ERR(ce); 4013 goto out; 4014 } 4015 4016 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4017 intel_context_put(ce); 4018 if (IS_ERR(rq)) { 4019 err = PTR_ERR(rq); 4020 goto out; 4021 } 4022 4023 i915_request_add(rq); 4024 } 4025 4026 ce = intel_execlists_create_virtual(siblings, nsibling); 4027 if (IS_ERR(ce)) { 4028 err = PTR_ERR(ce); 4029 goto out; 4030 } 4031 4032 rq = intel_context_create_request(ce); 4033 intel_context_put(ce); 4034 if (IS_ERR(rq)) { 4035 err = PTR_ERR(rq); 4036 goto out; 4037 } 4038 4039 i915_request_get(rq); 4040 i915_request_add(rq); 4041 if (i915_request_wait(rq, 0, timeout) < 0) { 4042 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", 4043 __func__, rq->engine->name); 4044 GEM_TRACE_DUMP(); 4045 intel_gt_set_wedged(gt); 4046 err = -EIO; 4047 } 4048 i915_request_put(rq); 4049 4050 out: 4051 igt_spinner_end(&spin); 4052 if (igt_flush_test(gt->i915)) 4053 err = -EIO; 4054 igt_spinner_fini(&spin); 4055 return err; 4056 } 4057 4058 static int sliceout_virtual_engine(struct intel_gt *gt, 4059 struct intel_engine_cs **siblings, 4060 unsigned int nsibling) 4061 { 4062 const long timeout = slice_timeout(siblings[0]); 4063 struct intel_context *ce; 4064 struct i915_request *rq; 4065 struct igt_spinner spin; 4066 unsigned int n; 4067 int err = 0; 4068 4069 /* 4070 * Virtual requests must allow others a fair timeslice. 4071 */ 4072 4073 if (igt_spinner_init(&spin, gt)) 4074 return -ENOMEM; 4075 4076 /* XXX We do not handle oversubscription and fairness with normal rq */ 4077 for (n = 0; n < nsibling; n++) { 4078 ce = intel_execlists_create_virtual(siblings, nsibling); 4079 if (IS_ERR(ce)) { 4080 err = PTR_ERR(ce); 4081 goto out; 4082 } 4083 4084 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4085 intel_context_put(ce); 4086 if (IS_ERR(rq)) { 4087 err = PTR_ERR(rq); 4088 goto out; 4089 } 4090 4091 i915_request_add(rq); 4092 } 4093 4094 for (n = 0; !err && n < nsibling; n++) { 4095 ce = intel_context_create(siblings[n]); 4096 if (IS_ERR(ce)) { 4097 err = PTR_ERR(ce); 4098 goto out; 4099 } 4100 4101 rq = intel_context_create_request(ce); 4102 intel_context_put(ce); 4103 if (IS_ERR(rq)) { 4104 err = PTR_ERR(rq); 4105 goto out; 4106 } 4107 4108 i915_request_get(rq); 4109 i915_request_add(rq); 4110 if (i915_request_wait(rq, 0, timeout) < 0) { 4111 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", 4112 __func__, siblings[n]->name); 4113 GEM_TRACE_DUMP(); 4114 intel_gt_set_wedged(gt); 4115 err = -EIO; 4116 } 4117 i915_request_put(rq); 4118 } 4119 4120 out: 4121 igt_spinner_end(&spin); 4122 if (igt_flush_test(gt->i915)) 4123 err = -EIO; 4124 igt_spinner_fini(&spin); 4125 return err; 4126 } 4127 4128 static int live_virtual_slice(void *arg) 4129 { 4130 struct intel_gt *gt = arg; 4131 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4132 unsigned int class; 4133 int err; 4134 4135 if (intel_uc_uses_guc_submission(>->uc)) 4136 return 0; 4137 4138 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4139 unsigned int nsibling; 4140 4141 nsibling = __select_siblings(gt, class, siblings, 4142 intel_engine_has_timeslices); 4143 if (nsibling < 2) 4144 continue; 4145 4146 err = slicein_virtual_engine(gt, siblings, nsibling); 4147 if (err) 4148 return err; 4149 4150 err = sliceout_virtual_engine(gt, siblings, nsibling); 4151 if (err) 4152 
return err; 4153 } 4154 4155 return 0; 4156 } 4157 4158 static int preserved_virtual_engine(struct intel_gt *gt, 4159 struct intel_engine_cs **siblings, 4160 unsigned int nsibling) 4161 { 4162 struct i915_request *last = NULL; 4163 struct intel_context *ve; 4164 struct i915_vma *scratch; 4165 struct igt_live_test t; 4166 unsigned int n; 4167 int err = 0; 4168 u32 *cs; 4169 4170 scratch = create_scratch(siblings[0]->gt); 4171 if (IS_ERR(scratch)) 4172 return PTR_ERR(scratch); 4173 4174 err = i915_vma_sync(scratch); 4175 if (err) 4176 goto out_scratch; 4177 4178 ve = intel_execlists_create_virtual(siblings, nsibling); 4179 if (IS_ERR(ve)) { 4180 err = PTR_ERR(ve); 4181 goto out_scratch; 4182 } 4183 4184 err = intel_context_pin(ve); 4185 if (err) 4186 goto out_put; 4187 4188 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 4189 if (err) 4190 goto out_unpin; 4191 4192 for (n = 0; n < NUM_GPR_DW; n++) { 4193 struct intel_engine_cs *engine = siblings[n % nsibling]; 4194 struct i915_request *rq; 4195 4196 rq = i915_request_create(ve); 4197 if (IS_ERR(rq)) { 4198 err = PTR_ERR(rq); 4199 goto out_end; 4200 } 4201 4202 i915_request_put(last); 4203 last = i915_request_get(rq); 4204 4205 cs = intel_ring_begin(rq, 8); 4206 if (IS_ERR(cs)) { 4207 i915_request_add(rq); 4208 err = PTR_ERR(cs); 4209 goto out_end; 4210 } 4211 4212 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4213 *cs++ = CS_GPR(engine, n); 4214 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4215 *cs++ = 0; 4216 4217 *cs++ = MI_LOAD_REGISTER_IMM(1); 4218 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 4219 *cs++ = n + 1; 4220 4221 *cs++ = MI_NOOP; 4222 intel_ring_advance(rq, cs); 4223 4224 /* Restrict this request to run on a particular engine */ 4225 rq->execution_mask = engine->mask; 4226 i915_request_add(rq); 4227 } 4228 4229 if (i915_request_wait(last, 0, HZ / 5) < 0) { 4230 err = -ETIME; 4231 goto out_end; 4232 } 4233 4234 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4235 if (IS_ERR(cs)) { 4236 err = PTR_ERR(cs); 4237 goto out_end; 4238 } 4239 4240 for (n = 0; n < NUM_GPR_DW; n++) { 4241 if (cs[n] != n) { 4242 pr_err("Incorrect value[%d] found for GPR[%d]\n", 4243 cs[n], n); 4244 err = -EINVAL; 4245 break; 4246 } 4247 } 4248 4249 i915_gem_object_unpin_map(scratch->obj); 4250 4251 out_end: 4252 if (igt_live_test_end(&t)) 4253 err = -EIO; 4254 i915_request_put(last); 4255 out_unpin: 4256 intel_context_unpin(ve); 4257 out_put: 4258 intel_context_put(ve); 4259 out_scratch: 4260 i915_vma_unpin_and_release(&scratch, 0); 4261 return err; 4262 } 4263 4264 static int live_virtual_preserved(void *arg) 4265 { 4266 struct intel_gt *gt = arg; 4267 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4268 unsigned int class; 4269 4270 /* 4271 * Check that the context image retains non-privileged (user) registers 4272 * from one engine to the next. For this we check that the CS_GPR 4273 * are preserved. 4274 */ 4275 4276 if (intel_uc_uses_guc_submission(>->uc)) 4277 return 0; 4278 4279 /* As we use CS_GPR we cannot run before they existed on all engines. 
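 * Each request in preserved_virtual_engine() reads back the GPR loaded
 * by its predecessor while being pinned to a different sibling, so the
 * register file must travel with the context image; the gen check
 * below skips platforms where CS_GPR are not available on every
 * engine.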
*/ 4280 if (INTEL_GEN(gt->i915) < 9) 4281 return 0; 4282 4283 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4284 int nsibling, err; 4285 4286 nsibling = select_siblings(gt, class, siblings); 4287 if (nsibling < 2) 4288 continue; 4289 4290 err = preserved_virtual_engine(gt, siblings, nsibling); 4291 if (err) 4292 return err; 4293 } 4294 4295 return 0; 4296 } 4297 4298 static int bond_virtual_engine(struct intel_gt *gt, 4299 unsigned int class, 4300 struct intel_engine_cs **siblings, 4301 unsigned int nsibling, 4302 unsigned int flags) 4303 #define BOND_SCHEDULE BIT(0) 4304 { 4305 struct intel_engine_cs *master; 4306 struct i915_request *rq[16]; 4307 enum intel_engine_id id; 4308 struct igt_spinner spin; 4309 unsigned long n; 4310 int err; 4311 4312 /* 4313 * A set of bonded requests is intended to be run concurrently 4314 * across a number of engines. We use one request per-engine 4315 * and a magic fence to schedule each of the bonded requests 4316 * at the same time. A consequence of our current scheduler is that 4317 * we only move requests to the HW ready queue when the request 4318 * becomes ready, that is when all of its prerequisite fences have 4319 * been signaled. As one of those fences is the master submit fence, 4320 * there is a delay on all secondary fences as the HW may be 4321 * currently busy. Equally, as all the requests are independent, 4322 * they may have other fences that delay individual request 4323 * submission to HW. Ergo, we do not guarantee that all requests are 4324 * immediately submitted to HW at the same time, just that if the 4325 * rules are abided by, they are ready at the same time as the 4326 * first is submitted. Userspace can embed semaphores in its batch 4327 * to ensure parallel execution of its phases as it requires. 4328 * Though naturally it gets requested that perhaps the scheduler should 4329 * take care of parallel execution, even across preemption events on 4330 * different HW. (The proper answer is of course "lalalala".) 4331 * 4332 * With the submit-fence, we have identified three possible phases 4333 * of synchronisation depending on the master fence: queued (not 4334 * ready), executing, and signaled. The first two are quite simple 4335 * and checked below. However, the signaled master fence handling is 4336 * contentious. Currently we do not distinguish between a signaled 4337 * fence and an expired fence, as once signaled it does not convey 4338 * any information about the previous execution. It may even be freed 4339 * and hence checking later it may not exist at all. Ergo we currently 4340 * do not apply the bonding constraint for an already signaled fence, 4341 * as our expectation is that it should not constrain the secondaries 4342 * and is outside of the scope of the bonded request API (i.e. all 4343 * userspace requests are meant to be running in parallel). As 4344 * it imposes no constraint, and is effectively a no-op, we do not 4345 * check below as normal execution flows are checked extensively above. 4346 * 4347 * XXX Is the degenerate handling of signaled submit fences the 4348 * expected behaviour for userspace?
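 *
 * Below we cover the first two phases for each master engine outside
 * the sibling class: with BOND_SCHEDULE the master is held back behind
 * an onstack fence (queued), otherwise its spinner is left running
 * (executing). In both cases every bonded sibling must end up
 * executing on exactly the engine it was bonded to.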
4349 */ 4350 4351 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 4352 4353 if (igt_spinner_init(&spin, gt)) 4354 return -ENOMEM; 4355 4356 err = 0; 4357 rq[0] = ERR_PTR(-ENOMEM); 4358 for_each_engine(master, gt, id) { 4359 struct i915_sw_fence fence = {}; 4360 struct intel_context *ce; 4361 4362 if (master->class == class) 4363 continue; 4364 4365 ce = intel_context_create(master); 4366 if (IS_ERR(ce)) { 4367 err = PTR_ERR(ce); 4368 goto out; 4369 } 4370 4371 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 4372 4373 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 4374 intel_context_put(ce); 4375 if (IS_ERR(rq[0])) { 4376 err = PTR_ERR(rq[0]); 4377 goto out; 4378 } 4379 i915_request_get(rq[0]); 4380 4381 if (flags & BOND_SCHEDULE) { 4382 onstack_fence_init(&fence); 4383 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4384 &fence, 4385 GFP_KERNEL); 4386 } 4387 4388 i915_request_add(rq[0]); 4389 if (err < 0) 4390 goto out; 4391 4392 if (!(flags & BOND_SCHEDULE) && 4393 !igt_wait_for_spinner(&spin, rq[0])) { 4394 err = -EIO; 4395 goto out; 4396 } 4397 4398 for (n = 0; n < nsibling; n++) { 4399 struct intel_context *ve; 4400 4401 ve = intel_execlists_create_virtual(siblings, nsibling); 4402 if (IS_ERR(ve)) { 4403 err = PTR_ERR(ve); 4404 onstack_fence_fini(&fence); 4405 goto out; 4406 } 4407 4408 err = intel_virtual_engine_attach_bond(ve->engine, 4409 master, 4410 siblings[n]); 4411 if (err) { 4412 intel_context_put(ve); 4413 onstack_fence_fini(&fence); 4414 goto out; 4415 } 4416 4417 err = intel_context_pin(ve); 4418 intel_context_put(ve); 4419 if (err) { 4420 onstack_fence_fini(&fence); 4421 goto out; 4422 } 4423 4424 rq[n + 1] = i915_request_create(ve); 4425 intel_context_unpin(ve); 4426 if (IS_ERR(rq[n + 1])) { 4427 err = PTR_ERR(rq[n + 1]); 4428 onstack_fence_fini(&fence); 4429 goto out; 4430 } 4431 i915_request_get(rq[n + 1]); 4432 4433 err = i915_request_await_execution(rq[n + 1], 4434 &rq[0]->fence, 4435 ve->engine->bond_execute); 4436 i915_request_add(rq[n + 1]); 4437 if (err < 0) { 4438 onstack_fence_fini(&fence); 4439 goto out; 4440 } 4441 } 4442 onstack_fence_fini(&fence); 4443 intel_engine_flush_submission(master); 4444 igt_spinner_end(&spin); 4445 4446 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4447 pr_err("Master request did not execute (on %s)!\n", 4448 rq[0]->engine->name); 4449 err = -EIO; 4450 goto out; 4451 } 4452 4453 for (n = 0; n < nsibling; n++) { 4454 if (i915_request_wait(rq[n + 1], 0, 4455 MAX_SCHEDULE_TIMEOUT) < 0) { 4456 err = -EIO; 4457 goto out; 4458 } 4459 4460 if (rq[n + 1]->engine != siblings[n]) { 4461 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4462 siblings[n]->name, 4463 rq[n + 1]->engine->name, 4464 rq[0]->engine->name); 4465 err = -EINVAL; 4466 goto out; 4467 } 4468 } 4469 4470 for (n = 0; !IS_ERR(rq[n]); n++) 4471 i915_request_put(rq[n]); 4472 rq[0] = ERR_PTR(-ENOMEM); 4473 } 4474 4475 out: 4476 for (n = 0; !IS_ERR(rq[n]); n++) 4477 i915_request_put(rq[n]); 4478 if (igt_flush_test(gt->i915)) 4479 err = -EIO; 4480 4481 igt_spinner_fini(&spin); 4482 return err; 4483 } 4484 4485 static int live_virtual_bond(void *arg) 4486 { 4487 static const struct phase { 4488 const char *name; 4489 unsigned int flags; 4490 } phases[] = { 4491 { "", 0 }, 4492 { "schedule", BOND_SCHEDULE }, 4493 { }, 4494 }; 4495 struct intel_gt *gt = arg; 4496 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4497 unsigned int class; 4498 int err; 4499 4500 if (intel_uc_uses_guc_submission(>->uc)) 4501 
return 0; 4502 4503 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4504 const struct phase *p; 4505 int nsibling; 4506 4507 nsibling = select_siblings(gt, class, siblings); 4508 if (nsibling < 2) 4509 continue; 4510 4511 for (p = phases; p->name; p++) { 4512 err = bond_virtual_engine(gt, 4513 class, siblings, nsibling, 4514 p->flags); 4515 if (err) { 4516 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 4517 __func__, p->name, class, nsibling, err); 4518 return err; 4519 } 4520 } 4521 } 4522 4523 return 0; 4524 } 4525 4526 static int reset_virtual_engine(struct intel_gt *gt, 4527 struct intel_engine_cs **siblings, 4528 unsigned int nsibling) 4529 { 4530 struct intel_engine_cs *engine; 4531 struct intel_context *ve; 4532 struct igt_spinner spin; 4533 struct i915_request *rq; 4534 unsigned int n; 4535 int err = 0; 4536 4537 /* 4538 * In order to support offline error capture for fast preempt reset, 4539 * we need to decouple the guilty request and ensure that it and its 4540 * descendents are not executed while the capture is in progress. 4541 */ 4542 4543 if (igt_spinner_init(&spin, gt)) 4544 return -ENOMEM; 4545 4546 ve = intel_execlists_create_virtual(siblings, nsibling); 4547 if (IS_ERR(ve)) { 4548 err = PTR_ERR(ve); 4549 goto out_spin; 4550 } 4551 4552 for (n = 0; n < nsibling; n++) 4553 st_engine_heartbeat_disable(siblings[n]); 4554 4555 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4556 if (IS_ERR(rq)) { 4557 err = PTR_ERR(rq); 4558 goto out_heartbeat; 4559 } 4560 i915_request_add(rq); 4561 4562 if (!igt_wait_for_spinner(&spin, rq)) { 4563 intel_gt_set_wedged(gt); 4564 err = -ETIME; 4565 goto out_heartbeat; 4566 } 4567 4568 engine = rq->engine; 4569 GEM_BUG_ON(engine == ve->engine); 4570 4571 /* Take ownership of the reset and tasklet */ 4572 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 4573 >->reset.flags)) { 4574 intel_gt_set_wedged(gt); 4575 err = -EBUSY; 4576 goto out_heartbeat; 4577 } 4578 tasklet_disable(&engine->execlists.tasklet); 4579 4580 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 4581 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 4582 4583 /* Fake a preemption event; failed of course */ 4584 spin_lock_irq(&engine->active.lock); 4585 __unwind_incomplete_requests(engine); 4586 spin_unlock_irq(&engine->active.lock); 4587 GEM_BUG_ON(rq->engine != ve->engine); 4588 4589 /* Reset the engine while keeping our active request on hold */ 4590 execlists_hold(engine, rq); 4591 GEM_BUG_ON(!i915_request_on_hold(rq)); 4592 4593 intel_engine_reset(engine, NULL); 4594 GEM_BUG_ON(rq->fence.error != -EIO); 4595 4596 /* Release our grasp on the engine, letting CS flow again */ 4597 tasklet_enable(&engine->execlists.tasklet); 4598 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 4599 4600 /* Check that we do not resubmit the held request */ 4601 i915_request_get(rq); 4602 if (!i915_request_wait(rq, 0, HZ / 5)) { 4603 pr_err("%s: on hold request completed!\n", 4604 engine->name); 4605 intel_gt_set_wedged(gt); 4606 err = -EIO; 4607 goto out_rq; 4608 } 4609 GEM_BUG_ON(!i915_request_on_hold(rq)); 4610 4611 /* But is resubmitted on release */ 4612 execlists_unhold(engine, rq); 4613 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4614 pr_err("%s: held request did not complete!\n", 4615 engine->name); 4616 intel_gt_set_wedged(gt); 4617 err = -ETIME; 4618 } 4619 4620 out_rq: 4621 i915_request_put(rq); 4622 out_heartbeat: 4623 for (n = 0; n < nsibling; n++) 4624 st_engine_heartbeat_enable(siblings[n]); 4625 4626 
intel_context_put(ve); 4627 out_spin: 4628 igt_spinner_fini(&spin); 4629 return err; 4630 } 4631 4632 static int live_virtual_reset(void *arg) 4633 { 4634 struct intel_gt *gt = arg; 4635 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4636 unsigned int class; 4637 4638 /* 4639 * Check that we handle a reset event within a virtual engine. 4640 * Only the physical engine is reset, but we have to check the flow 4641 * of the virtual requests around the reset, and make sure it is not 4642 * forgotten. 4643 */ 4644 4645 if (intel_uc_uses_guc_submission(>->uc)) 4646 return 0; 4647 4648 if (!intel_has_reset_engine(gt)) 4649 return 0; 4650 4651 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4652 int nsibling, err; 4653 4654 nsibling = select_siblings(gt, class, siblings); 4655 if (nsibling < 2) 4656 continue; 4657 4658 err = reset_virtual_engine(gt, siblings, nsibling); 4659 if (err) 4660 return err; 4661 } 4662 4663 return 0; 4664 } 4665 4666 int intel_execlists_live_selftests(struct drm_i915_private *i915) 4667 { 4668 static const struct i915_subtest tests[] = { 4669 SUBTEST(live_sanitycheck), 4670 SUBTEST(live_unlite_switch), 4671 SUBTEST(live_unlite_preempt), 4672 SUBTEST(live_unlite_ring), 4673 SUBTEST(live_pin_rewind), 4674 SUBTEST(live_hold_reset), 4675 SUBTEST(live_error_interrupt), 4676 SUBTEST(live_timeslice_preempt), 4677 SUBTEST(live_timeslice_rewind), 4678 SUBTEST(live_timeslice_queue), 4679 SUBTEST(live_timeslice_nopreempt), 4680 SUBTEST(live_busywait_preempt), 4681 SUBTEST(live_preempt), 4682 SUBTEST(live_late_preempt), 4683 SUBTEST(live_nopreempt), 4684 SUBTEST(live_preempt_cancel), 4685 SUBTEST(live_suppress_self_preempt), 4686 SUBTEST(live_chain_preempt), 4687 SUBTEST(live_preempt_ring), 4688 SUBTEST(live_preempt_gang), 4689 SUBTEST(live_preempt_timeout), 4690 SUBTEST(live_preempt_user), 4691 SUBTEST(live_preempt_smoke), 4692 SUBTEST(live_virtual_engine), 4693 SUBTEST(live_virtual_mask), 4694 SUBTEST(live_virtual_preserved), 4695 SUBTEST(live_virtual_slice), 4696 SUBTEST(live_virtual_bond), 4697 SUBTEST(live_virtual_reset), 4698 }; 4699 4700 if (!HAS_EXECLISTS(i915)) 4701 return 0; 4702 4703 if (intel_gt_is_wedged(&i915->gt)) 4704 return 0; 4705 4706 return intel_gt_live_subtests(tests, &i915->gt); 4707 } 4708 4709 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 4710 { 4711 const u32 offset = 4712 i915_ggtt_offset(ce->engine->status_page.vma) + 4713 offset_in_page(slot); 4714 struct i915_request *rq; 4715 u32 *cs; 4716 4717 rq = intel_context_create_request(ce); 4718 if (IS_ERR(rq)) 4719 return PTR_ERR(rq); 4720 4721 cs = intel_ring_begin(rq, 4); 4722 if (IS_ERR(cs)) { 4723 i915_request_add(rq); 4724 return PTR_ERR(cs); 4725 } 4726 4727 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4728 *cs++ = offset; 4729 *cs++ = 0; 4730 *cs++ = 1; 4731 4732 intel_ring_advance(rq, cs); 4733 4734 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4735 i915_request_add(rq); 4736 return 0; 4737 } 4738 4739 static int context_flush(struct intel_context *ce, long timeout) 4740 { 4741 struct i915_request *rq; 4742 struct dma_fence *fence; 4743 int err = 0; 4744 4745 rq = intel_engine_create_kernel_request(ce->engine); 4746 if (IS_ERR(rq)) 4747 return PTR_ERR(rq); 4748 4749 fence = i915_active_fence_get(&ce->timeline->last_request); 4750 if (fence) { 4751 i915_request_await_dma_fence(rq, fence); 4752 dma_fence_put(fence); 4753 } 4754 4755 rq = i915_request_get(rq); 4756 i915_request_add(rq); 4757 if (i915_request_wait(rq, 0, timeout) < 0) 4758 err = -ETIME; 4759 
i915_request_put(rq); 4760 4761 rmb(); /* We know the request is written, make sure all state is too! */ 4762 return err; 4763 } 4764 4765 static int live_lrc_layout(void *arg) 4766 { 4767 struct intel_gt *gt = arg; 4768 struct intel_engine_cs *engine; 4769 enum intel_engine_id id; 4770 u32 *lrc; 4771 int err; 4772 4773 /* 4774 * Check the registers offsets we use to create the initial reg state 4775 * match the layout saved by HW. 4776 */ 4777 4778 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4779 if (!lrc) 4780 return -ENOMEM; 4781 4782 err = 0; 4783 for_each_engine(engine, gt, id) { 4784 u32 *hw; 4785 int dw; 4786 4787 if (!engine->default_state) 4788 continue; 4789 4790 hw = shmem_pin_map(engine->default_state); 4791 if (IS_ERR(hw)) { 4792 err = PTR_ERR(hw); 4793 break; 4794 } 4795 hw += LRC_STATE_OFFSET / sizeof(*hw); 4796 4797 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4798 engine->kernel_context, 4799 engine, 4800 engine->kernel_context->ring, 4801 true); 4802 4803 dw = 0; 4804 do { 4805 u32 lri = hw[dw]; 4806 4807 if (lri == 0) { 4808 dw++; 4809 continue; 4810 } 4811 4812 if (lrc[dw] == 0) { 4813 pr_debug("%s: skipped instruction %x at dword %d\n", 4814 engine->name, lri, dw); 4815 dw++; 4816 continue; 4817 } 4818 4819 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4820 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4821 engine->name, dw, lri); 4822 err = -EINVAL; 4823 break; 4824 } 4825 4826 if (lrc[dw] != lri) { 4827 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4828 engine->name, dw, lri, lrc[dw]); 4829 err = -EINVAL; 4830 break; 4831 } 4832 4833 lri &= 0x7f; 4834 lri++; 4835 dw++; 4836 4837 while (lri) { 4838 if (hw[dw] != lrc[dw]) { 4839 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4840 engine->name, dw, hw[dw], lrc[dw]); 4841 err = -EINVAL; 4842 break; 4843 } 4844 4845 /* 4846 * Skip over the actual register value as we 4847 * expect that to differ. 4848 */ 4849 dw += 2; 4850 lri -= 2; 4851 } 4852 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4853 4854 if (err) { 4855 pr_info("%s: HW register image:\n", engine->name); 4856 igt_hexdump(hw, PAGE_SIZE); 4857 4858 pr_info("%s: SW register image:\n", engine->name); 4859 igt_hexdump(lrc, PAGE_SIZE); 4860 } 4861 4862 shmem_unpin_map(engine->default_state, hw); 4863 if (err) 4864 break; 4865 } 4866 4867 kfree(lrc); 4868 return err; 4869 } 4870 4871 static int find_offset(const u32 *lri, u32 offset) 4872 { 4873 int i; 4874 4875 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4876 if (lri[i] == offset) 4877 return i; 4878 4879 return -1; 4880 } 4881 4882 static int live_lrc_fixed(void *arg) 4883 { 4884 struct intel_gt *gt = arg; 4885 struct intel_engine_cs *engine; 4886 enum intel_engine_id id; 4887 int err = 0; 4888 4889 /* 4890 * Check the assumed register offsets match the actual locations in 4891 * the context image. 
4892 */ 4893 4894 for_each_engine(engine, gt, id) { 4895 const struct { 4896 u32 reg; 4897 u32 offset; 4898 const char *name; 4899 } tbl[] = { 4900 { 4901 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 4902 CTX_RING_START - 1, 4903 "RING_START" 4904 }, 4905 { 4906 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 4907 CTX_RING_CTL - 1, 4908 "RING_CTL" 4909 }, 4910 { 4911 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 4912 CTX_RING_HEAD - 1, 4913 "RING_HEAD" 4914 }, 4915 { 4916 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 4917 CTX_RING_TAIL - 1, 4918 "RING_TAIL" 4919 }, 4920 { 4921 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 4922 lrc_ring_mi_mode(engine), 4923 "RING_MI_MODE" 4924 }, 4925 { 4926 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 4927 CTX_BB_STATE - 1, 4928 "BB_STATE" 4929 }, 4930 { 4931 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), 4932 lrc_ring_wa_bb_per_ctx(engine), 4933 "RING_BB_PER_CTX_PTR" 4934 }, 4935 { 4936 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), 4937 lrc_ring_indirect_ptr(engine), 4938 "RING_INDIRECT_CTX_PTR" 4939 }, 4940 { 4941 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), 4942 lrc_ring_indirect_offset(engine), 4943 "RING_INDIRECT_CTX_OFFSET" 4944 }, 4945 { 4946 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 4947 CTX_TIMESTAMP - 1, 4948 "RING_CTX_TIMESTAMP" 4949 }, 4950 { 4951 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), 4952 lrc_ring_gpr0(engine), 4953 "RING_CS_GPR0" 4954 }, 4955 { 4956 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), 4957 lrc_ring_cmd_buf_cctl(engine), 4958 "RING_CMD_BUF_CCTL" 4959 }, 4960 { }, 4961 }, *t; 4962 u32 *hw; 4963 4964 if (!engine->default_state) 4965 continue; 4966 4967 hw = shmem_pin_map(engine->default_state); 4968 if (IS_ERR(hw)) { 4969 err = PTR_ERR(hw); 4970 break; 4971 } 4972 hw += LRC_STATE_OFFSET / sizeof(*hw); 4973 4974 for (t = tbl; t->name; t++) { 4975 int dw = find_offset(hw, t->reg); 4976 4977 if (dw != t->offset) { 4978 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 4979 engine->name, 4980 t->name, 4981 t->reg, 4982 dw, 4983 t->offset); 4984 err = -EINVAL; 4985 } 4986 } 4987 4988 shmem_unpin_map(engine->default_state, hw); 4989 } 4990 4991 return err; 4992 } 4993 4994 static int __live_lrc_state(struct intel_engine_cs *engine, 4995 struct i915_vma *scratch) 4996 { 4997 struct intel_context *ce; 4998 struct i915_request *rq; 4999 struct i915_gem_ww_ctx ww; 5000 enum { 5001 RING_START_IDX = 0, 5002 RING_TAIL_IDX, 5003 MAX_IDX 5004 }; 5005 u32 expected[MAX_IDX]; 5006 u32 *cs; 5007 int err; 5008 int n; 5009 5010 ce = intel_context_create(engine); 5011 if (IS_ERR(ce)) 5012 return PTR_ERR(ce); 5013 5014 i915_gem_ww_ctx_init(&ww, false); 5015 retry: 5016 err = i915_gem_object_lock(scratch->obj, &ww); 5017 if (!err) 5018 err = intel_context_pin_ww(ce, &ww); 5019 if (err) 5020 goto err_put; 5021 5022 rq = i915_request_create(ce); 5023 if (IS_ERR(rq)) { 5024 err = PTR_ERR(rq); 5025 goto err_unpin; 5026 } 5027 5028 cs = intel_ring_begin(rq, 4 * MAX_IDX); 5029 if (IS_ERR(cs)) { 5030 err = PTR_ERR(cs); 5031 i915_request_add(rq); 5032 goto err_unpin; 5033 } 5034 5035 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5036 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 5037 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 5038 *cs++ = 0; 5039 5040 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 5041 5042 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | 
MI_USE_GGTT; 5043 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 5044 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 5045 *cs++ = 0; 5046 5047 err = i915_request_await_object(rq, scratch->obj, true); 5048 if (!err) 5049 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 5050 5051 i915_request_get(rq); 5052 i915_request_add(rq); 5053 if (err) 5054 goto err_rq; 5055 5056 intel_engine_flush_submission(engine); 5057 expected[RING_TAIL_IDX] = ce->ring->tail; 5058 5059 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 5060 err = -ETIME; 5061 goto err_rq; 5062 } 5063 5064 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 5065 if (IS_ERR(cs)) { 5066 err = PTR_ERR(cs); 5067 goto err_rq; 5068 } 5069 5070 for (n = 0; n < MAX_IDX; n++) { 5071 if (cs[n] != expected[n]) { 5072 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 5073 engine->name, n, cs[n], expected[n]); 5074 err = -EINVAL; 5075 break; 5076 } 5077 } 5078 5079 i915_gem_object_unpin_map(scratch->obj); 5080 5081 err_rq: 5082 i915_request_put(rq); 5083 err_unpin: 5084 intel_context_unpin(ce); 5085 err_put: 5086 if (err == -EDEADLK) { 5087 err = i915_gem_ww_ctx_backoff(&ww); 5088 if (!err) 5089 goto retry; 5090 } 5091 i915_gem_ww_ctx_fini(&ww); 5092 intel_context_put(ce); 5093 return err; 5094 } 5095 5096 static int live_lrc_state(void *arg) 5097 { 5098 struct intel_gt *gt = arg; 5099 struct intel_engine_cs *engine; 5100 struct i915_vma *scratch; 5101 enum intel_engine_id id; 5102 int err = 0; 5103 5104 /* 5105 * Check the live register state matches what we expect for this 5106 * intel_context. 5107 */ 5108 5109 scratch = create_scratch(gt); 5110 if (IS_ERR(scratch)) 5111 return PTR_ERR(scratch); 5112 5113 for_each_engine(engine, gt, id) { 5114 err = __live_lrc_state(engine, scratch); 5115 if (err) 5116 break; 5117 } 5118 5119 if (igt_flush_test(gt->i915)) 5120 err = -EIO; 5121 5122 i915_vma_unpin_and_release(&scratch, 0); 5123 return err; 5124 } 5125 5126 static int gpr_make_dirty(struct intel_context *ce) 5127 { 5128 struct i915_request *rq; 5129 u32 *cs; 5130 int n; 5131 5132 rq = intel_context_create_request(ce); 5133 if (IS_ERR(rq)) 5134 return PTR_ERR(rq); 5135 5136 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 5137 if (IS_ERR(cs)) { 5138 i915_request_add(rq); 5139 return PTR_ERR(cs); 5140 } 5141 5142 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 5143 for (n = 0; n < NUM_GPR_DW; n++) { 5144 *cs++ = CS_GPR(ce->engine, n); 5145 *cs++ = STACK_MAGIC; 5146 } 5147 *cs++ = MI_NOOP; 5148 5149 intel_ring_advance(rq, cs); 5150 5151 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 5152 i915_request_add(rq); 5153 5154 return 0; 5155 } 5156 5157 static struct i915_request * 5158 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 5159 { 5160 const u32 offset = 5161 i915_ggtt_offset(ce->engine->status_page.vma) + 5162 offset_in_page(slot); 5163 struct i915_request *rq; 5164 u32 *cs; 5165 int err; 5166 int n; 5167 5168 rq = intel_context_create_request(ce); 5169 if (IS_ERR(rq)) 5170 return rq; 5171 5172 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 5173 if (IS_ERR(cs)) { 5174 i915_request_add(rq); 5175 return ERR_CAST(cs); 5176 } 5177 5178 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5179 *cs++ = MI_NOOP; 5180 5181 *cs++ = MI_SEMAPHORE_WAIT | 5182 MI_SEMAPHORE_GLOBAL_GTT | 5183 MI_SEMAPHORE_POLL | 5184 MI_SEMAPHORE_SAD_NEQ_SDD; 5185 *cs++ = 0; 5186 *cs++ = offset; 5187 *cs++ = 0; 5188 5189 for (n = 0; n < NUM_GPR_DW; n++) { 5190 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | 
MI_USE_GGTT; 5191 *cs++ = CS_GPR(ce->engine, n); 5192 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 5193 *cs++ = 0; 5194 } 5195 5196 i915_vma_lock(scratch); 5197 err = i915_request_await_object(rq, scratch->obj, true); 5198 if (!err) 5199 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 5200 i915_vma_unlock(scratch); 5201 5202 i915_request_get(rq); 5203 i915_request_add(rq); 5204 if (err) { 5205 i915_request_put(rq); 5206 rq = ERR_PTR(err); 5207 } 5208 5209 return rq; 5210 } 5211 5212 static int __live_lrc_gpr(struct intel_engine_cs *engine, 5213 struct i915_vma *scratch, 5214 bool preempt) 5215 { 5216 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 5217 struct intel_context *ce; 5218 struct i915_request *rq; 5219 u32 *cs; 5220 int err; 5221 int n; 5222 5223 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 5224 return 0; /* GPR only on rcs0 for gen8 */ 5225 5226 err = gpr_make_dirty(engine->kernel_context); 5227 if (err) 5228 return err; 5229 5230 ce = intel_context_create(engine); 5231 if (IS_ERR(ce)) 5232 return PTR_ERR(ce); 5233 5234 rq = __gpr_read(ce, scratch, slot); 5235 if (IS_ERR(rq)) { 5236 err = PTR_ERR(rq); 5237 goto err_put; 5238 } 5239 5240 err = wait_for_submit(engine, rq, HZ / 2); 5241 if (err) 5242 goto err_rq; 5243 5244 if (preempt) { 5245 err = gpr_make_dirty(engine->kernel_context); 5246 if (err) 5247 goto err_rq; 5248 5249 err = emit_semaphore_signal(engine->kernel_context, slot); 5250 if (err) 5251 goto err_rq; 5252 } else { 5253 slot[0] = 1; 5254 wmb(); 5255 } 5256 5257 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 5258 err = -ETIME; 5259 goto err_rq; 5260 } 5261 5262 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 5263 if (IS_ERR(cs)) { 5264 err = PTR_ERR(cs); 5265 goto err_rq; 5266 } 5267 5268 for (n = 0; n < NUM_GPR_DW; n++) { 5269 if (cs[n]) { 5270 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 5271 engine->name, 5272 n / 2, n & 1 ? "udw" : "ldw", 5273 cs[n]); 5274 err = -EINVAL; 5275 break; 5276 } 5277 } 5278 5279 i915_gem_object_unpin_map(scratch->obj); 5280 5281 err_rq: 5282 memset32(&slot[0], -1, 4); 5283 wmb(); 5284 i915_request_put(rq); 5285 err_put: 5286 intel_context_put(ce); 5287 return err; 5288 } 5289 5290 static int live_lrc_gpr(void *arg) 5291 { 5292 struct intel_gt *gt = arg; 5293 struct intel_engine_cs *engine; 5294 struct i915_vma *scratch; 5295 enum intel_engine_id id; 5296 int err = 0; 5297 5298 /* 5299 * Check that GPR registers are cleared in new contexts as we need 5300 * to avoid leaking any information from previous contexts. 
5301 */ 5302 5303 scratch = create_scratch(gt); 5304 if (IS_ERR(scratch)) 5305 return PTR_ERR(scratch); 5306 5307 for_each_engine(engine, gt, id) { 5308 st_engine_heartbeat_disable(engine); 5309 5310 err = __live_lrc_gpr(engine, scratch, false); 5311 if (err) 5312 goto err; 5313 5314 err = __live_lrc_gpr(engine, scratch, true); 5315 if (err) 5316 goto err; 5317 5318 err: 5319 st_engine_heartbeat_enable(engine); 5320 if (igt_flush_test(gt->i915)) 5321 err = -EIO; 5322 if (err) 5323 break; 5324 } 5325 5326 i915_vma_unpin_and_release(&scratch, 0); 5327 return err; 5328 } 5329 5330 static struct i915_request * 5331 create_timestamp(struct intel_context *ce, void *slot, int idx) 5332 { 5333 const u32 offset = 5334 i915_ggtt_offset(ce->engine->status_page.vma) + 5335 offset_in_page(slot); 5336 struct i915_request *rq; 5337 u32 *cs; 5338 int err; 5339 5340 rq = intel_context_create_request(ce); 5341 if (IS_ERR(rq)) 5342 return rq; 5343 5344 cs = intel_ring_begin(rq, 10); 5345 if (IS_ERR(cs)) { 5346 err = PTR_ERR(cs); 5347 goto err; 5348 } 5349 5350 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5351 *cs++ = MI_NOOP; 5352 5353 *cs++ = MI_SEMAPHORE_WAIT | 5354 MI_SEMAPHORE_GLOBAL_GTT | 5355 MI_SEMAPHORE_POLL | 5356 MI_SEMAPHORE_SAD_NEQ_SDD; 5357 *cs++ = 0; 5358 *cs++ = offset; 5359 *cs++ = 0; 5360 5361 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5362 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 5363 *cs++ = offset + idx * sizeof(u32); 5364 *cs++ = 0; 5365 5366 intel_ring_advance(rq, cs); 5367 5368 rq->sched.attr.priority = I915_PRIORITY_MASK; 5369 err = 0; 5370 err: 5371 i915_request_get(rq); 5372 i915_request_add(rq); 5373 if (err) { 5374 i915_request_put(rq); 5375 return ERR_PTR(err); 5376 } 5377 5378 return rq; 5379 } 5380 5381 struct lrc_timestamp { 5382 struct intel_engine_cs *engine; 5383 struct intel_context *ce[2]; 5384 u32 poison; 5385 }; 5386 5387 static bool timestamp_advanced(u32 start, u32 end) 5388 { 5389 return (s32)(end - start) > 0; 5390 } 5391 5392 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 5393 { 5394 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 5395 struct i915_request *rq; 5396 u32 timestamp; 5397 int err = 0; 5398 5399 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 5400 rq = create_timestamp(arg->ce[0], slot, 1); 5401 if (IS_ERR(rq)) 5402 return PTR_ERR(rq); 5403 5404 err = wait_for_submit(rq->engine, rq, HZ / 2); 5405 if (err) 5406 goto err; 5407 5408 if (preempt) { 5409 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 5410 err = emit_semaphore_signal(arg->ce[1], slot); 5411 if (err) 5412 goto err; 5413 } else { 5414 slot[0] = 1; 5415 wmb(); 5416 } 5417 5418 /* And wait for switch to kernel (to save our context to memory) */ 5419 err = context_flush(arg->ce[0], HZ / 2); 5420 if (err) 5421 goto err; 5422 5423 if (!timestamp_advanced(arg->poison, slot[1])) { 5424 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 5425 arg->engine->name, preempt ? "preempt" : "simple", 5426 arg->poison, slot[1]); 5427 err = -EINVAL; 5428 } 5429 5430 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 5431 if (!timestamp_advanced(slot[1], timestamp)) { 5432 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 5433 arg->engine->name, preempt ? 
"preempt" : "simple", 5434 slot[1], timestamp); 5435 err = -EINVAL; 5436 } 5437 5438 err: 5439 memset32(slot, -1, 4); 5440 i915_request_put(rq); 5441 return err; 5442 } 5443 5444 static int live_lrc_timestamp(void *arg) 5445 { 5446 struct lrc_timestamp data = {}; 5447 struct intel_gt *gt = arg; 5448 enum intel_engine_id id; 5449 const u32 poison[] = { 5450 0, 5451 S32_MAX, 5452 (u32)S32_MAX + 1, 5453 U32_MAX, 5454 }; 5455 5456 /* 5457 * We want to verify that the timestamp is saved and restore across 5458 * context switches and is monotonic. 5459 * 5460 * So we do this with a little bit of LRC poisoning to check various 5461 * boundary conditions, and see what happens if we preempt the context 5462 * with a second request (carrying more poison into the timestamp). 5463 */ 5464 5465 for_each_engine(data.engine, gt, id) { 5466 int i, err = 0; 5467 5468 st_engine_heartbeat_disable(data.engine); 5469 5470 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5471 struct intel_context *tmp; 5472 5473 tmp = intel_context_create(data.engine); 5474 if (IS_ERR(tmp)) { 5475 err = PTR_ERR(tmp); 5476 goto err; 5477 } 5478 5479 err = intel_context_pin(tmp); 5480 if (err) { 5481 intel_context_put(tmp); 5482 goto err; 5483 } 5484 5485 data.ce[i] = tmp; 5486 } 5487 5488 for (i = 0; i < ARRAY_SIZE(poison); i++) { 5489 data.poison = poison[i]; 5490 5491 err = __lrc_timestamp(&data, false); 5492 if (err) 5493 break; 5494 5495 err = __lrc_timestamp(&data, true); 5496 if (err) 5497 break; 5498 } 5499 5500 err: 5501 st_engine_heartbeat_enable(data.engine); 5502 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5503 if (!data.ce[i]) 5504 break; 5505 5506 intel_context_unpin(data.ce[i]); 5507 intel_context_put(data.ce[i]); 5508 } 5509 5510 if (igt_flush_test(gt->i915)) 5511 err = -EIO; 5512 if (err) 5513 return err; 5514 } 5515 5516 return 0; 5517 } 5518 5519 static struct i915_vma * 5520 create_user_vma(struct i915_address_space *vm, unsigned long size) 5521 { 5522 struct drm_i915_gem_object *obj; 5523 struct i915_vma *vma; 5524 int err; 5525 5526 obj = i915_gem_object_create_internal(vm->i915, size); 5527 if (IS_ERR(obj)) 5528 return ERR_CAST(obj); 5529 5530 vma = i915_vma_instance(obj, vm, NULL); 5531 if (IS_ERR(vma)) { 5532 i915_gem_object_put(obj); 5533 return vma; 5534 } 5535 5536 err = i915_vma_pin(vma, 0, 0, PIN_USER); 5537 if (err) { 5538 i915_gem_object_put(obj); 5539 return ERR_PTR(err); 5540 } 5541 5542 return vma; 5543 } 5544 5545 static struct i915_vma * 5546 store_context(struct intel_context *ce, struct i915_vma *scratch) 5547 { 5548 struct i915_vma *batch; 5549 u32 dw, x, *cs, *hw; 5550 u32 *defaults; 5551 5552 batch = create_user_vma(ce->vm, SZ_64K); 5553 if (IS_ERR(batch)) 5554 return batch; 5555 5556 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5557 if (IS_ERR(cs)) { 5558 i915_vma_put(batch); 5559 return ERR_CAST(cs); 5560 } 5561 5562 defaults = shmem_pin_map(ce->engine->default_state); 5563 if (!defaults) { 5564 i915_gem_object_unpin_map(batch->obj); 5565 i915_vma_put(batch); 5566 return ERR_PTR(-ENOMEM); 5567 } 5568 5569 x = 0; 5570 dw = 0; 5571 hw = defaults; 5572 hw += LRC_STATE_OFFSET / sizeof(*hw); 5573 do { 5574 u32 len = hw[dw] & 0x7f; 5575 5576 if (hw[dw] == 0) { 5577 dw++; 5578 continue; 5579 } 5580 5581 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5582 dw += len + 2; 5583 continue; 5584 } 5585 5586 dw++; 5587 len = (len + 1) / 2; 5588 while (len--) { 5589 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 5590 *cs++ = hw[dw]; 5591 *cs++ = lower_32_bits(scratch->node.start + x); 5592 *cs++ = 
upper_32_bits(scratch->node.start + x); 5593 5594 dw += 2; 5595 x += 4; 5596 } 5597 } while (dw < PAGE_SIZE / sizeof(u32) && 5598 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5599 5600 *cs++ = MI_BATCH_BUFFER_END; 5601 5602 shmem_unpin_map(ce->engine->default_state, defaults); 5603 5604 i915_gem_object_flush_map(batch->obj); 5605 i915_gem_object_unpin_map(batch->obj); 5606 5607 return batch; 5608 } 5609 5610 static int move_to_active(struct i915_request *rq, 5611 struct i915_vma *vma, 5612 unsigned int flags) 5613 { 5614 int err; 5615 5616 i915_vma_lock(vma); 5617 err = i915_request_await_object(rq, vma->obj, flags); 5618 if (!err) 5619 err = i915_vma_move_to_active(vma, rq, flags); 5620 i915_vma_unlock(vma); 5621 5622 return err; 5623 } 5624 5625 static struct i915_request * 5626 record_registers(struct intel_context *ce, 5627 struct i915_vma *before, 5628 struct i915_vma *after, 5629 u32 *sema) 5630 { 5631 struct i915_vma *b_before, *b_after; 5632 struct i915_request *rq; 5633 u32 *cs; 5634 int err; 5635 5636 b_before = store_context(ce, before); 5637 if (IS_ERR(b_before)) 5638 return ERR_CAST(b_before); 5639 5640 b_after = store_context(ce, after); 5641 if (IS_ERR(b_after)) { 5642 rq = ERR_CAST(b_after); 5643 goto err_before; 5644 } 5645 5646 rq = intel_context_create_request(ce); 5647 if (IS_ERR(rq)) 5648 goto err_after; 5649 5650 err = move_to_active(rq, before, EXEC_OBJECT_WRITE); 5651 if (err) 5652 goto err_rq; 5653 5654 err = move_to_active(rq, b_before, 0); 5655 if (err) 5656 goto err_rq; 5657 5658 err = move_to_active(rq, after, EXEC_OBJECT_WRITE); 5659 if (err) 5660 goto err_rq; 5661 5662 err = move_to_active(rq, b_after, 0); 5663 if (err) 5664 goto err_rq; 5665 5666 cs = intel_ring_begin(rq, 14); 5667 if (IS_ERR(cs)) { 5668 err = PTR_ERR(cs); 5669 goto err_rq; 5670 } 5671 5672 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5673 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5674 *cs++ = lower_32_bits(b_before->node.start); 5675 *cs++ = upper_32_bits(b_before->node.start); 5676 5677 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5678 *cs++ = MI_SEMAPHORE_WAIT | 5679 MI_SEMAPHORE_GLOBAL_GTT | 5680 MI_SEMAPHORE_POLL | 5681 MI_SEMAPHORE_SAD_NEQ_SDD; 5682 *cs++ = 0; 5683 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5684 offset_in_page(sema); 5685 *cs++ = 0; 5686 *cs++ = MI_NOOP; 5687 5688 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5689 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5690 *cs++ = lower_32_bits(b_after->node.start); 5691 *cs++ = upper_32_bits(b_after->node.start); 5692 5693 intel_ring_advance(rq, cs); 5694 5695 WRITE_ONCE(*sema, 0); 5696 i915_request_get(rq); 5697 i915_request_add(rq); 5698 err_after: 5699 i915_vma_put(b_after); 5700 err_before: 5701 i915_vma_put(b_before); 5702 return rq; 5703 5704 err_rq: 5705 i915_request_add(rq); 5706 rq = ERR_PTR(err); 5707 goto err_after; 5708 } 5709 5710 static struct i915_vma *load_context(struct intel_context *ce, u32 poison) 5711 { 5712 struct i915_vma *batch; 5713 u32 dw, *cs, *hw; 5714 u32 *defaults; 5715 5716 batch = create_user_vma(ce->vm, SZ_64K); 5717 if (IS_ERR(batch)) 5718 return batch; 5719 5720 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5721 if (IS_ERR(cs)) { 5722 i915_vma_put(batch); 5723 return ERR_CAST(cs); 5724 } 5725 5726 defaults = shmem_pin_map(ce->engine->default_state); 5727 if (!defaults) { 5728 i915_gem_object_unpin_map(batch->obj); 5729 i915_vma_put(batch); 5730 return ERR_PTR(-ENOMEM); 5731 } 5732 5733 dw = 0; 5734 hw = defaults; 5735 hw += LRC_STATE_OFFSET / sizeof(*hw); 5736 do { 5737 u32 len = hw[dw] 
& 0x7f; 5738 5739 if (hw[dw] == 0) { 5740 dw++; 5741 continue; 5742 } 5743 5744 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5745 dw += len + 2; 5746 continue; 5747 } 5748 5749 dw++; 5750 len = (len + 1) / 2; 5751 *cs++ = MI_LOAD_REGISTER_IMM(len); 5752 while (len--) { 5753 *cs++ = hw[dw]; 5754 *cs++ = poison; 5755 dw += 2; 5756 } 5757 } while (dw < PAGE_SIZE / sizeof(u32) && 5758 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5759 5760 *cs++ = MI_BATCH_BUFFER_END; 5761 5762 shmem_unpin_map(ce->engine->default_state, defaults); 5763 5764 i915_gem_object_flush_map(batch->obj); 5765 i915_gem_object_unpin_map(batch->obj); 5766 5767 return batch; 5768 } 5769 5770 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) 5771 { 5772 struct i915_request *rq; 5773 struct i915_vma *batch; 5774 u32 *cs; 5775 int err; 5776 5777 batch = load_context(ce, poison); 5778 if (IS_ERR(batch)) 5779 return PTR_ERR(batch); 5780 5781 rq = intel_context_create_request(ce); 5782 if (IS_ERR(rq)) { 5783 err = PTR_ERR(rq); 5784 goto err_batch; 5785 } 5786 5787 err = move_to_active(rq, batch, 0); 5788 if (err) 5789 goto err_rq; 5790 5791 cs = intel_ring_begin(rq, 8); 5792 if (IS_ERR(cs)) { 5793 err = PTR_ERR(cs); 5794 goto err_rq; 5795 } 5796 5797 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5798 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5799 *cs++ = lower_32_bits(batch->node.start); 5800 *cs++ = upper_32_bits(batch->node.start); 5801 5802 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 5803 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5804 offset_in_page(sema); 5805 *cs++ = 0; 5806 *cs++ = 1; 5807 5808 intel_ring_advance(rq, cs); 5809 5810 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 5811 err_rq: 5812 i915_request_add(rq); 5813 err_batch: 5814 i915_vma_put(batch); 5815 return err; 5816 } 5817 5818 static bool is_moving(u32 a, u32 b) 5819 { 5820 return a != b; 5821 } 5822 5823 static int compare_isolation(struct intel_engine_cs *engine, 5824 struct i915_vma *ref[2], 5825 struct i915_vma *result[2], 5826 struct intel_context *ce, 5827 u32 poison) 5828 { 5829 u32 x, dw, *hw, *lrc; 5830 u32 *A[2], *B[2]; 5831 u32 *defaults; 5832 int err = 0; 5833 5834 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); 5835 if (IS_ERR(A[0])) 5836 return PTR_ERR(A[0]); 5837 5838 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); 5839 if (IS_ERR(A[1])) { 5840 err = PTR_ERR(A[1]); 5841 goto err_A0; 5842 } 5843 5844 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); 5845 if (IS_ERR(B[0])) { 5846 err = PTR_ERR(B[0]); 5847 goto err_A1; 5848 } 5849 5850 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); 5851 if (IS_ERR(B[1])) { 5852 err = PTR_ERR(B[1]); 5853 goto err_B0; 5854 } 5855 5856 lrc = i915_gem_object_pin_map(ce->state->obj, 5857 i915_coherent_map_type(engine->i915)); 5858 if (IS_ERR(lrc)) { 5859 err = PTR_ERR(lrc); 5860 goto err_B1; 5861 } 5862 lrc += LRC_STATE_OFFSET / sizeof(*hw); 5863 5864 defaults = shmem_pin_map(ce->engine->default_state); 5865 if (!defaults) { 5866 err = -ENOMEM; 5867 goto err_lrc; 5868 } 5869 5870 x = 0; 5871 dw = 0; 5872 hw = defaults; 5873 hw += LRC_STATE_OFFSET / sizeof(*hw); 5874 do { 5875 u32 len = hw[dw] & 0x7f; 5876 5877 if (hw[dw] == 0) { 5878 dw++; 5879 continue; 5880 } 5881 5882 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5883 dw += len + 2; 5884 continue; 5885 } 5886 5887 dw++; 5888 len = (len + 1) / 2; 5889 while (len--) { 5890 if (!is_moving(A[0][x], A[1][x]) && 5891 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { 5892 
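				/*
				 * The register held steady across the two
				 * reference captures but reads back
				 * differently once the remote context ran its
				 * poison batch: treat that as state leaking
				 * between contexts. Only the ring head/tail
				 * offsets below are expected to move with
				 * execution.
				 */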
switch (hw[dw] & 4095) { 5893 case 0x30: /* RING_HEAD */ 5894 case 0x34: /* RING_TAIL */ 5895 break; 5896 5897 default: 5898 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", 5899 engine->name, dw, 5900 hw[dw], hw[dw + 1], 5901 A[0][x], B[0][x], B[1][x], 5902 poison, lrc[dw + 1]); 5903 err = -EINVAL; 5904 } 5905 } 5906 dw += 2; 5907 x++; 5908 } 5909 } while (dw < PAGE_SIZE / sizeof(u32) && 5910 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5911 5912 shmem_unpin_map(ce->engine->default_state, defaults); 5913 err_lrc: 5914 i915_gem_object_unpin_map(ce->state->obj); 5915 err_B1: 5916 i915_gem_object_unpin_map(result[1]->obj); 5917 err_B0: 5918 i915_gem_object_unpin_map(result[0]->obj); 5919 err_A1: 5920 i915_gem_object_unpin_map(ref[1]->obj); 5921 err_A0: 5922 i915_gem_object_unpin_map(ref[0]->obj); 5923 return err; 5924 } 5925 5926 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) 5927 { 5928 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); 5929 struct i915_vma *ref[2], *result[2]; 5930 struct intel_context *A, *B; 5931 struct i915_request *rq; 5932 int err; 5933 5934 A = intel_context_create(engine); 5935 if (IS_ERR(A)) 5936 return PTR_ERR(A); 5937 5938 B = intel_context_create(engine); 5939 if (IS_ERR(B)) { 5940 err = PTR_ERR(B); 5941 goto err_A; 5942 } 5943 5944 ref[0] = create_user_vma(A->vm, SZ_64K); 5945 if (IS_ERR(ref[0])) { 5946 err = PTR_ERR(ref[0]); 5947 goto err_B; 5948 } 5949 5950 ref[1] = create_user_vma(A->vm, SZ_64K); 5951 if (IS_ERR(ref[1])) { 5952 err = PTR_ERR(ref[1]); 5953 goto err_ref0; 5954 } 5955 5956 rq = record_registers(A, ref[0], ref[1], sema); 5957 if (IS_ERR(rq)) { 5958 err = PTR_ERR(rq); 5959 goto err_ref1; 5960 } 5961 5962 WRITE_ONCE(*sema, 1); 5963 wmb(); 5964 5965 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5966 i915_request_put(rq); 5967 err = -ETIME; 5968 goto err_ref1; 5969 } 5970 i915_request_put(rq); 5971 5972 result[0] = create_user_vma(A->vm, SZ_64K); 5973 if (IS_ERR(result[0])) { 5974 err = PTR_ERR(result[0]); 5975 goto err_ref1; 5976 } 5977 5978 result[1] = create_user_vma(A->vm, SZ_64K); 5979 if (IS_ERR(result[1])) { 5980 err = PTR_ERR(result[1]); 5981 goto err_result0; 5982 } 5983 5984 rq = record_registers(A, result[0], result[1], sema); 5985 if (IS_ERR(rq)) { 5986 err = PTR_ERR(rq); 5987 goto err_result1; 5988 } 5989 5990 err = poison_registers(B, poison, sema); 5991 if (err) { 5992 WRITE_ONCE(*sema, -1); 5993 i915_request_put(rq); 5994 goto err_result1; 5995 } 5996 5997 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5998 i915_request_put(rq); 5999 err = -ETIME; 6000 goto err_result1; 6001 } 6002 i915_request_put(rq); 6003 6004 err = compare_isolation(engine, ref, result, A, poison); 6005 6006 err_result1: 6007 i915_vma_put(result[1]); 6008 err_result0: 6009 i915_vma_put(result[0]); 6010 err_ref1: 6011 i915_vma_put(ref[1]); 6012 err_ref0: 6013 i915_vma_put(ref[0]); 6014 err_B: 6015 intel_context_put(B); 6016 err_A: 6017 intel_context_put(A); 6018 return err; 6019 } 6020 6021 static bool skip_isolation(const struct intel_engine_cs *engine) 6022 { 6023 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) 6024 return true; 6025 6026 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) 6027 return true; 6028 6029 return false; 6030 } 6031 6032 static int live_lrc_isolation(void *arg) 6033 { 6034 struct intel_gt *gt = arg; 6035 struct intel_engine_cs *engine; 6036 enum intel_engine_id id; 6037 const u32 poison[] = { 
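		/*
		 * Assorted poison patterns; the loop below applies each one
		 * and its bitwise inverse.
		 */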
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};
	int err = 0;

	/*
	 * Our goal is to try to verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			int result;

			result = __lrc_isolation(engine, poison[i]);
			if (result && !err)
				err = result;

			result = __lrc_isolation(engine, ~poison[i]);
			if (result && !err)
				err = result;
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int indirect_ctx_submit_req(struct intel_context *ce)
{
	struct i915_request *rq;
	int err = 0;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;

	i915_request_put(rq);

	return err;
}

#define CTX_BB_CANARY_OFFSET (3 * 1024)
#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))

static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_START(0));
	*cs++ = i915_ggtt_offset(ce->state) +
		context_wa_bb_offset(ce) +
		CTX_BB_CANARY_OFFSET;
	*cs++ = 0;

	return cs;
}

static void
indirect_ctx_bb_setup(struct intel_context *ce)
{
	u32 *cs = context_indirect_bb(ce);

	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;

	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}

static bool check_ring_start(struct intel_context *ce)
{
	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
		LRC_STATE_OFFSET + context_wa_bb_offset(ce);

	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
		return true;

	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
	       ctx_bb[CTX_BB_CANARY_INDEX],
	       ce->lrc_reg_state[CTX_RING_START]);

	return false;
}

static int indirect_ctx_bb_check(struct intel_context *ce)
{
	int err;

	err = indirect_ctx_submit_req(ce);
	if (err)
		return err;

	if (!check_ring_start(ce))
		return -EINVAL;

	return 0;
}

static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
{
	struct intel_context *a, *b;
	int err;

	a = intel_context_create(engine);
	if (IS_ERR(a))
		return PTR_ERR(a);
	err = intel_context_pin(a);
	if (err)
		goto put_a;

	b = intel_context_create(engine);
	if (IS_ERR(b)) {
		err = PTR_ERR(b);
		goto unpin_a;
	}
	err = intel_context_pin(b);
	if (err)
		goto put_b;

	/* We use the already reserved extra page in context state */
	if (!a->wa_bb_page) {
		GEM_BUG_ON(b->wa_bb_page);
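		/* Gen12 is expected to always provide the per-context wa_bb page */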
		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
		goto unpin_b;
	}

	/*
	 * In order to test that our per-context bb is truly per context and
	 * executes at the intended point in the context restore process, make
	 * the batch store the ring start value to memory. As the ring start
	 * is restored before the indirect ctx bb runs, and as it differs for
	 * each context, it is a good fit for this purpose.
	 */
	indirect_ctx_bb_setup(a);
	indirect_ctx_bb_setup(b);

	err = indirect_ctx_bb_check(a);
	if (err)
		goto unpin_b;

	err = indirect_ctx_bb_check(b);

unpin_b:
	intel_context_unpin(b);
put_b:
	intel_context_put(b);
unpin_a:
	intel_context_unpin(a);
put_a:
	intel_context_put(a);

	return err;
}

static int live_lrc_indirect_ctx_bb(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_lrc_indirect_ctx_bb(engine);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;

		if (err)
			break;
	}

	return err;
}

static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	if (test_and_set_bit(bit, lock))
		return;

	tasklet_disable(&engine->execlists.tasklet);

	if (!rq->fence.error)
		intel_engine_reset(engine, NULL);

	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(bit, lock);
}

static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_OFFSET);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}

static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

static int
live_lrc_garbage(void *arg) 6336 { 6337 struct intel_gt *gt = arg; 6338 struct intel_engine_cs *engine; 6339 enum intel_engine_id id; 6340 6341 /* 6342 * Verify that we can recover if one context state is completely 6343 * corrupted. 6344 */ 6345 6346 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) 6347 return 0; 6348 6349 for_each_engine(engine, gt, id) { 6350 I915_RND_STATE(prng); 6351 int err = 0, i; 6352 6353 if (!intel_has_reset_engine(engine->gt)) 6354 continue; 6355 6356 intel_engine_pm_get(engine); 6357 for (i = 0; i < 3; i++) { 6358 err = __lrc_garbage(engine, &prng); 6359 if (err) 6360 break; 6361 } 6362 intel_engine_pm_put(engine); 6363 6364 if (igt_flush_test(gt->i915)) 6365 err = -EIO; 6366 if (err) 6367 return err; 6368 } 6369 6370 return 0; 6371 } 6372 6373 static int __live_pphwsp_runtime(struct intel_engine_cs *engine) 6374 { 6375 struct intel_context *ce; 6376 struct i915_request *rq; 6377 IGT_TIMEOUT(end_time); 6378 int err; 6379 6380 ce = intel_context_create(engine); 6381 if (IS_ERR(ce)) 6382 return PTR_ERR(ce); 6383 6384 ce->runtime.num_underflow = 0; 6385 ce->runtime.max_underflow = 0; 6386 6387 do { 6388 unsigned int loop = 1024; 6389 6390 while (loop) { 6391 rq = intel_context_create_request(ce); 6392 if (IS_ERR(rq)) { 6393 err = PTR_ERR(rq); 6394 goto err_rq; 6395 } 6396 6397 if (--loop == 0) 6398 i915_request_get(rq); 6399 6400 i915_request_add(rq); 6401 } 6402 6403 if (__igt_timeout(end_time, NULL)) 6404 break; 6405 6406 i915_request_put(rq); 6407 } while (1); 6408 6409 err = i915_request_wait(rq, 0, HZ / 5); 6410 if (err < 0) { 6411 pr_err("%s: request not completed!\n", engine->name); 6412 goto err_wait; 6413 } 6414 6415 igt_flush_test(engine->i915); 6416 6417 pr_info("%s: pphwsp runtime %lluns, average %lluns\n", 6418 engine->name, 6419 intel_context_get_total_runtime_ns(ce), 6420 intel_context_get_avg_runtime_ns(ce)); 6421 6422 err = 0; 6423 if (ce->runtime.num_underflow) { 6424 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", 6425 engine->name, 6426 ce->runtime.num_underflow, 6427 ce->runtime.max_underflow); 6428 GEM_TRACE_DUMP(); 6429 err = -EOVERFLOW; 6430 } 6431 6432 err_wait: 6433 i915_request_put(rq); 6434 err_rq: 6435 intel_context_put(ce); 6436 return err; 6437 } 6438 6439 static int live_pphwsp_runtime(void *arg) 6440 { 6441 struct intel_gt *gt = arg; 6442 struct intel_engine_cs *engine; 6443 enum intel_engine_id id; 6444 int err = 0; 6445 6446 /* 6447 * Check that cumulative context runtime as stored in the pphwsp[16] 6448 * is monotonic. 6449 */ 6450 6451 for_each_engine(engine, gt, id) { 6452 err = __live_pphwsp_runtime(engine); 6453 if (err) 6454 break; 6455 } 6456 6457 if (igt_flush_test(gt->i915)) 6458 err = -EIO; 6459 6460 return err; 6461 } 6462 6463 int intel_lrc_live_selftests(struct drm_i915_private *i915) 6464 { 6465 static const struct i915_subtest tests[] = { 6466 SUBTEST(live_lrc_layout), 6467 SUBTEST(live_lrc_fixed), 6468 SUBTEST(live_lrc_state), 6469 SUBTEST(live_lrc_gpr), 6470 SUBTEST(live_lrc_isolation), 6471 SUBTEST(live_lrc_timestamp), 6472 SUBTEST(live_lrc_garbage), 6473 SUBTEST(live_pphwsp_runtime), 6474 SUBTEST(live_lrc_indirect_ctx_bb), 6475 }; 6476 6477 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 6478 return 0; 6479 6480 return intel_gt_live_subtests(tests, &i915->gt); 6481 } 6482
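
/*
 * Illustrative sketch, not part of the selftests above: timestamp_advanced()
 * decides whether a free-running u32 counter moved forward by computing the
 * unsigned difference and interpreting it as signed, which stays correct when
 * the counter wraps past U32_MAX (the poison values in live_lrc_timestamp()
 * probe exactly those sign/wrap boundaries). The standalone host-side program
 * below demonstrates the same comparison with hypothetical names; it is kept
 * under #if 0 so it does not affect the kernel build.
 */
#if 0 /* host-side illustration only */
#include <assert.h>
#include <stdint.h>

/* Same signed-difference trick as timestamp_advanced() above */
static int sketch_timestamp_advanced(uint32_t start, uint32_t end)
{
	return (int32_t)(end - start) > 0;
}

int main(void)
{
	assert(sketch_timestamp_advanced(10, 20));        /* plain increase */
	assert(sketch_timestamp_advanced(UINT32_MAX, 5)); /* advanced across the wrap */
	assert(!sketch_timestamp_advanced(20, 10));       /* went backwards */
	assert(!sketch_timestamp_advanced(7, 7));         /* no movement */
	return 0;
}
#endif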
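
/*
 * Illustrative sketch, not part of the selftests above: the LRI walk used by
 * live_lrc_layout(), store_context(), load_context() and compare_isolation()
 * treats the context image as a command stream, where each
 * MI_LOAD_REGISTER_IMM header carries its payload length in the low 7 bits
 * (a dword count that pairs up into register/value tuples) and the stream
 * ends at MI_BATCH_BUFFER_END. The standalone host-side program below mirrors
 * that walk over a fabricated image; the SK_* constants and sample offsets
 * are illustrative stand-ins for MI_INSTR(0x22, 0), MI_BATCH_BUFFER_END and
 * real register offsets, and the block is kept under #if 0 so it does not
 * affect the kernel build.
 */
#if 0 /* host-side illustration only */
#include <stdint.h>
#include <stdio.h>

#define SK_MI_OPCODE_MASK (0x1ffu << 23)	/* mirrors GENMASK(31, 23) */
#define SK_MI_LRI_HEADER  (0x22u << 23)		/* mirrors MI_INSTR(0x22, 0) */
#define SK_MI_LRI(n)      (SK_MI_LRI_HEADER | (2 * (n) - 1))
#define SK_MI_BB_END      (0x0au << 23)		/* mirrors MI_BATCH_BUFFER_END */

static void sketch_walk_lri(const uint32_t *hw, unsigned int max_dw)
{
	unsigned int dw = 0;

	do {
		uint32_t len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {		/* MI_NOOP padding */
			dw++;
			continue;
		}

		if ((hw[dw] & SK_MI_OPCODE_MASK) != SK_MI_LRI_HEADER) {
			dw += len + 2;		/* skip a non-LRI command */
			continue;
		}

		dw++;
		len = (len + 1) / 2;		/* dwords -> (offset, value) pairs */
		while (len--) {
			printf("reg %04x = %08x\n",
			       (unsigned)hw[dw], (unsigned)hw[dw + 1]);
			dw += 2;
		}
	} while (dw < max_dw && (hw[dw] & ~1u) != SK_MI_BB_END);
}

int main(void)
{
	const uint32_t image[] = {
		SK_MI_LRI(2),		/* fabricated two-register LRI block */
		0x2030, 0xdeadbeef,	/* made-up register offsets/values */
		0x2034, 0x00000000,
		0,			/* a noop */
		SK_MI_BB_END,
	};

	sketch_walk_lri(image, sizeof(image) / sizeof(image[0]));
	return 0;
}
#endif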