1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2018 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_engine_heartbeat.h" 11 #include "gt/intel_reset.h" 12 #include "gt/selftest_engine_heartbeat.h" 13 14 #include "i915_selftest.h" 15 #include "selftests/i915_random.h" 16 #include "selftests/igt_flush_test.h" 17 #include "selftests/igt_live_test.h" 18 #include "selftests/igt_spinner.h" 19 #include "selftests/lib_sw_fence.h" 20 21 #include "gem/selftests/igt_gem_utils.h" 22 #include "gem/selftests/mock_context.h" 23 24 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) 25 #define NUM_GPR 16 26 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ 27 28 static struct i915_vma *create_scratch(struct intel_gt *gt) 29 { 30 struct drm_i915_gem_object *obj; 31 struct i915_vma *vma; 32 int err; 33 34 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 35 if (IS_ERR(obj)) 36 return ERR_CAST(obj); 37 38 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); 39 40 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 41 if (IS_ERR(vma)) { 42 i915_gem_object_put(obj); 43 return vma; 44 } 45 46 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 47 if (err) { 48 i915_gem_object_put(obj); 49 return ERR_PTR(err); 50 } 51 52 return vma; 53 } 54 55 static bool is_active(struct i915_request *rq) 56 { 57 if (i915_request_is_active(rq)) 58 return true; 59 60 if (i915_request_on_hold(rq)) 61 return true; 62 63 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) 64 return true; 65 66 return false; 67 } 68 69 static int wait_for_submit(struct intel_engine_cs *engine, 70 struct i915_request *rq, 71 unsigned long timeout) 72 { 73 timeout += jiffies; 74 do { 75 bool done = time_after(jiffies, timeout); 76 77 if (i915_request_completed(rq)) /* that was quick! 
 */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (done)
			return -ETIME;

		cond_resched();
	} while (1);
}

static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}

static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin, rq)) {
			GEM_TRACE("spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out_ctx;
		}

		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			goto out_ctx;
		}

out_ctx:
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
200 */ 201 202 if (igt_spinner_init(&spin, gt)) 203 return err; 204 205 err = 0; 206 for_each_engine(engine, gt, id) { 207 struct intel_context *ce[2] = {}; 208 struct i915_request *rq[2]; 209 struct igt_live_test t; 210 int n; 211 212 if (prio && !intel_engine_has_preemption(engine)) 213 continue; 214 215 if (!intel_engine_can_store_dword(engine)) 216 continue; 217 218 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 219 err = -EIO; 220 break; 221 } 222 st_engine_heartbeat_disable(engine); 223 224 for (n = 0; n < ARRAY_SIZE(ce); n++) { 225 struct intel_context *tmp; 226 227 tmp = intel_context_create(engine); 228 if (IS_ERR(tmp)) { 229 err = PTR_ERR(tmp); 230 goto err_ce; 231 } 232 233 err = intel_context_pin(tmp); 234 if (err) { 235 intel_context_put(tmp); 236 goto err_ce; 237 } 238 239 /* 240 * Setup the pair of contexts such that if we 241 * lite-restore using the RING_TAIL from ce[1] it 242 * will execute garbage from ce[0]->ring. 243 */ 244 memset(tmp->ring->vaddr, 245 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 246 tmp->ring->vma->size); 247 248 ce[n] = tmp; 249 } 250 GEM_BUG_ON(!ce[1]->ring->size); 251 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 252 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); 253 254 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 255 if (IS_ERR(rq[0])) { 256 err = PTR_ERR(rq[0]); 257 goto err_ce; 258 } 259 260 i915_request_get(rq[0]); 261 i915_request_add(rq[0]); 262 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 263 264 if (!igt_wait_for_spinner(&spin, rq[0])) { 265 i915_request_put(rq[0]); 266 goto err_ce; 267 } 268 269 rq[1] = i915_request_create(ce[1]); 270 if (IS_ERR(rq[1])) { 271 err = PTR_ERR(rq[1]); 272 i915_request_put(rq[0]); 273 goto err_ce; 274 } 275 276 if (!prio) { 277 /* 278 * Ensure we do the switch to ce[1] on completion. 279 * 280 * rq[0] is already submitted, so this should reduce 281 * to a no-op (a wait on a request on the same engine 282 * uses the submit fence, not the completion fence), 283 * but it will install a dependency on rq[1] for rq[0] 284 * that will prevent the pair being reordered by 285 * timeslicing. 
286 */ 287 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 288 } 289 290 i915_request_get(rq[1]); 291 i915_request_add(rq[1]); 292 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 293 i915_request_put(rq[0]); 294 295 if (prio) { 296 struct i915_sched_attr attr = { 297 .priority = prio, 298 }; 299 300 /* Alternatively preempt the spinner with ce[1] */ 301 engine->schedule(rq[1], &attr); 302 } 303 304 /* And switch back to ce[0] for good measure */ 305 rq[0] = i915_request_create(ce[0]); 306 if (IS_ERR(rq[0])) { 307 err = PTR_ERR(rq[0]); 308 i915_request_put(rq[1]); 309 goto err_ce; 310 } 311 312 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 313 i915_request_get(rq[0]); 314 i915_request_add(rq[0]); 315 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 316 i915_request_put(rq[1]); 317 i915_request_put(rq[0]); 318 319 err_ce: 320 intel_engine_flush_submission(engine); 321 igt_spinner_end(&spin); 322 for (n = 0; n < ARRAY_SIZE(ce); n++) { 323 if (IS_ERR_OR_NULL(ce[n])) 324 break; 325 326 intel_context_unpin(ce[n]); 327 intel_context_put(ce[n]); 328 } 329 330 st_engine_heartbeat_enable(engine); 331 if (igt_live_test_end(&t)) 332 err = -EIO; 333 if (err) 334 break; 335 } 336 337 igt_spinner_fini(&spin); 338 return err; 339 } 340 341 static int live_unlite_switch(void *arg) 342 { 343 return live_unlite_restore(arg, 0); 344 } 345 346 static int live_unlite_preempt(void *arg) 347 { 348 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 349 } 350 351 static int live_unlite_ring(void *arg) 352 { 353 struct intel_gt *gt = arg; 354 struct intel_engine_cs *engine; 355 struct igt_spinner spin; 356 enum intel_engine_id id; 357 int err = 0; 358 359 /* 360 * Setup a preemption event that will cause almost the entire ring 361 * to be unwound, potentially fooling our intel_ring_direction() 362 * into emitting a forward lite-restore instead of the rollback. 
363 */ 364 365 if (igt_spinner_init(&spin, gt)) 366 return -ENOMEM; 367 368 for_each_engine(engine, gt, id) { 369 struct intel_context *ce[2] = {}; 370 struct i915_request *rq; 371 struct igt_live_test t; 372 int n; 373 374 if (!intel_engine_has_preemption(engine)) 375 continue; 376 377 if (!intel_engine_can_store_dword(engine)) 378 continue; 379 380 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 381 err = -EIO; 382 break; 383 } 384 st_engine_heartbeat_disable(engine); 385 386 for (n = 0; n < ARRAY_SIZE(ce); n++) { 387 struct intel_context *tmp; 388 389 tmp = intel_context_create(engine); 390 if (IS_ERR(tmp)) { 391 err = PTR_ERR(tmp); 392 goto err_ce; 393 } 394 395 err = intel_context_pin(tmp); 396 if (err) { 397 intel_context_put(tmp); 398 goto err_ce; 399 } 400 401 memset32(tmp->ring->vaddr, 402 0xdeadbeef, /* trigger a hang if executed */ 403 tmp->ring->vma->size / sizeof(u32)); 404 405 ce[n] = tmp; 406 } 407 408 /* Create max prio spinner, followed by N low prio nops */ 409 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 410 if (IS_ERR(rq)) { 411 err = PTR_ERR(rq); 412 goto err_ce; 413 } 414 415 i915_request_get(rq); 416 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 417 i915_request_add(rq); 418 419 if (!igt_wait_for_spinner(&spin, rq)) { 420 intel_gt_set_wedged(gt); 421 i915_request_put(rq); 422 err = -ETIME; 423 goto err_ce; 424 } 425 426 /* Fill the ring, until we will cause a wrap */ 427 n = 0; 428 while (intel_ring_direction(ce[0]->ring, 429 rq->wa_tail, 430 ce[0]->ring->tail) <= 0) { 431 struct i915_request *tmp; 432 433 tmp = intel_context_create_request(ce[0]); 434 if (IS_ERR(tmp)) { 435 err = PTR_ERR(tmp); 436 i915_request_put(rq); 437 goto err_ce; 438 } 439 440 i915_request_add(tmp); 441 intel_engine_flush_submission(engine); 442 n++; 443 } 444 intel_engine_flush_submission(engine); 445 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", 446 engine->name, n, 447 ce[0]->ring->size, 448 ce[0]->ring->tail, 449 ce[0]->ring->emit, 450 rq->tail); 451 GEM_BUG_ON(intel_ring_direction(ce[0]->ring, 452 rq->tail, 453 ce[0]->ring->tail) <= 0); 454 i915_request_put(rq); 455 456 /* Create a second ring to preempt the first ring after rq[0] */ 457 rq = intel_context_create_request(ce[1]); 458 if (IS_ERR(rq)) { 459 err = PTR_ERR(rq); 460 goto err_ce; 461 } 462 463 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 464 i915_request_get(rq); 465 i915_request_add(rq); 466 467 err = wait_for_submit(engine, rq, HZ / 2); 468 i915_request_put(rq); 469 if (err) { 470 pr_err("%s: preemption request was not submitted\n", 471 engine->name); 472 err = -ETIME; 473 } 474 475 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", 476 engine->name, 477 ce[0]->ring->tail, ce[0]->ring->emit, 478 ce[1]->ring->tail, ce[1]->ring->emit); 479 480 err_ce: 481 intel_engine_flush_submission(engine); 482 igt_spinner_end(&spin); 483 for (n = 0; n < ARRAY_SIZE(ce); n++) { 484 if (IS_ERR_OR_NULL(ce[n])) 485 break; 486 487 intel_context_unpin(ce[n]); 488 intel_context_put(ce[n]); 489 } 490 st_engine_heartbeat_enable(engine); 491 if (igt_live_test_end(&t)) 492 err = -EIO; 493 if (err) 494 break; 495 } 496 497 igt_spinner_fini(&spin); 498 return err; 499 } 500 501 static int live_pin_rewind(void *arg) 502 { 503 struct intel_gt *gt = arg; 504 struct intel_engine_cs *engine; 505 enum intel_engine_id id; 506 int err = 0; 507 508 /* 509 * We have to be careful not to trust intel_ring too much, for example 510 * ring->head is updated upon 
retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
591 */ 592 593 if (!intel_has_reset_engine(gt)) 594 return 0; 595 596 if (igt_spinner_init(&spin, gt)) 597 return -ENOMEM; 598 599 for_each_engine(engine, gt, id) { 600 struct intel_context *ce; 601 struct i915_request *rq; 602 603 ce = intel_context_create(engine); 604 if (IS_ERR(ce)) { 605 err = PTR_ERR(ce); 606 break; 607 } 608 609 st_engine_heartbeat_disable(engine); 610 611 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 612 if (IS_ERR(rq)) { 613 err = PTR_ERR(rq); 614 goto out; 615 } 616 i915_request_add(rq); 617 618 if (!igt_wait_for_spinner(&spin, rq)) { 619 intel_gt_set_wedged(gt); 620 err = -ETIME; 621 goto out; 622 } 623 624 /* We have our request executing, now remove it and reset */ 625 626 if (test_and_set_bit(I915_RESET_ENGINE + id, 627 >->reset.flags)) { 628 intel_gt_set_wedged(gt); 629 err = -EBUSY; 630 goto out; 631 } 632 tasklet_disable(&engine->execlists.tasklet); 633 634 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 635 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 636 637 i915_request_get(rq); 638 execlists_hold(engine, rq); 639 GEM_BUG_ON(!i915_request_on_hold(rq)); 640 641 intel_engine_reset(engine, NULL); 642 GEM_BUG_ON(rq->fence.error != -EIO); 643 644 tasklet_enable(&engine->execlists.tasklet); 645 clear_and_wake_up_bit(I915_RESET_ENGINE + id, 646 >->reset.flags); 647 648 /* Check that we do not resubmit the held request */ 649 if (!i915_request_wait(rq, 0, HZ / 5)) { 650 pr_err("%s: on hold request completed!\n", 651 engine->name); 652 i915_request_put(rq); 653 err = -EIO; 654 goto out; 655 } 656 GEM_BUG_ON(!i915_request_on_hold(rq)); 657 658 /* But is resubmitted on release */ 659 execlists_unhold(engine, rq); 660 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 661 pr_err("%s: held request did not complete!\n", 662 engine->name); 663 intel_gt_set_wedged(gt); 664 err = -ETIME; 665 } 666 i915_request_put(rq); 667 668 out: 669 st_engine_heartbeat_enable(engine); 670 intel_context_put(ce); 671 if (err) 672 break; 673 } 674 675 igt_spinner_fini(&spin); 676 return err; 677 } 678 679 static const char *error_repr(int err) 680 { 681 return err ? "bad" : "good"; 682 } 683 684 static int live_error_interrupt(void *arg) 685 { 686 static const struct error_phase { 687 enum { GOOD = 0, BAD = -EIO } error[2]; 688 } phases[] = { 689 { { BAD, GOOD } }, 690 { { BAD, BAD } }, 691 { { BAD, GOOD } }, 692 { { GOOD, GOOD } }, /* sentinel */ 693 }; 694 struct intel_gt *gt = arg; 695 struct intel_engine_cs *engine; 696 enum intel_engine_id id; 697 698 /* 699 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning 700 * of invalid commands in user batches that will cause a GPU hang. 701 * This is a faster mechanism than using hangcheck/heartbeats, but 702 * only detects problems the HW knows about -- it will not warn when 703 * we kill the HW! 704 * 705 * To verify our detection and reset, we throw some invalid commands 706 * at the HW and wait for the interrupt. 
707 */ 708 709 if (!intel_has_reset_engine(gt)) 710 return 0; 711 712 for_each_engine(engine, gt, id) { 713 const struct error_phase *p; 714 int err = 0; 715 716 st_engine_heartbeat_disable(engine); 717 718 for (p = phases; p->error[0] != GOOD; p++) { 719 struct i915_request *client[ARRAY_SIZE(phases->error)]; 720 u32 *cs; 721 int i; 722 723 memset(client, 0, sizeof(*client)); 724 for (i = 0; i < ARRAY_SIZE(client); i++) { 725 struct intel_context *ce; 726 struct i915_request *rq; 727 728 ce = intel_context_create(engine); 729 if (IS_ERR(ce)) { 730 err = PTR_ERR(ce); 731 goto out; 732 } 733 734 rq = intel_context_create_request(ce); 735 intel_context_put(ce); 736 if (IS_ERR(rq)) { 737 err = PTR_ERR(rq); 738 goto out; 739 } 740 741 if (rq->engine->emit_init_breadcrumb) { 742 err = rq->engine->emit_init_breadcrumb(rq); 743 if (err) { 744 i915_request_add(rq); 745 goto out; 746 } 747 } 748 749 cs = intel_ring_begin(rq, 2); 750 if (IS_ERR(cs)) { 751 i915_request_add(rq); 752 err = PTR_ERR(cs); 753 goto out; 754 } 755 756 if (p->error[i]) { 757 *cs++ = 0xdeadbeef; 758 *cs++ = 0xdeadbeef; 759 } else { 760 *cs++ = MI_NOOP; 761 *cs++ = MI_NOOP; 762 } 763 764 client[i] = i915_request_get(rq); 765 i915_request_add(rq); 766 } 767 768 err = wait_for_submit(engine, client[0], HZ / 2); 769 if (err) { 770 pr_err("%s: first request did not start within time!\n", 771 engine->name); 772 err = -ETIME; 773 goto out; 774 } 775 776 for (i = 0; i < ARRAY_SIZE(client); i++) { 777 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 778 pr_debug("%s: %s request incomplete!\n", 779 engine->name, 780 error_repr(p->error[i])); 781 782 if (!i915_request_started(client[i])) { 783 pr_err("%s: %s request not started!\n", 784 engine->name, 785 error_repr(p->error[i])); 786 err = -ETIME; 787 goto out; 788 } 789 790 /* Kick the tasklet to process the error */ 791 intel_engine_flush_submission(engine); 792 if (client[i]->fence.error != p->error[i]) { 793 pr_err("%s: %s request (%s) with wrong error code: %d\n", 794 engine->name, 795 error_repr(p->error[i]), 796 i915_request_completed(client[i]) ? 
"completed" : "running", 797 client[i]->fence.error); 798 err = -EINVAL; 799 goto out; 800 } 801 } 802 803 out: 804 for (i = 0; i < ARRAY_SIZE(client); i++) 805 if (client[i]) 806 i915_request_put(client[i]); 807 if (err) { 808 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 809 engine->name, p - phases, 810 p->error[0], p->error[1]); 811 break; 812 } 813 } 814 815 st_engine_heartbeat_enable(engine); 816 if (err) { 817 intel_gt_set_wedged(gt); 818 return err; 819 } 820 } 821 822 return 0; 823 } 824 825 static int 826 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 827 { 828 u32 *cs; 829 830 cs = intel_ring_begin(rq, 10); 831 if (IS_ERR(cs)) 832 return PTR_ERR(cs); 833 834 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 835 836 *cs++ = MI_SEMAPHORE_WAIT | 837 MI_SEMAPHORE_GLOBAL_GTT | 838 MI_SEMAPHORE_POLL | 839 MI_SEMAPHORE_SAD_NEQ_SDD; 840 *cs++ = 0; 841 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 842 *cs++ = 0; 843 844 if (idx > 0) { 845 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 846 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 847 *cs++ = 0; 848 *cs++ = 1; 849 } else { 850 *cs++ = MI_NOOP; 851 *cs++ = MI_NOOP; 852 *cs++ = MI_NOOP; 853 *cs++ = MI_NOOP; 854 } 855 856 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 857 858 intel_ring_advance(rq, cs); 859 return 0; 860 } 861 862 static struct i915_request * 863 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 864 { 865 struct intel_context *ce; 866 struct i915_request *rq; 867 int err; 868 869 ce = intel_context_create(engine); 870 if (IS_ERR(ce)) 871 return ERR_CAST(ce); 872 873 rq = intel_context_create_request(ce); 874 if (IS_ERR(rq)) 875 goto out_ce; 876 877 err = 0; 878 if (rq->engine->emit_init_breadcrumb) 879 err = rq->engine->emit_init_breadcrumb(rq); 880 if (err == 0) 881 err = emit_semaphore_chain(rq, vma, idx); 882 if (err == 0) 883 i915_request_get(rq); 884 i915_request_add(rq); 885 if (err) 886 rq = ERR_PTR(err); 887 888 out_ce: 889 intel_context_put(ce); 890 return rq; 891 } 892 893 static int 894 release_queue(struct intel_engine_cs *engine, 895 struct i915_vma *vma, 896 int idx, int prio) 897 { 898 struct i915_sched_attr attr = { 899 .priority = prio, 900 }; 901 struct i915_request *rq; 902 u32 *cs; 903 904 rq = intel_engine_create_kernel_request(engine); 905 if (IS_ERR(rq)) 906 return PTR_ERR(rq); 907 908 cs = intel_ring_begin(rq, 4); 909 if (IS_ERR(cs)) { 910 i915_request_add(rq); 911 return PTR_ERR(cs); 912 } 913 914 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 915 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 916 *cs++ = 0; 917 *cs++ = 1; 918 919 intel_ring_advance(rq, cs); 920 921 i915_request_get(rq); 922 i915_request_add(rq); 923 924 local_bh_disable(); 925 engine->schedule(rq, &attr); 926 local_bh_enable(); /* kick tasklet */ 927 928 i915_request_put(rq); 929 930 return 0; 931 } 932 933 static int 934 slice_semaphore_queue(struct intel_engine_cs *outer, 935 struct i915_vma *vma, 936 int count) 937 { 938 struct intel_engine_cs *engine; 939 struct i915_request *head; 940 enum intel_engine_id id; 941 int err, i, n = 0; 942 943 head = semaphore_queue(outer, vma, n++); 944 if (IS_ERR(head)) 945 return PTR_ERR(head); 946 947 for_each_engine(engine, outer->gt, id) { 948 for (i = 0; i < count; i++) { 949 struct i915_request *rq; 950 951 rq = semaphore_queue(engine, vma, n++); 952 if (IS_ERR(rq)) { 953 err = PTR_ERR(rq); 954 goto out; 955 } 956 957 i915_request_put(rq); 958 } 959 } 960 961 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); 962 if (err) 963 goto out; 964 
965 if (i915_request_wait(head, 0, 966 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) { 967 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 968 count, n); 969 GEM_TRACE_DUMP(); 970 intel_gt_set_wedged(outer->gt); 971 err = -EIO; 972 } 973 974 out: 975 i915_request_put(head); 976 return err; 977 } 978 979 static int live_timeslice_preempt(void *arg) 980 { 981 struct intel_gt *gt = arg; 982 struct drm_i915_gem_object *obj; 983 struct intel_engine_cs *engine; 984 enum intel_engine_id id; 985 struct i915_vma *vma; 986 void *vaddr; 987 int err = 0; 988 989 /* 990 * If a request takes too long, we would like to give other users 991 * a fair go on the GPU. In particular, users may create batches 992 * that wait upon external input, where that input may even be 993 * supplied by another GPU job. To avoid blocking forever, we 994 * need to preempt the current task and replace it with another 995 * ready task. 996 */ 997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 998 return 0; 999 1000 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1001 if (IS_ERR(obj)) 1002 return PTR_ERR(obj); 1003 1004 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1005 if (IS_ERR(vma)) { 1006 err = PTR_ERR(vma); 1007 goto err_obj; 1008 } 1009 1010 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1011 if (IS_ERR(vaddr)) { 1012 err = PTR_ERR(vaddr); 1013 goto err_obj; 1014 } 1015 1016 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1017 if (err) 1018 goto err_map; 1019 1020 err = i915_vma_sync(vma); 1021 if (err) 1022 goto err_pin; 1023 1024 for_each_engine(engine, gt, id) { 1025 if (!intel_engine_has_preemption(engine)) 1026 continue; 1027 1028 memset(vaddr, 0, PAGE_SIZE); 1029 1030 st_engine_heartbeat_disable(engine); 1031 err = slice_semaphore_queue(engine, vma, 5); 1032 st_engine_heartbeat_enable(engine); 1033 if (err) 1034 goto err_pin; 1035 1036 if (igt_flush_test(gt->i915)) { 1037 err = -EIO; 1038 goto err_pin; 1039 } 1040 } 1041 1042 err_pin: 1043 i915_vma_unpin(vma); 1044 err_map: 1045 i915_gem_object_unpin_map(obj); 1046 err_obj: 1047 i915_gem_object_put(obj); 1048 return err; 1049 } 1050 1051 static struct i915_request * 1052 create_rewinder(struct intel_context *ce, 1053 struct i915_request *wait, 1054 void *slot, int idx) 1055 { 1056 const u32 offset = 1057 i915_ggtt_offset(ce->engine->status_page.vma) + 1058 offset_in_page(slot); 1059 struct i915_request *rq; 1060 u32 *cs; 1061 int err; 1062 1063 rq = intel_context_create_request(ce); 1064 if (IS_ERR(rq)) 1065 return rq; 1066 1067 if (wait) { 1068 err = i915_request_await_dma_fence(rq, &wait->fence); 1069 if (err) 1070 goto err; 1071 } 1072 1073 cs = intel_ring_begin(rq, 14); 1074 if (IS_ERR(cs)) { 1075 err = PTR_ERR(cs); 1076 goto err; 1077 } 1078 1079 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1080 *cs++ = MI_NOOP; 1081 1082 *cs++ = MI_SEMAPHORE_WAIT | 1083 MI_SEMAPHORE_GLOBAL_GTT | 1084 MI_SEMAPHORE_POLL | 1085 MI_SEMAPHORE_SAD_GTE_SDD; 1086 *cs++ = idx; 1087 *cs++ = offset; 1088 *cs++ = 0; 1089 1090 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 1091 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 1092 *cs++ = offset + idx * sizeof(u32); 1093 *cs++ = 0; 1094 1095 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1096 *cs++ = offset; 1097 *cs++ = 0; 1098 *cs++ = idx + 1; 1099 1100 intel_ring_advance(rq, cs); 1101 1102 rq->sched.attr.priority = I915_PRIORITY_MASK; 1103 err = 0; 1104 err: 1105 i915_request_get(rq); 1106 i915_request_add(rq); 1107 if (err) { 1108 i915_request_put(rq); 1109 
return ERR_PTR(err); 1110 } 1111 1112 return rq; 1113 } 1114 1115 static int live_timeslice_rewind(void *arg) 1116 { 1117 struct intel_gt *gt = arg; 1118 struct intel_engine_cs *engine; 1119 enum intel_engine_id id; 1120 1121 /* 1122 * The usual presumption on timeslice expiration is that we replace 1123 * the active context with another. However, given a chain of 1124 * dependencies we may end up with replacing the context with itself, 1125 * but only a few of those requests, forcing us to rewind the 1126 * RING_TAIL of the original request. 1127 */ 1128 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1129 return 0; 1130 1131 for_each_engine(engine, gt, id) { 1132 enum { A1, A2, B1 }; 1133 enum { X = 1, Z, Y }; 1134 struct i915_request *rq[3] = {}; 1135 struct intel_context *ce; 1136 unsigned long timeslice; 1137 int i, err = 0; 1138 u32 *slot; 1139 1140 if (!intel_engine_has_timeslices(engine)) 1141 continue; 1142 1143 /* 1144 * A:rq1 -- semaphore wait, timestamp X 1145 * A:rq2 -- write timestamp Y 1146 * 1147 * B:rq1 [await A:rq1] -- write timestamp Z 1148 * 1149 * Force timeslice, release semaphore. 1150 * 1151 * Expect execution/evaluation order XZY 1152 */ 1153 1154 st_engine_heartbeat_disable(engine); 1155 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1156 1157 slot = memset32(engine->status_page.addr + 1000, 0, 4); 1158 1159 ce = intel_context_create(engine); 1160 if (IS_ERR(ce)) { 1161 err = PTR_ERR(ce); 1162 goto err; 1163 } 1164 1165 rq[A1] = create_rewinder(ce, NULL, slot, X); 1166 if (IS_ERR(rq[A1])) { 1167 intel_context_put(ce); 1168 goto err; 1169 } 1170 1171 rq[A2] = create_rewinder(ce, NULL, slot, Y); 1172 intel_context_put(ce); 1173 if (IS_ERR(rq[A2])) 1174 goto err; 1175 1176 err = wait_for_submit(engine, rq[A2], HZ / 2); 1177 if (err) { 1178 pr_err("%s: failed to submit first context\n", 1179 engine->name); 1180 goto err; 1181 } 1182 1183 ce = intel_context_create(engine); 1184 if (IS_ERR(ce)) { 1185 err = PTR_ERR(ce); 1186 goto err; 1187 } 1188 1189 rq[B1] = create_rewinder(ce, rq[A1], slot, Z); 1190 intel_context_put(ce); 1191 if (IS_ERR(rq[2])) 1192 goto err; 1193 1194 err = wait_for_submit(engine, rq[B1], HZ / 2); 1195 if (err) { 1196 pr_err("%s: failed to submit second context\n", 1197 engine->name); 1198 goto err; 1199 } 1200 1201 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1202 ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); 1203 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ 1204 /* Wait for the timeslice to kick in */ 1205 del_timer(&engine->execlists.timer); 1206 tasklet_hi_schedule(&engine->execlists.tasklet); 1207 intel_engine_flush_submission(engine); 1208 } 1209 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1210 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1211 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1212 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1213 1214 /* Release the hounds! 
*/ 1215 slot[0] = 1; 1216 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1217 1218 for (i = 1; i <= 3; i++) { 1219 unsigned long timeout = jiffies + HZ / 2; 1220 1221 while (!READ_ONCE(slot[i]) && 1222 time_before(jiffies, timeout)) 1223 ; 1224 1225 if (!time_before(jiffies, timeout)) { 1226 pr_err("%s: rq[%d] timed out\n", 1227 engine->name, i - 1); 1228 err = -ETIME; 1229 goto err; 1230 } 1231 1232 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1233 } 1234 1235 /* XZY: XZ < XY */ 1236 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1237 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1238 engine->name, 1239 slot[Z] - slot[X], 1240 slot[Y] - slot[X]); 1241 err = -EINVAL; 1242 } 1243 1244 err: 1245 memset32(&slot[0], -1, 4); 1246 wmb(); 1247 1248 engine->props.timeslice_duration_ms = timeslice; 1249 st_engine_heartbeat_enable(engine); 1250 for (i = 0; i < 3; i++) 1251 i915_request_put(rq[i]); 1252 if (igt_flush_test(gt->i915)) 1253 err = -EIO; 1254 if (err) 1255 return err; 1256 } 1257 1258 return 0; 1259 } 1260 1261 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1262 { 1263 struct i915_request *rq; 1264 1265 rq = intel_engine_create_kernel_request(engine); 1266 if (IS_ERR(rq)) 1267 return rq; 1268 1269 i915_request_get(rq); 1270 i915_request_add(rq); 1271 1272 return rq; 1273 } 1274 1275 static long slice_timeout(struct intel_engine_cs *engine) 1276 { 1277 long timeout; 1278 1279 /* Enough time for a timeslice to kick in, and kick out */ 1280 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); 1281 1282 /* Enough time for the nop request to complete */ 1283 timeout += HZ / 5; 1284 1285 return timeout + 1; 1286 } 1287 1288 static int live_timeslice_queue(void *arg) 1289 { 1290 struct intel_gt *gt = arg; 1291 struct drm_i915_gem_object *obj; 1292 struct intel_engine_cs *engine; 1293 enum intel_engine_id id; 1294 struct i915_vma *vma; 1295 void *vaddr; 1296 int err = 0; 1297 1298 /* 1299 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1300 * timeslicing between them disabled, we *do* enable timeslicing 1301 * if the queue demands it. (Normally, we do not submit if 1302 * ELSP[1] is already occupied, so must rely on timeslicing to 1303 * eject ELSP[0] in favour of the queue.) 
1304 */ 1305 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1306 return 0; 1307 1308 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1309 if (IS_ERR(obj)) 1310 return PTR_ERR(obj); 1311 1312 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1313 if (IS_ERR(vma)) { 1314 err = PTR_ERR(vma); 1315 goto err_obj; 1316 } 1317 1318 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1319 if (IS_ERR(vaddr)) { 1320 err = PTR_ERR(vaddr); 1321 goto err_obj; 1322 } 1323 1324 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1325 if (err) 1326 goto err_map; 1327 1328 err = i915_vma_sync(vma); 1329 if (err) 1330 goto err_pin; 1331 1332 for_each_engine(engine, gt, id) { 1333 struct i915_sched_attr attr = { 1334 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1335 }; 1336 struct i915_request *rq, *nop; 1337 1338 if (!intel_engine_has_preemption(engine)) 1339 continue; 1340 1341 st_engine_heartbeat_disable(engine); 1342 memset(vaddr, 0, PAGE_SIZE); 1343 1344 /* ELSP[0]: semaphore wait */ 1345 rq = semaphore_queue(engine, vma, 0); 1346 if (IS_ERR(rq)) { 1347 err = PTR_ERR(rq); 1348 goto err_heartbeat; 1349 } 1350 engine->schedule(rq, &attr); 1351 err = wait_for_submit(engine, rq, HZ / 2); 1352 if (err) { 1353 pr_err("%s: Timed out trying to submit semaphores\n", 1354 engine->name); 1355 goto err_rq; 1356 } 1357 1358 /* ELSP[1]: nop request */ 1359 nop = nop_request(engine); 1360 if (IS_ERR(nop)) { 1361 err = PTR_ERR(nop); 1362 goto err_rq; 1363 } 1364 err = wait_for_submit(engine, nop, HZ / 2); 1365 i915_request_put(nop); 1366 if (err) { 1367 pr_err("%s: Timed out trying to submit nop\n", 1368 engine->name); 1369 goto err_rq; 1370 } 1371 1372 GEM_BUG_ON(i915_request_completed(rq)); 1373 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1374 1375 /* Queue: semaphore signal, matching priority as semaphore */ 1376 err = release_queue(engine, vma, 1, effective_prio(rq)); 1377 if (err) 1378 goto err_rq; 1379 1380 /* Wait until we ack the release_queue and start timeslicing */ 1381 do { 1382 cond_resched(); 1383 intel_engine_flush_submission(engine); 1384 } while (READ_ONCE(engine->execlists.pending[0])); 1385 1386 /* Timeslice every jiffy, so within 2 we should signal */ 1387 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { 1388 struct drm_printer p = 1389 drm_info_printer(gt->i915->drm.dev); 1390 1391 pr_err("%s: Failed to timeslice into queue\n", 1392 engine->name); 1393 intel_engine_dump(engine, &p, 1394 "%s\n", engine->name); 1395 1396 memset(vaddr, 0xff, PAGE_SIZE); 1397 err = -EIO; 1398 } 1399 err_rq: 1400 i915_request_put(rq); 1401 err_heartbeat: 1402 st_engine_heartbeat_enable(engine); 1403 if (err) 1404 break; 1405 } 1406 1407 err_pin: 1408 i915_vma_unpin(vma); 1409 err_map: 1410 i915_gem_object_unpin_map(obj); 1411 err_obj: 1412 i915_gem_object_put(obj); 1413 return err; 1414 } 1415 1416 static int live_timeslice_nopreempt(void *arg) 1417 { 1418 struct intel_gt *gt = arg; 1419 struct intel_engine_cs *engine; 1420 enum intel_engine_id id; 1421 struct igt_spinner spin; 1422 int err = 0; 1423 1424 /* 1425 * We should not timeslice into a request that is marked with 1426 * I915_REQUEST_NOPREEMPT. 
1427 */ 1428 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1429 return 0; 1430 1431 if (igt_spinner_init(&spin, gt)) 1432 return -ENOMEM; 1433 1434 for_each_engine(engine, gt, id) { 1435 struct intel_context *ce; 1436 struct i915_request *rq; 1437 unsigned long timeslice; 1438 1439 if (!intel_engine_has_preemption(engine)) 1440 continue; 1441 1442 ce = intel_context_create(engine); 1443 if (IS_ERR(ce)) { 1444 err = PTR_ERR(ce); 1445 break; 1446 } 1447 1448 st_engine_heartbeat_disable(engine); 1449 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1450 1451 /* Create an unpreemptible spinner */ 1452 1453 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 1454 intel_context_put(ce); 1455 if (IS_ERR(rq)) { 1456 err = PTR_ERR(rq); 1457 goto out_heartbeat; 1458 } 1459 1460 i915_request_get(rq); 1461 i915_request_add(rq); 1462 1463 if (!igt_wait_for_spinner(&spin, rq)) { 1464 i915_request_put(rq); 1465 err = -ETIME; 1466 goto out_spin; 1467 } 1468 1469 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 1470 i915_request_put(rq); 1471 1472 /* Followed by a maximum priority barrier (heartbeat) */ 1473 1474 ce = intel_context_create(engine); 1475 if (IS_ERR(ce)) { 1476 err = PTR_ERR(ce); 1477 goto out_spin; 1478 } 1479 1480 rq = intel_context_create_request(ce); 1481 intel_context_put(ce); 1482 if (IS_ERR(rq)) { 1483 err = PTR_ERR(rq); 1484 goto out_spin; 1485 } 1486 1487 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 1488 i915_request_get(rq); 1489 i915_request_add(rq); 1490 1491 /* 1492 * Wait until the barrier is in ELSP, and we know timeslicing 1493 * will have been activated. 1494 */ 1495 if (wait_for_submit(engine, rq, HZ / 2)) { 1496 i915_request_put(rq); 1497 err = -ETIME; 1498 goto out_spin; 1499 } 1500 1501 /* 1502 * Since the ELSP[0] request is unpreemptible, it should not 1503 * allow the maximum priority barrier through. Wait long 1504 * enough to see if it is timesliced in by mistake. 1505 */ 1506 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { 1507 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", 1508 engine->name); 1509 err = -EINVAL; 1510 } 1511 i915_request_put(rq); 1512 1513 out_spin: 1514 igt_spinner_end(&spin); 1515 out_heartbeat: 1516 xchg(&engine->props.timeslice_duration_ms, timeslice); 1517 st_engine_heartbeat_enable(engine); 1518 if (err) 1519 break; 1520 1521 if (igt_flush_test(gt->i915)) { 1522 err = -EIO; 1523 break; 1524 } 1525 } 1526 1527 igt_spinner_fini(&spin); 1528 return err; 1529 } 1530 1531 static int live_busywait_preempt(void *arg) 1532 { 1533 struct intel_gt *gt = arg; 1534 struct i915_gem_context *ctx_hi, *ctx_lo; 1535 struct intel_engine_cs *engine; 1536 struct drm_i915_gem_object *obj; 1537 struct i915_vma *vma; 1538 enum intel_engine_id id; 1539 int err = -ENOMEM; 1540 u32 *map; 1541 1542 /* 1543 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1544 * preempt the busywaits used to synchronise between rings. 
1545 */ 1546 1547 ctx_hi = kernel_context(gt->i915); 1548 if (!ctx_hi) 1549 return -ENOMEM; 1550 ctx_hi->sched.priority = 1551 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1552 1553 ctx_lo = kernel_context(gt->i915); 1554 if (!ctx_lo) 1555 goto err_ctx_hi; 1556 ctx_lo->sched.priority = 1557 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1558 1559 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1560 if (IS_ERR(obj)) { 1561 err = PTR_ERR(obj); 1562 goto err_ctx_lo; 1563 } 1564 1565 map = i915_gem_object_pin_map(obj, I915_MAP_WC); 1566 if (IS_ERR(map)) { 1567 err = PTR_ERR(map); 1568 goto err_obj; 1569 } 1570 1571 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1572 if (IS_ERR(vma)) { 1573 err = PTR_ERR(vma); 1574 goto err_map; 1575 } 1576 1577 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1578 if (err) 1579 goto err_map; 1580 1581 err = i915_vma_sync(vma); 1582 if (err) 1583 goto err_vma; 1584 1585 for_each_engine(engine, gt, id) { 1586 struct i915_request *lo, *hi; 1587 struct igt_live_test t; 1588 u32 *cs; 1589 1590 if (!intel_engine_has_preemption(engine)) 1591 continue; 1592 1593 if (!intel_engine_can_store_dword(engine)) 1594 continue; 1595 1596 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1597 err = -EIO; 1598 goto err_vma; 1599 } 1600 1601 /* 1602 * We create two requests. The low priority request 1603 * busywaits on a semaphore (inside the ringbuffer where 1604 * is should be preemptible) and the high priority requests 1605 * uses a MI_STORE_DWORD_IMM to update the semaphore value 1606 * allowing the first request to complete. If preemption 1607 * fails, we hang instead. 1608 */ 1609 1610 lo = igt_request_alloc(ctx_lo, engine); 1611 if (IS_ERR(lo)) { 1612 err = PTR_ERR(lo); 1613 goto err_vma; 1614 } 1615 1616 cs = intel_ring_begin(lo, 8); 1617 if (IS_ERR(cs)) { 1618 err = PTR_ERR(cs); 1619 i915_request_add(lo); 1620 goto err_vma; 1621 } 1622 1623 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1624 *cs++ = i915_ggtt_offset(vma); 1625 *cs++ = 0; 1626 *cs++ = 1; 1627 1628 /* XXX Do we need a flush + invalidate here? 
*/ 1629 1630 *cs++ = MI_SEMAPHORE_WAIT | 1631 MI_SEMAPHORE_GLOBAL_GTT | 1632 MI_SEMAPHORE_POLL | 1633 MI_SEMAPHORE_SAD_EQ_SDD; 1634 *cs++ = 0; 1635 *cs++ = i915_ggtt_offset(vma); 1636 *cs++ = 0; 1637 1638 intel_ring_advance(lo, cs); 1639 1640 i915_request_get(lo); 1641 i915_request_add(lo); 1642 1643 if (wait_for(READ_ONCE(*map), 10)) { 1644 i915_request_put(lo); 1645 err = -ETIMEDOUT; 1646 goto err_vma; 1647 } 1648 1649 /* Low priority request should be busywaiting now */ 1650 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1651 i915_request_put(lo); 1652 pr_err("%s: Busywaiting request did not!\n", 1653 engine->name); 1654 err = -EIO; 1655 goto err_vma; 1656 } 1657 1658 hi = igt_request_alloc(ctx_hi, engine); 1659 if (IS_ERR(hi)) { 1660 err = PTR_ERR(hi); 1661 i915_request_put(lo); 1662 goto err_vma; 1663 } 1664 1665 cs = intel_ring_begin(hi, 4); 1666 if (IS_ERR(cs)) { 1667 err = PTR_ERR(cs); 1668 i915_request_add(hi); 1669 i915_request_put(lo); 1670 goto err_vma; 1671 } 1672 1673 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1674 *cs++ = i915_ggtt_offset(vma); 1675 *cs++ = 0; 1676 *cs++ = 0; 1677 1678 intel_ring_advance(hi, cs); 1679 i915_request_add(hi); 1680 1681 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1682 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1683 1684 pr_err("%s: Failed to preempt semaphore busywait!\n", 1685 engine->name); 1686 1687 intel_engine_dump(engine, &p, "%s\n", engine->name); 1688 GEM_TRACE_DUMP(); 1689 1690 i915_request_put(lo); 1691 intel_gt_set_wedged(gt); 1692 err = -EIO; 1693 goto err_vma; 1694 } 1695 GEM_BUG_ON(READ_ONCE(*map)); 1696 i915_request_put(lo); 1697 1698 if (igt_live_test_end(&t)) { 1699 err = -EIO; 1700 goto err_vma; 1701 } 1702 } 1703 1704 err = 0; 1705 err_vma: 1706 i915_vma_unpin(vma); 1707 err_map: 1708 i915_gem_object_unpin_map(obj); 1709 err_obj: 1710 i915_gem_object_put(obj); 1711 err_ctx_lo: 1712 kernel_context_close(ctx_lo); 1713 err_ctx_hi: 1714 kernel_context_close(ctx_hi); 1715 return err; 1716 } 1717 1718 static struct i915_request * 1719 spinner_create_request(struct igt_spinner *spin, 1720 struct i915_gem_context *ctx, 1721 struct intel_engine_cs *engine, 1722 u32 arb) 1723 { 1724 struct intel_context *ce; 1725 struct i915_request *rq; 1726 1727 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1728 if (IS_ERR(ce)) 1729 return ERR_CAST(ce); 1730 1731 rq = igt_spinner_create_request(spin, ce, arb); 1732 intel_context_put(ce); 1733 return rq; 1734 } 1735 1736 static int live_preempt(void *arg) 1737 { 1738 struct intel_gt *gt = arg; 1739 struct i915_gem_context *ctx_hi, *ctx_lo; 1740 struct igt_spinner spin_hi, spin_lo; 1741 struct intel_engine_cs *engine; 1742 enum intel_engine_id id; 1743 int err = -ENOMEM; 1744 1745 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1746 return 0; 1747 1748 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1749 pr_err("Logical preemption supported, but not exposed\n"); 1750 1751 if (igt_spinner_init(&spin_hi, gt)) 1752 return -ENOMEM; 1753 1754 if (igt_spinner_init(&spin_lo, gt)) 1755 goto err_spin_hi; 1756 1757 ctx_hi = kernel_context(gt->i915); 1758 if (!ctx_hi) 1759 goto err_spin_lo; 1760 ctx_hi->sched.priority = 1761 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1762 1763 ctx_lo = kernel_context(gt->i915); 1764 if (!ctx_lo) 1765 goto err_ctx_hi; 1766 ctx_lo->sched.priority = 1767 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1768 1769 for_each_engine(engine, gt, id) { 1770 struct igt_live_test t; 1771 struct i915_request *rq; 1772 1773 if 
(!intel_engine_has_preemption(engine)) 1774 continue; 1775 1776 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1777 err = -EIO; 1778 goto err_ctx_lo; 1779 } 1780 1781 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1782 MI_ARB_CHECK); 1783 if (IS_ERR(rq)) { 1784 err = PTR_ERR(rq); 1785 goto err_ctx_lo; 1786 } 1787 1788 i915_request_add(rq); 1789 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1790 GEM_TRACE("lo spinner failed to start\n"); 1791 GEM_TRACE_DUMP(); 1792 intel_gt_set_wedged(gt); 1793 err = -EIO; 1794 goto err_ctx_lo; 1795 } 1796 1797 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1798 MI_ARB_CHECK); 1799 if (IS_ERR(rq)) { 1800 igt_spinner_end(&spin_lo); 1801 err = PTR_ERR(rq); 1802 goto err_ctx_lo; 1803 } 1804 1805 i915_request_add(rq); 1806 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1807 GEM_TRACE("hi spinner failed to start\n"); 1808 GEM_TRACE_DUMP(); 1809 intel_gt_set_wedged(gt); 1810 err = -EIO; 1811 goto err_ctx_lo; 1812 } 1813 1814 igt_spinner_end(&spin_hi); 1815 igt_spinner_end(&spin_lo); 1816 1817 if (igt_live_test_end(&t)) { 1818 err = -EIO; 1819 goto err_ctx_lo; 1820 } 1821 } 1822 1823 err = 0; 1824 err_ctx_lo: 1825 kernel_context_close(ctx_lo); 1826 err_ctx_hi: 1827 kernel_context_close(ctx_hi); 1828 err_spin_lo: 1829 igt_spinner_fini(&spin_lo); 1830 err_spin_hi: 1831 igt_spinner_fini(&spin_hi); 1832 return err; 1833 } 1834 1835 static int live_late_preempt(void *arg) 1836 { 1837 struct intel_gt *gt = arg; 1838 struct i915_gem_context *ctx_hi, *ctx_lo; 1839 struct igt_spinner spin_hi, spin_lo; 1840 struct intel_engine_cs *engine; 1841 struct i915_sched_attr attr = {}; 1842 enum intel_engine_id id; 1843 int err = -ENOMEM; 1844 1845 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1846 return 0; 1847 1848 if (igt_spinner_init(&spin_hi, gt)) 1849 return -ENOMEM; 1850 1851 if (igt_spinner_init(&spin_lo, gt)) 1852 goto err_spin_hi; 1853 1854 ctx_hi = kernel_context(gt->i915); 1855 if (!ctx_hi) 1856 goto err_spin_lo; 1857 1858 ctx_lo = kernel_context(gt->i915); 1859 if (!ctx_lo) 1860 goto err_ctx_hi; 1861 1862 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1863 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1864 1865 for_each_engine(engine, gt, id) { 1866 struct igt_live_test t; 1867 struct i915_request *rq; 1868 1869 if (!intel_engine_has_preemption(engine)) 1870 continue; 1871 1872 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1873 err = -EIO; 1874 goto err_ctx_lo; 1875 } 1876 1877 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1878 MI_ARB_CHECK); 1879 if (IS_ERR(rq)) { 1880 err = PTR_ERR(rq); 1881 goto err_ctx_lo; 1882 } 1883 1884 i915_request_add(rq); 1885 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1886 pr_err("First context failed to start\n"); 1887 goto err_wedged; 1888 } 1889 1890 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1891 MI_NOOP); 1892 if (IS_ERR(rq)) { 1893 igt_spinner_end(&spin_lo); 1894 err = PTR_ERR(rq); 1895 goto err_ctx_lo; 1896 } 1897 1898 i915_request_add(rq); 1899 if (igt_wait_for_spinner(&spin_hi, rq)) { 1900 pr_err("Second context overtook first?\n"); 1901 goto err_wedged; 1902 } 1903 1904 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1905 engine->schedule(rq, &attr); 1906 1907 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1908 pr_err("High priority context failed to preempt the low priority context\n"); 1909 GEM_TRACE_DUMP(); 1910 goto err_wedged; 1911 } 1912 1913 igt_spinner_end(&spin_hi); 1914 igt_spinner_end(&spin_lo); 1915 1916 if (igt_live_test_end(&t)) { 1917 err = -EIO; 1918 goto err_ctx_lo; 1919 } 1920 } 1921 1922 err = 0; 1923 err_ctx_lo: 1924 kernel_context_close(ctx_lo); 1925 err_ctx_hi: 1926 kernel_context_close(ctx_hi); 1927 err_spin_lo: 1928 igt_spinner_fini(&spin_lo); 1929 err_spin_hi: 1930 igt_spinner_fini(&spin_hi); 1931 return err; 1932 1933 err_wedged: 1934 igt_spinner_end(&spin_hi); 1935 igt_spinner_end(&spin_lo); 1936 intel_gt_set_wedged(gt); 1937 err = -EIO; 1938 goto err_ctx_lo; 1939 } 1940 1941 struct preempt_client { 1942 struct igt_spinner spin; 1943 struct i915_gem_context *ctx; 1944 }; 1945 1946 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1947 { 1948 c->ctx = kernel_context(gt->i915); 1949 if (!c->ctx) 1950 return -ENOMEM; 1951 1952 if (igt_spinner_init(&c->spin, gt)) 1953 goto err_ctx; 1954 1955 return 0; 1956 1957 err_ctx: 1958 kernel_context_close(c->ctx); 1959 return -ENOMEM; 1960 } 1961 1962 static void preempt_client_fini(struct preempt_client *c) 1963 { 1964 igt_spinner_fini(&c->spin); 1965 kernel_context_close(c->ctx); 1966 } 1967 1968 static int live_nopreempt(void *arg) 1969 { 1970 struct intel_gt *gt = arg; 1971 struct intel_engine_cs *engine; 1972 struct preempt_client a, b; 1973 enum intel_engine_id id; 1974 int err = -ENOMEM; 1975 1976 /* 1977 * Verify that we can disable preemption for an individual request 1978 * that may be being observed and not want to be interrupted. 1979 */ 1980 1981 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1982 return 0; 1983 1984 if (preempt_client_init(gt, &a)) 1985 return -ENOMEM; 1986 if (preempt_client_init(gt, &b)) 1987 goto err_client_a; 1988 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1989 1990 for_each_engine(engine, gt, id) { 1991 struct i915_request *rq_a, *rq_b; 1992 1993 if (!intel_engine_has_preemption(engine)) 1994 continue; 1995 1996 engine->execlists.preempt_hang.count = 0; 1997 1998 rq_a = spinner_create_request(&a.spin, 1999 a.ctx, engine, 2000 MI_ARB_CHECK); 2001 if (IS_ERR(rq_a)) { 2002 err = PTR_ERR(rq_a); 2003 goto err_client_b; 2004 } 2005 2006 /* Low priority client, but unpreemptable! 
*/ 2007 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 2008 2009 i915_request_add(rq_a); 2010 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2011 pr_err("First client failed to start\n"); 2012 goto err_wedged; 2013 } 2014 2015 rq_b = spinner_create_request(&b.spin, 2016 b.ctx, engine, 2017 MI_ARB_CHECK); 2018 if (IS_ERR(rq_b)) { 2019 err = PTR_ERR(rq_b); 2020 goto err_client_b; 2021 } 2022 2023 i915_request_add(rq_b); 2024 2025 /* B is much more important than A! (But A is unpreemptable.) */ 2026 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 2027 2028 /* Wait long enough for preemption and timeslicing */ 2029 if (igt_wait_for_spinner(&b.spin, rq_b)) { 2030 pr_err("Second client started too early!\n"); 2031 goto err_wedged; 2032 } 2033 2034 igt_spinner_end(&a.spin); 2035 2036 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2037 pr_err("Second client failed to start\n"); 2038 goto err_wedged; 2039 } 2040 2041 igt_spinner_end(&b.spin); 2042 2043 if (engine->execlists.preempt_hang.count) { 2044 pr_err("Preemption recorded x%d; should have been suppressed!\n", 2045 engine->execlists.preempt_hang.count); 2046 err = -EINVAL; 2047 goto err_wedged; 2048 } 2049 2050 if (igt_flush_test(gt->i915)) 2051 goto err_wedged; 2052 } 2053 2054 err = 0; 2055 err_client_b: 2056 preempt_client_fini(&b); 2057 err_client_a: 2058 preempt_client_fini(&a); 2059 return err; 2060 2061 err_wedged: 2062 igt_spinner_end(&b.spin); 2063 igt_spinner_end(&a.spin); 2064 intel_gt_set_wedged(gt); 2065 err = -EIO; 2066 goto err_client_b; 2067 } 2068 2069 struct live_preempt_cancel { 2070 struct intel_engine_cs *engine; 2071 struct preempt_client a, b; 2072 }; 2073 2074 static int __cancel_active0(struct live_preempt_cancel *arg) 2075 { 2076 struct i915_request *rq; 2077 struct igt_live_test t; 2078 int err; 2079 2080 /* Preempt cancel of ELSP0 */ 2081 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2082 if (igt_live_test_begin(&t, arg->engine->i915, 2083 __func__, arg->engine->name)) 2084 return -EIO; 2085 2086 rq = spinner_create_request(&arg->a.spin, 2087 arg->a.ctx, arg->engine, 2088 MI_ARB_CHECK); 2089 if (IS_ERR(rq)) 2090 return PTR_ERR(rq); 2091 2092 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2093 i915_request_get(rq); 2094 i915_request_add(rq); 2095 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2096 err = -EIO; 2097 goto out; 2098 } 2099 2100 intel_context_set_banned(rq->context); 2101 err = intel_engine_pulse(arg->engine); 2102 if (err) 2103 goto out; 2104 2105 err = wait_for_reset(arg->engine, rq, HZ / 2); 2106 if (err) { 2107 pr_err("Cancelled inflight0 request did not reset\n"); 2108 goto out; 2109 } 2110 2111 out: 2112 i915_request_put(rq); 2113 if (igt_live_test_end(&t)) 2114 err = -EIO; 2115 return err; 2116 } 2117 2118 static int __cancel_active1(struct live_preempt_cancel *arg) 2119 { 2120 struct i915_request *rq[2] = {}; 2121 struct igt_live_test t; 2122 int err; 2123 2124 /* Preempt cancel of ELSP1 */ 2125 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2126 if (igt_live_test_begin(&t, arg->engine->i915, 2127 __func__, arg->engine->name)) 2128 return -EIO; 2129 2130 rq[0] = spinner_create_request(&arg->a.spin, 2131 arg->a.ctx, arg->engine, 2132 MI_NOOP); /* no preemption */ 2133 if (IS_ERR(rq[0])) 2134 return PTR_ERR(rq[0]); 2135 2136 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2137 i915_request_get(rq[0]); 2138 i915_request_add(rq[0]); 2139 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2140 err = -EIO; 2141 goto out; 2142 } 2143 2144 rq[1] = spinner_create_request(&arg->b.spin, 2145 
arg->b.ctx, arg->engine, 2146 MI_ARB_CHECK); 2147 if (IS_ERR(rq[1])) { 2148 err = PTR_ERR(rq[1]); 2149 goto out; 2150 } 2151 2152 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2153 i915_request_get(rq[1]); 2154 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2155 i915_request_add(rq[1]); 2156 if (err) 2157 goto out; 2158 2159 intel_context_set_banned(rq[1]->context); 2160 err = intel_engine_pulse(arg->engine); 2161 if (err) 2162 goto out; 2163 2164 igt_spinner_end(&arg->a.spin); 2165 err = wait_for_reset(arg->engine, rq[1], HZ / 2); 2166 if (err) 2167 goto out; 2168 2169 if (rq[0]->fence.error != 0) { 2170 pr_err("Normal inflight0 request did not complete\n"); 2171 err = -EINVAL; 2172 goto out; 2173 } 2174 2175 if (rq[1]->fence.error != -EIO) { 2176 pr_err("Cancelled inflight1 request did not report -EIO\n"); 2177 err = -EINVAL; 2178 goto out; 2179 } 2180 2181 out: 2182 i915_request_put(rq[1]); 2183 i915_request_put(rq[0]); 2184 if (igt_live_test_end(&t)) 2185 err = -EIO; 2186 return err; 2187 } 2188 2189 static int __cancel_queued(struct live_preempt_cancel *arg) 2190 { 2191 struct i915_request *rq[3] = {}; 2192 struct igt_live_test t; 2193 int err; 2194 2195 /* Full ELSP and one in the wings */ 2196 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2197 if (igt_live_test_begin(&t, arg->engine->i915, 2198 __func__, arg->engine->name)) 2199 return -EIO; 2200 2201 rq[0] = spinner_create_request(&arg->a.spin, 2202 arg->a.ctx, arg->engine, 2203 MI_ARB_CHECK); 2204 if (IS_ERR(rq[0])) 2205 return PTR_ERR(rq[0]); 2206 2207 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 2208 i915_request_get(rq[0]); 2209 i915_request_add(rq[0]); 2210 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 2211 err = -EIO; 2212 goto out; 2213 } 2214 2215 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 2216 if (IS_ERR(rq[1])) { 2217 err = PTR_ERR(rq[1]); 2218 goto out; 2219 } 2220 2221 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 2222 i915_request_get(rq[1]); 2223 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 2224 i915_request_add(rq[1]); 2225 if (err) 2226 goto out; 2227 2228 rq[2] = spinner_create_request(&arg->b.spin, 2229 arg->a.ctx, arg->engine, 2230 MI_ARB_CHECK); 2231 if (IS_ERR(rq[2])) { 2232 err = PTR_ERR(rq[2]); 2233 goto out; 2234 } 2235 2236 i915_request_get(rq[2]); 2237 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 2238 i915_request_add(rq[2]); 2239 if (err) 2240 goto out; 2241 2242 intel_context_set_banned(rq[2]->context); 2243 err = intel_engine_pulse(arg->engine); 2244 if (err) 2245 goto out; 2246 2247 err = wait_for_reset(arg->engine, rq[2], HZ / 2); 2248 if (err) 2249 goto out; 2250 2251 if (rq[0]->fence.error != -EIO) { 2252 pr_err("Cancelled inflight0 request did not report -EIO\n"); 2253 err = -EINVAL; 2254 goto out; 2255 } 2256 2257 if (rq[1]->fence.error != 0) { 2258 pr_err("Normal inflight1 request did not complete\n"); 2259 err = -EINVAL; 2260 goto out; 2261 } 2262 2263 if (rq[2]->fence.error != -EIO) { 2264 pr_err("Cancelled queued request did not report -EIO\n"); 2265 err = -EINVAL; 2266 goto out; 2267 } 2268 2269 out: 2270 i915_request_put(rq[2]); 2271 i915_request_put(rq[1]); 2272 i915_request_put(rq[0]); 2273 if (igt_live_test_end(&t)) 2274 err = -EIO; 2275 return err; 2276 } 2277 2278 static int __cancel_hostile(struct live_preempt_cancel *arg) 2279 { 2280 struct i915_request *rq; 2281 int err; 2282 2283 /* Preempt cancel non-preemptible spinner in ELSP0 */ 2284 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2285 return 0; 2286 2287 
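	/* The banned, non-preemptible spinner can only be removed by an engine reset */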
if (!intel_has_reset_engine(arg->engine->gt)) 2288 return 0; 2289 2290 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2291 rq = spinner_create_request(&arg->a.spin, 2292 arg->a.ctx, arg->engine, 2293 MI_NOOP); /* preemption disabled */ 2294 if (IS_ERR(rq)) 2295 return PTR_ERR(rq); 2296 2297 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2298 i915_request_get(rq); 2299 i915_request_add(rq); 2300 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2301 err = -EIO; 2302 goto out; 2303 } 2304 2305 intel_context_set_banned(rq->context); 2306 err = intel_engine_pulse(arg->engine); /* force reset */ 2307 if (err) 2308 goto out; 2309 2310 err = wait_for_reset(arg->engine, rq, HZ / 2); 2311 if (err) { 2312 pr_err("Cancelled inflight0 request did not reset\n"); 2313 goto out; 2314 } 2315 2316 out: 2317 i915_request_put(rq); 2318 if (igt_flush_test(arg->engine->i915)) 2319 err = -EIO; 2320 return err; 2321 } 2322 2323 static int live_preempt_cancel(void *arg) 2324 { 2325 struct intel_gt *gt = arg; 2326 struct live_preempt_cancel data; 2327 enum intel_engine_id id; 2328 int err = -ENOMEM; 2329 2330 /* 2331 * To cancel an inflight context, we need to first remove it from the 2332 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 2333 */ 2334 2335 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2336 return 0; 2337 2338 if (preempt_client_init(gt, &data.a)) 2339 return -ENOMEM; 2340 if (preempt_client_init(gt, &data.b)) 2341 goto err_client_a; 2342 2343 for_each_engine(data.engine, gt, id) { 2344 if (!intel_engine_has_preemption(data.engine)) 2345 continue; 2346 2347 err = __cancel_active0(&data); 2348 if (err) 2349 goto err_wedged; 2350 2351 err = __cancel_active1(&data); 2352 if (err) 2353 goto err_wedged; 2354 2355 err = __cancel_queued(&data); 2356 if (err) 2357 goto err_wedged; 2358 2359 err = __cancel_hostile(&data); 2360 if (err) 2361 goto err_wedged; 2362 } 2363 2364 err = 0; 2365 err_client_b: 2366 preempt_client_fini(&data.b); 2367 err_client_a: 2368 preempt_client_fini(&data.a); 2369 return err; 2370 2371 err_wedged: 2372 GEM_TRACE_DUMP(); 2373 igt_spinner_end(&data.b.spin); 2374 igt_spinner_end(&data.a.spin); 2375 intel_gt_set_wedged(gt); 2376 goto err_client_b; 2377 } 2378 2379 static int live_suppress_self_preempt(void *arg) 2380 { 2381 struct intel_gt *gt = arg; 2382 struct intel_engine_cs *engine; 2383 struct i915_sched_attr attr = { 2384 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 2385 }; 2386 struct preempt_client a, b; 2387 enum intel_engine_id id; 2388 int err = -ENOMEM; 2389 2390 /* 2391 * Verify that if a preemption request does not cause a change in 2392 * the current execution order, the preempt-to-idle injection is 2393 * skipped and that we do not accidentally apply it after the CS 2394 * completion event. 
2395 */ 2396 2397 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2398 return 0; 2399 2400 if (intel_uc_uses_guc_submission(>->uc)) 2401 return 0; /* presume black blox */ 2402 2403 if (intel_vgpu_active(gt->i915)) 2404 return 0; /* GVT forces single port & request submission */ 2405 2406 if (preempt_client_init(gt, &a)) 2407 return -ENOMEM; 2408 if (preempt_client_init(gt, &b)) 2409 goto err_client_a; 2410 2411 for_each_engine(engine, gt, id) { 2412 struct i915_request *rq_a, *rq_b; 2413 int depth; 2414 2415 if (!intel_engine_has_preemption(engine)) 2416 continue; 2417 2418 if (igt_flush_test(gt->i915)) 2419 goto err_wedged; 2420 2421 st_engine_heartbeat_disable(engine); 2422 engine->execlists.preempt_hang.count = 0; 2423 2424 rq_a = spinner_create_request(&a.spin, 2425 a.ctx, engine, 2426 MI_NOOP); 2427 if (IS_ERR(rq_a)) { 2428 err = PTR_ERR(rq_a); 2429 st_engine_heartbeat_enable(engine); 2430 goto err_client_b; 2431 } 2432 2433 i915_request_add(rq_a); 2434 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2435 pr_err("First client failed to start\n"); 2436 st_engine_heartbeat_enable(engine); 2437 goto err_wedged; 2438 } 2439 2440 /* Keep postponing the timer to avoid premature slicing */ 2441 mod_timer(&engine->execlists.timer, jiffies + HZ); 2442 for (depth = 0; depth < 8; depth++) { 2443 rq_b = spinner_create_request(&b.spin, 2444 b.ctx, engine, 2445 MI_NOOP); 2446 if (IS_ERR(rq_b)) { 2447 err = PTR_ERR(rq_b); 2448 st_engine_heartbeat_enable(engine); 2449 goto err_client_b; 2450 } 2451 i915_request_add(rq_b); 2452 2453 GEM_BUG_ON(i915_request_completed(rq_a)); 2454 engine->schedule(rq_a, &attr); 2455 igt_spinner_end(&a.spin); 2456 2457 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2458 pr_err("Second client failed to start\n"); 2459 st_engine_heartbeat_enable(engine); 2460 goto err_wedged; 2461 } 2462 2463 swap(a, b); 2464 rq_a = rq_b; 2465 } 2466 igt_spinner_end(&a.spin); 2467 2468 if (engine->execlists.preempt_hang.count) { 2469 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2470 engine->name, 2471 engine->execlists.preempt_hang.count, 2472 depth); 2473 st_engine_heartbeat_enable(engine); 2474 err = -EINVAL; 2475 goto err_client_b; 2476 } 2477 2478 st_engine_heartbeat_enable(engine); 2479 if (igt_flush_test(gt->i915)) 2480 goto err_wedged; 2481 } 2482 2483 err = 0; 2484 err_client_b: 2485 preempt_client_fini(&b); 2486 err_client_a: 2487 preempt_client_fini(&a); 2488 return err; 2489 2490 err_wedged: 2491 igt_spinner_end(&b.spin); 2492 igt_spinner_end(&a.spin); 2493 intel_gt_set_wedged(gt); 2494 err = -EIO; 2495 goto err_client_b; 2496 } 2497 2498 static int live_chain_preempt(void *arg) 2499 { 2500 struct intel_gt *gt = arg; 2501 struct intel_engine_cs *engine; 2502 struct preempt_client hi, lo; 2503 enum intel_engine_id id; 2504 int err = -ENOMEM; 2505 2506 /* 2507 * Build a chain AB...BA between two contexts (A, B) and request 2508 * preemption of the last request. It should then complete before 2509 * the previously submitted spinner in B. 
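	 *
	 * The chain length is bounded by the ring capacity, measured up
	 * front with a throwaway spinner request, presumably so that
	 * building the low priority chain never stalls on ring space.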
2510 */ 2511 2512 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2513 return 0; 2514 2515 if (preempt_client_init(gt, &hi)) 2516 return -ENOMEM; 2517 2518 if (preempt_client_init(gt, &lo)) 2519 goto err_client_hi; 2520 2521 for_each_engine(engine, gt, id) { 2522 struct i915_sched_attr attr = { 2523 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2524 }; 2525 struct igt_live_test t; 2526 struct i915_request *rq; 2527 int ring_size, count, i; 2528 2529 if (!intel_engine_has_preemption(engine)) 2530 continue; 2531 2532 rq = spinner_create_request(&lo.spin, 2533 lo.ctx, engine, 2534 MI_ARB_CHECK); 2535 if (IS_ERR(rq)) 2536 goto err_wedged; 2537 2538 i915_request_get(rq); 2539 i915_request_add(rq); 2540 2541 ring_size = rq->wa_tail - rq->head; 2542 if (ring_size < 0) 2543 ring_size += rq->ring->size; 2544 ring_size = rq->ring->size / ring_size; 2545 pr_debug("%s(%s): Using maximum of %d requests\n", 2546 __func__, engine->name, ring_size); 2547 2548 igt_spinner_end(&lo.spin); 2549 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2550 pr_err("Timed out waiting to flush %s\n", engine->name); 2551 i915_request_put(rq); 2552 goto err_wedged; 2553 } 2554 i915_request_put(rq); 2555 2556 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2557 err = -EIO; 2558 goto err_wedged; 2559 } 2560 2561 for_each_prime_number_from(count, 1, ring_size) { 2562 rq = spinner_create_request(&hi.spin, 2563 hi.ctx, engine, 2564 MI_ARB_CHECK); 2565 if (IS_ERR(rq)) 2566 goto err_wedged; 2567 i915_request_add(rq); 2568 if (!igt_wait_for_spinner(&hi.spin, rq)) 2569 goto err_wedged; 2570 2571 rq = spinner_create_request(&lo.spin, 2572 lo.ctx, engine, 2573 MI_ARB_CHECK); 2574 if (IS_ERR(rq)) 2575 goto err_wedged; 2576 i915_request_add(rq); 2577 2578 for (i = 0; i < count; i++) { 2579 rq = igt_request_alloc(lo.ctx, engine); 2580 if (IS_ERR(rq)) 2581 goto err_wedged; 2582 i915_request_add(rq); 2583 } 2584 2585 rq = igt_request_alloc(hi.ctx, engine); 2586 if (IS_ERR(rq)) 2587 goto err_wedged; 2588 2589 i915_request_get(rq); 2590 i915_request_add(rq); 2591 engine->schedule(rq, &attr); 2592 2593 igt_spinner_end(&hi.spin); 2594 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2595 struct drm_printer p = 2596 drm_info_printer(gt->i915->drm.dev); 2597 2598 pr_err("Failed to preempt over chain of %d\n", 2599 count); 2600 intel_engine_dump(engine, &p, 2601 "%s\n", engine->name); 2602 i915_request_put(rq); 2603 goto err_wedged; 2604 } 2605 igt_spinner_end(&lo.spin); 2606 i915_request_put(rq); 2607 2608 rq = igt_request_alloc(lo.ctx, engine); 2609 if (IS_ERR(rq)) 2610 goto err_wedged; 2611 2612 i915_request_get(rq); 2613 i915_request_add(rq); 2614 2615 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2616 struct drm_printer p = 2617 drm_info_printer(gt->i915->drm.dev); 2618 2619 pr_err("Failed to flush low priority chain of %d requests\n", 2620 count); 2621 intel_engine_dump(engine, &p, 2622 "%s\n", engine->name); 2623 2624 i915_request_put(rq); 2625 goto err_wedged; 2626 } 2627 i915_request_put(rq); 2628 } 2629 2630 if (igt_live_test_end(&t)) { 2631 err = -EIO; 2632 goto err_wedged; 2633 } 2634 } 2635 2636 err = 0; 2637 err_client_lo: 2638 preempt_client_fini(&lo); 2639 err_client_hi: 2640 preempt_client_fini(&hi); 2641 return err; 2642 2643 err_wedged: 2644 igt_spinner_end(&hi.spin); 2645 igt_spinner_end(&lo.spin); 2646 intel_gt_set_wedged(gt); 2647 err = -EIO; 2648 goto err_client_lo; 2649 } 2650 2651 static int create_gang(struct intel_engine_cs *engine, 2652 struct i915_request **prev) 2653 { 2654 struct drm_i915_gem_object *obj; 
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	obj = i915_gem_object_create_internal(engine->i915, 4096);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ce;
	}

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_obj;

	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_obj;
	}

	/* Semaphore target: spin until zero */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = lower_32_bits(vma->node.start);
	*cs++ = upper_32_bits(vma->node.start);

	if (*prev) {
		u64 offset = (*prev)->batch->node.start;

		/* Terminate the spinner in the next lower priority batch. */
		*cs++ = MI_STORE_DWORD_IMM_GEN4;
		*cs++ = lower_32_bits(offset);
		*cs++ = upper_32_bits(offset);
		*cs++ = 0;
	}

	*cs++ = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_obj;
	}

	rq->batch = i915_vma_get(vma);
	i915_request_get(rq);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						vma->node.start,
						PAGE_SIZE, 0);
	i915_vma_unlock(vma);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	i915_gem_object_put(obj);
	intel_context_put(ce);

	rq->client_link.next = &(*prev)->client_link;
	*prev = rq;
	return 0;

err_rq:
	i915_vma_put(rq->batch);
	i915_request_put(rq);
err_obj:
	i915_gem_object_put(obj);
err_ce:
	intel_context_put(ce);
	return err;
}

static int __live_preempt_ring(struct intel_engine_cs *engine,
			       struct igt_spinner *spin,
			       int queue_sz, int ring_sz)
{
	struct intel_context *ce[2] = {};
	struct i915_request *rq;
	struct igt_live_test t;
	int err = 0;
	int n;

	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
		return -EIO;

	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		struct intel_context *tmp;

		tmp = intel_context_create(engine);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			goto err_ce;
		}

		tmp->ring = __intel_context_ring_size(ring_sz);

		err = intel_context_pin(tmp);
		if (err) {
			intel_context_put(tmp);
			goto err_ce;
		}

		memset32(tmp->ring->vaddr,
			 0xdeadbeef, /* trigger a hang if executed */
			 tmp->ring->vma->size / sizeof(u32));

		ce[n] = tmp;
	}

	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	i915_request_get(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		intel_gt_set_wedged(engine->gt);
		i915_request_put(rq);
		err = -ETIME;
		goto err_ce;
	}

	/* Fill the ring, until we will cause a wrap */
	n = 0;
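	/*
	 * Keep adding nop requests on ce[0] until its ring tail has advanced
	 * at least queue_sz bytes past the spinner, so that preempting to
	 * ce[1] later has to unwind (and potentially wrap) a large portion
	 * of ce[0]'s ring.
	 */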
	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
		struct i915_request *tmp;

		tmp = intel_context_create_request(ce[0]);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			i915_request_put(rq);
			goto err_ce;
		}

		i915_request_add(tmp);
		intel_engine_flush_submission(engine);
		n++;
	}
	intel_engine_flush_submission(engine);
	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
		 engine->name, queue_sz, n,
		 ce[0]->ring->size,
		 ce[0]->ring->tail,
		 ce[0]->ring->emit,
		 rq->tail);
	i915_request_put(rq);

	/* Create a second request to preempt the first ring */
	rq = intel_context_create_request(ce[1]);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_get(rq);
	i915_request_add(rq);

	err = wait_for_submit(engine, rq, HZ / 2);
	i915_request_put(rq);
	if (err) {
		pr_err("%s: preemption request was not submitted\n",
		       engine->name);
		err = -ETIME;
	}

	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
		 engine->name,
		 ce[0]->ring->tail, ce[0]->ring->emit,
		 ce[1]->ring->tail, ce[1]->ring->emit);

err_ce:
	intel_engine_flush_submission(engine);
	igt_spinner_end(spin);
	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		if (IS_ERR_OR_NULL(ce[n]))
			break;

		intel_context_unpin(ce[n]);
		intel_context_put(ce[n]);
	}
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}

static int live_preempt_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct igt_spinner spin;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that we roll back large chunks of a ring in order to do a
	 * preemption event. Similar to live_unlite_ring, but looking at
	 * ring size rather than the impact of intel_ring_direction().
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		int n;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		for (n = 0; n <= 3; n++) {
			err = __live_preempt_ring(engine, &spin,
						  n * SZ_4K / 4, SZ_4K);
			if (err)
				break;
		}

		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_preempt_gang(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	/*
	 * Build as long a chain of preempters as we can, with each
	 * request higher priority than the last. Once we are ready, we release
	 * the last batch which then percolates down the chain, each releasing
	 * the next oldest in turn. The intent is to simply push as hard as we
	 * can with the number of preemptions, trying to exceed narrow HW
	 * limits. At a minimum, we insist that we can sort all the user
	 * high priority levels into execution order.
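	 *
	 * Each gang batch (see create_gang above) spins on a semaphore in
	 * its own buffer and, once released, writes a zero into the buffer
	 * of the previous, lower priority batch, so a single write to the
	 * head of the chain unwinds the whole gang.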
2925 */ 2926 2927 for_each_engine(engine, gt, id) { 2928 struct i915_request *rq = NULL; 2929 struct igt_live_test t; 2930 IGT_TIMEOUT(end_time); 2931 int prio = 0; 2932 int err = 0; 2933 u32 *cs; 2934 2935 if (!intel_engine_has_preemption(engine)) 2936 continue; 2937 2938 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2939 return -EIO; 2940 2941 do { 2942 struct i915_sched_attr attr = { 2943 .priority = I915_USER_PRIORITY(prio++), 2944 }; 2945 2946 err = create_gang(engine, &rq); 2947 if (err) 2948 break; 2949 2950 /* Submit each spinner at increasing priority */ 2951 engine->schedule(rq, &attr); 2952 } while (prio <= I915_PRIORITY_MAX && 2953 !__igt_timeout(end_time, NULL)); 2954 pr_debug("%s: Preempt chain of %d requests\n", 2955 engine->name, prio); 2956 2957 /* 2958 * Such that the last spinner is the highest priority and 2959 * should execute first. When that spinner completes, 2960 * it will terminate the next lowest spinner until there 2961 * are no more spinners and the gang is complete. 2962 */ 2963 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2964 if (!IS_ERR(cs)) { 2965 *cs = 0; 2966 i915_gem_object_unpin_map(rq->batch->obj); 2967 } else { 2968 err = PTR_ERR(cs); 2969 intel_gt_set_wedged(gt); 2970 } 2971 2972 while (rq) { /* wait for each rq from highest to lowest prio */ 2973 struct i915_request *n = 2974 list_next_entry(rq, client_link); 2975 2976 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2977 struct drm_printer p = 2978 drm_info_printer(engine->i915->drm.dev); 2979 2980 pr_err("Failed to flush chain of %d requests, at %d\n", 2981 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2982 intel_engine_dump(engine, &p, 2983 "%s\n", engine->name); 2984 2985 err = -ETIME; 2986 } 2987 2988 i915_vma_put(rq->batch); 2989 i915_request_put(rq); 2990 rq = n; 2991 } 2992 2993 if (igt_live_test_end(&t)) 2994 err = -EIO; 2995 if (err) 2996 return err; 2997 } 2998 2999 return 0; 3000 } 3001 3002 static struct i915_vma * 3003 create_gpr_user(struct intel_engine_cs *engine, 3004 struct i915_vma *result, 3005 unsigned int offset) 3006 { 3007 struct drm_i915_gem_object *obj; 3008 struct i915_vma *vma; 3009 u32 *cs; 3010 int err; 3011 int i; 3012 3013 obj = i915_gem_object_create_internal(engine->i915, 4096); 3014 if (IS_ERR(obj)) 3015 return ERR_CAST(obj); 3016 3017 vma = i915_vma_instance(obj, result->vm, NULL); 3018 if (IS_ERR(vma)) { 3019 i915_gem_object_put(obj); 3020 return vma; 3021 } 3022 3023 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3024 if (err) { 3025 i915_vma_put(vma); 3026 return ERR_PTR(err); 3027 } 3028 3029 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 3030 if (IS_ERR(cs)) { 3031 i915_vma_put(vma); 3032 return ERR_CAST(cs); 3033 } 3034 3035 /* All GPR are clear for new contexts. We use GPR(0) as a constant */ 3036 *cs++ = MI_LOAD_REGISTER_IMM(1); 3037 *cs++ = CS_GPR(engine, 0); 3038 *cs++ = 1; 3039 3040 for (i = 1; i < NUM_GPR; i++) { 3041 u64 addr; 3042 3043 /* 3044 * Perform: GPR[i]++ 3045 * 3046 * As we read and write into the context saved GPR[i], if 3047 * we restart this batch buffer from an earlier point, we 3048 * will repeat the increment and store a value > 1. 
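		 *
		 * Roughly, the sequence emitted below for each GPR is:
		 *   GPR[i] += GPR[0]                (MI_MATH, GPR[0] == 1)
		 *   result[offset/4 + i] = GPR[i]   (MI_STORE_REGISTER_MEM)
		 *   wait until result[0] >= i       (MI_SEMAPHORE_WAIT, GTE)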
3049 */ 3050 *cs++ = MI_MATH(4); 3051 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); 3052 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); 3053 *cs++ = MI_MATH_ADD; 3054 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); 3055 3056 addr = result->node.start + offset + i * sizeof(*cs); 3057 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 3058 *cs++ = CS_GPR(engine, 2 * i); 3059 *cs++ = lower_32_bits(addr); 3060 *cs++ = upper_32_bits(addr); 3061 3062 *cs++ = MI_SEMAPHORE_WAIT | 3063 MI_SEMAPHORE_POLL | 3064 MI_SEMAPHORE_SAD_GTE_SDD; 3065 *cs++ = i; 3066 *cs++ = lower_32_bits(result->node.start); 3067 *cs++ = upper_32_bits(result->node.start); 3068 } 3069 3070 *cs++ = MI_BATCH_BUFFER_END; 3071 i915_gem_object_flush_map(obj); 3072 i915_gem_object_unpin_map(obj); 3073 3074 return vma; 3075 } 3076 3077 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) 3078 { 3079 struct drm_i915_gem_object *obj; 3080 struct i915_vma *vma; 3081 int err; 3082 3083 obj = i915_gem_object_create_internal(gt->i915, sz); 3084 if (IS_ERR(obj)) 3085 return ERR_CAST(obj); 3086 3087 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 3088 if (IS_ERR(vma)) { 3089 i915_gem_object_put(obj); 3090 return vma; 3091 } 3092 3093 err = i915_ggtt_pin(vma, 0, 0); 3094 if (err) { 3095 i915_vma_put(vma); 3096 return ERR_PTR(err); 3097 } 3098 3099 return vma; 3100 } 3101 3102 static struct i915_request * 3103 create_gpr_client(struct intel_engine_cs *engine, 3104 struct i915_vma *global, 3105 unsigned int offset) 3106 { 3107 struct i915_vma *batch, *vma; 3108 struct intel_context *ce; 3109 struct i915_request *rq; 3110 int err; 3111 3112 ce = intel_context_create(engine); 3113 if (IS_ERR(ce)) 3114 return ERR_CAST(ce); 3115 3116 vma = i915_vma_instance(global->obj, ce->vm, NULL); 3117 if (IS_ERR(vma)) { 3118 err = PTR_ERR(vma); 3119 goto out_ce; 3120 } 3121 3122 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3123 if (err) 3124 goto out_ce; 3125 3126 batch = create_gpr_user(engine, vma, offset); 3127 if (IS_ERR(batch)) { 3128 err = PTR_ERR(batch); 3129 goto out_vma; 3130 } 3131 3132 rq = intel_context_create_request(ce); 3133 if (IS_ERR(rq)) { 3134 err = PTR_ERR(rq); 3135 goto out_batch; 3136 } 3137 3138 i915_vma_lock(vma); 3139 err = i915_request_await_object(rq, vma->obj, false); 3140 if (!err) 3141 err = i915_vma_move_to_active(vma, rq, 0); 3142 i915_vma_unlock(vma); 3143 3144 i915_vma_lock(batch); 3145 if (!err) 3146 err = i915_request_await_object(rq, batch->obj, false); 3147 if (!err) 3148 err = i915_vma_move_to_active(batch, rq, 0); 3149 if (!err) 3150 err = rq->engine->emit_bb_start(rq, 3151 batch->node.start, 3152 PAGE_SIZE, 0); 3153 i915_vma_unlock(batch); 3154 i915_vma_unpin(batch); 3155 3156 if (!err) 3157 i915_request_get(rq); 3158 i915_request_add(rq); 3159 3160 out_batch: 3161 i915_vma_put(batch); 3162 out_vma: 3163 i915_vma_unpin(vma); 3164 out_ce: 3165 intel_context_put(ce); 3166 return err ? 
ERR_PTR(err) : rq; 3167 } 3168 3169 static int preempt_user(struct intel_engine_cs *engine, 3170 struct i915_vma *global, 3171 int id) 3172 { 3173 struct i915_sched_attr attr = { 3174 .priority = I915_PRIORITY_MAX 3175 }; 3176 struct i915_request *rq; 3177 int err = 0; 3178 u32 *cs; 3179 3180 rq = intel_engine_create_kernel_request(engine); 3181 if (IS_ERR(rq)) 3182 return PTR_ERR(rq); 3183 3184 cs = intel_ring_begin(rq, 4); 3185 if (IS_ERR(cs)) { 3186 i915_request_add(rq); 3187 return PTR_ERR(cs); 3188 } 3189 3190 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 3191 *cs++ = i915_ggtt_offset(global); 3192 *cs++ = 0; 3193 *cs++ = id; 3194 3195 intel_ring_advance(rq, cs); 3196 3197 i915_request_get(rq); 3198 i915_request_add(rq); 3199 3200 engine->schedule(rq, &attr); 3201 3202 if (i915_request_wait(rq, 0, HZ / 2) < 0) 3203 err = -ETIME; 3204 i915_request_put(rq); 3205 3206 return err; 3207 } 3208 3209 static int live_preempt_user(void *arg) 3210 { 3211 struct intel_gt *gt = arg; 3212 struct intel_engine_cs *engine; 3213 struct i915_vma *global; 3214 enum intel_engine_id id; 3215 u32 *result; 3216 int err = 0; 3217 3218 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3219 return 0; 3220 3221 /* 3222 * In our other tests, we look at preemption in carefully 3223 * controlled conditions in the ringbuffer. Since most of the 3224 * time is spent in user batches, most of our preemptions naturally 3225 * occur there. We want to verify that when we preempt inside a batch 3226 * we continue on from the current instruction and do not roll back 3227 * to the start, or another earlier arbitration point. 3228 * 3229 * To verify this, we create a batch which is a mixture of 3230 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with 3231 * a few preempting contexts thrown into the mix, we look for any 3232 * repeated instructions (which show up as incorrect values). 
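	 *
	 * Each client owns a NUM_GPR-dword slice of the shared result page,
	 * while dword 0 of that page is the semaphore that preempt_user()
	 * bumps from a high priority kernel request, stepping every client
	 * forward by one GPR at a time.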
3233 */ 3234 3235 global = create_global(gt, 4096); 3236 if (IS_ERR(global)) 3237 return PTR_ERR(global); 3238 3239 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); 3240 if (IS_ERR(result)) { 3241 i915_vma_unpin_and_release(&global, 0); 3242 return PTR_ERR(result); 3243 } 3244 3245 for_each_engine(engine, gt, id) { 3246 struct i915_request *client[3] = {}; 3247 struct igt_live_test t; 3248 int i; 3249 3250 if (!intel_engine_has_preemption(engine)) 3251 continue; 3252 3253 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) 3254 continue; /* we need per-context GPR */ 3255 3256 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 3257 err = -EIO; 3258 break; 3259 } 3260 3261 memset(result, 0, 4096); 3262 3263 for (i = 0; i < ARRAY_SIZE(client); i++) { 3264 struct i915_request *rq; 3265 3266 rq = create_gpr_client(engine, global, 3267 NUM_GPR * i * sizeof(u32)); 3268 if (IS_ERR(rq)) 3269 goto end_test; 3270 3271 client[i] = rq; 3272 } 3273 3274 /* Continuously preempt the set of 3 running contexts */ 3275 for (i = 1; i <= NUM_GPR; i++) { 3276 err = preempt_user(engine, global, i); 3277 if (err) 3278 goto end_test; 3279 } 3280 3281 if (READ_ONCE(result[0]) != NUM_GPR) { 3282 pr_err("%s: Failed to release semaphore\n", 3283 engine->name); 3284 err = -EIO; 3285 goto end_test; 3286 } 3287 3288 for (i = 0; i < ARRAY_SIZE(client); i++) { 3289 int gpr; 3290 3291 if (i915_request_wait(client[i], 0, HZ / 2) < 0) { 3292 err = -ETIME; 3293 goto end_test; 3294 } 3295 3296 for (gpr = 1; gpr < NUM_GPR; gpr++) { 3297 if (result[NUM_GPR * i + gpr] != 1) { 3298 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", 3299 engine->name, 3300 i, gpr, result[NUM_GPR * i + gpr]); 3301 err = -EINVAL; 3302 goto end_test; 3303 } 3304 } 3305 } 3306 3307 end_test: 3308 for (i = 0; i < ARRAY_SIZE(client); i++) { 3309 if (!client[i]) 3310 break; 3311 3312 i915_request_put(client[i]); 3313 } 3314 3315 /* Flush the semaphores on error */ 3316 smp_store_mb(result[0], -1); 3317 if (igt_live_test_end(&t)) 3318 err = -EIO; 3319 if (err) 3320 break; 3321 } 3322 3323 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); 3324 return err; 3325 } 3326 3327 static int live_preempt_timeout(void *arg) 3328 { 3329 struct intel_gt *gt = arg; 3330 struct i915_gem_context *ctx_hi, *ctx_lo; 3331 struct igt_spinner spin_lo; 3332 struct intel_engine_cs *engine; 3333 enum intel_engine_id id; 3334 int err = -ENOMEM; 3335 3336 /* 3337 * Check that we force preemption to occur by cancelling the previous 3338 * context if it refuses to yield the GPU. 
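	 *
	 * The preempt timeout is dropped to a single millisecond around the
	 * high priority submission, so the non-preemptible spinner should be
	 * shot down by an engine reset almost immediately.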
3339 */ 3340 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 3341 return 0; 3342 3343 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 3344 return 0; 3345 3346 if (!intel_has_reset_engine(gt)) 3347 return 0; 3348 3349 if (igt_spinner_init(&spin_lo, gt)) 3350 return -ENOMEM; 3351 3352 ctx_hi = kernel_context(gt->i915); 3353 if (!ctx_hi) 3354 goto err_spin_lo; 3355 ctx_hi->sched.priority = 3356 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 3357 3358 ctx_lo = kernel_context(gt->i915); 3359 if (!ctx_lo) 3360 goto err_ctx_hi; 3361 ctx_lo->sched.priority = 3362 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 3363 3364 for_each_engine(engine, gt, id) { 3365 unsigned long saved_timeout; 3366 struct i915_request *rq; 3367 3368 if (!intel_engine_has_preemption(engine)) 3369 continue; 3370 3371 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 3372 MI_NOOP); /* preemption disabled */ 3373 if (IS_ERR(rq)) { 3374 err = PTR_ERR(rq); 3375 goto err_ctx_lo; 3376 } 3377 3378 i915_request_add(rq); 3379 if (!igt_wait_for_spinner(&spin_lo, rq)) { 3380 intel_gt_set_wedged(gt); 3381 err = -EIO; 3382 goto err_ctx_lo; 3383 } 3384 3385 rq = igt_request_alloc(ctx_hi, engine); 3386 if (IS_ERR(rq)) { 3387 igt_spinner_end(&spin_lo); 3388 err = PTR_ERR(rq); 3389 goto err_ctx_lo; 3390 } 3391 3392 /* Flush the previous CS ack before changing timeouts */ 3393 while (READ_ONCE(engine->execlists.pending[0])) 3394 cpu_relax(); 3395 3396 saved_timeout = engine->props.preempt_timeout_ms; 3397 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 3398 3399 i915_request_get(rq); 3400 i915_request_add(rq); 3401 3402 intel_engine_flush_submission(engine); 3403 engine->props.preempt_timeout_ms = saved_timeout; 3404 3405 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 3406 intel_gt_set_wedged(gt); 3407 i915_request_put(rq); 3408 err = -ETIME; 3409 goto err_ctx_lo; 3410 } 3411 3412 igt_spinner_end(&spin_lo); 3413 i915_request_put(rq); 3414 } 3415 3416 err = 0; 3417 err_ctx_lo: 3418 kernel_context_close(ctx_lo); 3419 err_ctx_hi: 3420 kernel_context_close(ctx_hi); 3421 err_spin_lo: 3422 igt_spinner_fini(&spin_lo); 3423 return err; 3424 } 3425 3426 static int random_range(struct rnd_state *rnd, int min, int max) 3427 { 3428 return i915_prandom_u32_max_state(max - min, rnd) + min; 3429 } 3430 3431 static int random_priority(struct rnd_state *rnd) 3432 { 3433 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 3434 } 3435 3436 struct preempt_smoke { 3437 struct intel_gt *gt; 3438 struct i915_gem_context **contexts; 3439 struct intel_engine_cs *engine; 3440 struct drm_i915_gem_object *batch; 3441 unsigned int ncontext; 3442 struct rnd_state prng; 3443 unsigned long count; 3444 }; 3445 3446 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 3447 { 3448 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 3449 &smoke->prng)]; 3450 } 3451 3452 static int smoke_submit(struct preempt_smoke *smoke, 3453 struct i915_gem_context *ctx, int prio, 3454 struct drm_i915_gem_object *batch) 3455 { 3456 struct i915_request *rq; 3457 struct i915_vma *vma = NULL; 3458 int err = 0; 3459 3460 if (batch) { 3461 struct i915_address_space *vm; 3462 3463 vm = i915_gem_context_get_vm_rcu(ctx); 3464 vma = i915_vma_instance(batch, vm, NULL); 3465 i915_vm_put(vm); 3466 if (IS_ERR(vma)) 3467 return PTR_ERR(vma); 3468 3469 err = i915_vma_pin(vma, 0, 0, PIN_USER); 3470 if (err) 3471 return err; 3472 } 3473 3474 ctx->sched.priority = prio; 3475 3476 rq = igt_request_alloc(ctx, smoke->engine); 3477 if 
(IS_ERR(rq)) { 3478 err = PTR_ERR(rq); 3479 goto unpin; 3480 } 3481 3482 if (vma) { 3483 i915_vma_lock(vma); 3484 err = i915_request_await_object(rq, vma->obj, false); 3485 if (!err) 3486 err = i915_vma_move_to_active(vma, rq, 0); 3487 if (!err) 3488 err = rq->engine->emit_bb_start(rq, 3489 vma->node.start, 3490 PAGE_SIZE, 0); 3491 i915_vma_unlock(vma); 3492 } 3493 3494 i915_request_add(rq); 3495 3496 unpin: 3497 if (vma) 3498 i915_vma_unpin(vma); 3499 3500 return err; 3501 } 3502 3503 static int smoke_crescendo_thread(void *arg) 3504 { 3505 struct preempt_smoke *smoke = arg; 3506 IGT_TIMEOUT(end_time); 3507 unsigned long count; 3508 3509 count = 0; 3510 do { 3511 struct i915_gem_context *ctx = smoke_context(smoke); 3512 int err; 3513 3514 err = smoke_submit(smoke, 3515 ctx, count % I915_PRIORITY_MAX, 3516 smoke->batch); 3517 if (err) 3518 return err; 3519 3520 count++; 3521 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3522 3523 smoke->count = count; 3524 return 0; 3525 } 3526 3527 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 3528 #define BATCH BIT(0) 3529 { 3530 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 3531 struct preempt_smoke arg[I915_NUM_ENGINES]; 3532 struct intel_engine_cs *engine; 3533 enum intel_engine_id id; 3534 unsigned long count; 3535 int err = 0; 3536 3537 for_each_engine(engine, smoke->gt, id) { 3538 arg[id] = *smoke; 3539 arg[id].engine = engine; 3540 if (!(flags & BATCH)) 3541 arg[id].batch = NULL; 3542 arg[id].count = 0; 3543 3544 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 3545 "igt/smoke:%d", id); 3546 if (IS_ERR(tsk[id])) { 3547 err = PTR_ERR(tsk[id]); 3548 break; 3549 } 3550 get_task_struct(tsk[id]); 3551 } 3552 3553 yield(); /* start all threads before we kthread_stop() */ 3554 3555 count = 0; 3556 for_each_engine(engine, smoke->gt, id) { 3557 int status; 3558 3559 if (IS_ERR_OR_NULL(tsk[id])) 3560 continue; 3561 3562 status = kthread_stop(tsk[id]); 3563 if (status && !err) 3564 err = status; 3565 3566 count += arg[id].count; 3567 3568 put_task_struct(tsk[id]); 3569 } 3570 3571 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3572 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3573 return 0; 3574 } 3575 3576 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3577 { 3578 enum intel_engine_id id; 3579 IGT_TIMEOUT(end_time); 3580 unsigned long count; 3581 3582 count = 0; 3583 do { 3584 for_each_engine(smoke->engine, smoke->gt, id) { 3585 struct i915_gem_context *ctx = smoke_context(smoke); 3586 int err; 3587 3588 err = smoke_submit(smoke, 3589 ctx, random_priority(&smoke->prng), 3590 flags & BATCH ? 
smoke->batch : NULL); 3591 if (err) 3592 return err; 3593 3594 count++; 3595 } 3596 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); 3597 3598 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3599 count, flags, smoke->gt->info.num_engines, smoke->ncontext); 3600 return 0; 3601 } 3602 3603 static int live_preempt_smoke(void *arg) 3604 { 3605 struct preempt_smoke smoke = { 3606 .gt = arg, 3607 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3608 .ncontext = 256, 3609 }; 3610 const unsigned int phase[] = { 0, BATCH }; 3611 struct igt_live_test t; 3612 int err = -ENOMEM; 3613 u32 *cs; 3614 int n; 3615 3616 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3617 return 0; 3618 3619 smoke.contexts = kmalloc_array(smoke.ncontext, 3620 sizeof(*smoke.contexts), 3621 GFP_KERNEL); 3622 if (!smoke.contexts) 3623 return -ENOMEM; 3624 3625 smoke.batch = 3626 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3627 if (IS_ERR(smoke.batch)) { 3628 err = PTR_ERR(smoke.batch); 3629 goto err_free; 3630 } 3631 3632 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3633 if (IS_ERR(cs)) { 3634 err = PTR_ERR(cs); 3635 goto err_batch; 3636 } 3637 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3638 cs[n] = MI_ARB_CHECK; 3639 cs[n] = MI_BATCH_BUFFER_END; 3640 i915_gem_object_flush_map(smoke.batch); 3641 i915_gem_object_unpin_map(smoke.batch); 3642 3643 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3644 err = -EIO; 3645 goto err_batch; 3646 } 3647 3648 for (n = 0; n < smoke.ncontext; n++) { 3649 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3650 if (!smoke.contexts[n]) 3651 goto err_ctx; 3652 } 3653 3654 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3655 err = smoke_crescendo(&smoke, phase[n]); 3656 if (err) 3657 goto err_ctx; 3658 3659 err = smoke_random(&smoke, phase[n]); 3660 if (err) 3661 goto err_ctx; 3662 } 3663 3664 err_ctx: 3665 if (igt_live_test_end(&t)) 3666 err = -EIO; 3667 3668 for (n = 0; n < smoke.ncontext; n++) { 3669 if (!smoke.contexts[n]) 3670 break; 3671 kernel_context_close(smoke.contexts[n]); 3672 } 3673 3674 err_batch: 3675 i915_gem_object_put(smoke.batch); 3676 err_free: 3677 kfree(smoke.contexts); 3678 3679 return err; 3680 } 3681 3682 static int nop_virtual_engine(struct intel_gt *gt, 3683 struct intel_engine_cs **siblings, 3684 unsigned int nsibling, 3685 unsigned int nctx, 3686 unsigned int flags) 3687 #define CHAIN BIT(0) 3688 { 3689 IGT_TIMEOUT(end_time); 3690 struct i915_request *request[16] = {}; 3691 struct intel_context *ve[16]; 3692 unsigned long n, prime, nc; 3693 struct igt_live_test t; 3694 ktime_t times[2] = {}; 3695 int err; 3696 3697 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3698 3699 for (n = 0; n < nctx; n++) { 3700 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3701 if (IS_ERR(ve[n])) { 3702 err = PTR_ERR(ve[n]); 3703 nctx = n; 3704 goto out; 3705 } 3706 3707 err = intel_context_pin(ve[n]); 3708 if (err) { 3709 intel_context_put(ve[n]); 3710 nctx = n; 3711 goto out; 3712 } 3713 } 3714 3715 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3716 if (err) 3717 goto out; 3718 3719 for_each_prime_number_from(prime, 1, 8192) { 3720 times[1] = ktime_get_raw(); 3721 3722 if (flags & CHAIN) { 3723 for (nc = 0; nc < nctx; nc++) { 3724 for (n = 0; n < prime; n++) { 3725 struct i915_request *rq; 3726 3727 rq = i915_request_create(ve[nc]); 3728 if (IS_ERR(rq)) { 3729 err = PTR_ERR(rq); 3730 goto out; 3731 } 3732 3733 if (request[nc]) 3734 
i915_request_put(request[nc]); 3735 request[nc] = i915_request_get(rq); 3736 i915_request_add(rq); 3737 } 3738 } 3739 } else { 3740 for (n = 0; n < prime; n++) { 3741 for (nc = 0; nc < nctx; nc++) { 3742 struct i915_request *rq; 3743 3744 rq = i915_request_create(ve[nc]); 3745 if (IS_ERR(rq)) { 3746 err = PTR_ERR(rq); 3747 goto out; 3748 } 3749 3750 if (request[nc]) 3751 i915_request_put(request[nc]); 3752 request[nc] = i915_request_get(rq); 3753 i915_request_add(rq); 3754 } 3755 } 3756 } 3757 3758 for (nc = 0; nc < nctx; nc++) { 3759 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3760 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3761 __func__, ve[0]->engine->name, 3762 request[nc]->fence.context, 3763 request[nc]->fence.seqno); 3764 3765 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3766 __func__, ve[0]->engine->name, 3767 request[nc]->fence.context, 3768 request[nc]->fence.seqno); 3769 GEM_TRACE_DUMP(); 3770 intel_gt_set_wedged(gt); 3771 break; 3772 } 3773 } 3774 3775 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3776 if (prime == 1) 3777 times[0] = times[1]; 3778 3779 for (nc = 0; nc < nctx; nc++) { 3780 i915_request_put(request[nc]); 3781 request[nc] = NULL; 3782 } 3783 3784 if (__igt_timeout(end_time, NULL)) 3785 break; 3786 } 3787 3788 err = igt_live_test_end(&t); 3789 if (err) 3790 goto out; 3791 3792 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3793 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3794 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3795 3796 out: 3797 if (igt_flush_test(gt->i915)) 3798 err = -EIO; 3799 3800 for (nc = 0; nc < nctx; nc++) { 3801 i915_request_put(request[nc]); 3802 intel_context_unpin(ve[nc]); 3803 intel_context_put(ve[nc]); 3804 } 3805 return err; 3806 } 3807 3808 static unsigned int 3809 __select_siblings(struct intel_gt *gt, 3810 unsigned int class, 3811 struct intel_engine_cs **siblings, 3812 bool (*filter)(const struct intel_engine_cs *)) 3813 { 3814 unsigned int n = 0; 3815 unsigned int inst; 3816 3817 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3818 if (!gt->engine_class[class][inst]) 3819 continue; 3820 3821 if (filter && !filter(gt->engine_class[class][inst])) 3822 continue; 3823 3824 siblings[n++] = gt->engine_class[class][inst]; 3825 } 3826 3827 return n; 3828 } 3829 3830 static unsigned int 3831 select_siblings(struct intel_gt *gt, 3832 unsigned int class, 3833 struct intel_engine_cs **siblings) 3834 { 3835 return __select_siblings(gt, class, siblings, NULL); 3836 } 3837 3838 static int live_virtual_engine(void *arg) 3839 { 3840 struct intel_gt *gt = arg; 3841 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3842 struct intel_engine_cs *engine; 3843 enum intel_engine_id id; 3844 unsigned int class; 3845 int err; 3846 3847 if (intel_uc_uses_guc_submission(>->uc)) 3848 return 0; 3849 3850 for_each_engine(engine, gt, id) { 3851 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3852 if (err) { 3853 pr_err("Failed to wrap engine %s: err=%d\n", 3854 engine->name, err); 3855 return err; 3856 } 3857 } 3858 3859 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3860 int nsibling, n; 3861 3862 nsibling = select_siblings(gt, class, siblings); 3863 if (nsibling < 2) 3864 continue; 3865 3866 for (n = 1; n <= nsibling + 1; n++) { 3867 err = nop_virtual_engine(gt, siblings, nsibling, 3868 n, 0); 3869 if (err) 3870 return err; 3871 } 3872 3873 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3874 if (err) 3875 return err; 3876 } 3877 3878 return 0; 3879 } 3880 3881 static int 
mask_virtual_engine(struct intel_gt *gt, 3882 struct intel_engine_cs **siblings, 3883 unsigned int nsibling) 3884 { 3885 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3886 struct intel_context *ve; 3887 struct igt_live_test t; 3888 unsigned int n; 3889 int err; 3890 3891 /* 3892 * Check that by setting the execution mask on a request, we can 3893 * restrict it to our desired engine within the virtual engine. 3894 */ 3895 3896 ve = intel_execlists_create_virtual(siblings, nsibling); 3897 if (IS_ERR(ve)) { 3898 err = PTR_ERR(ve); 3899 goto out_close; 3900 } 3901 3902 err = intel_context_pin(ve); 3903 if (err) 3904 goto out_put; 3905 3906 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3907 if (err) 3908 goto out_unpin; 3909 3910 for (n = 0; n < nsibling; n++) { 3911 request[n] = i915_request_create(ve); 3912 if (IS_ERR(request[n])) { 3913 err = PTR_ERR(request[n]); 3914 nsibling = n; 3915 goto out; 3916 } 3917 3918 /* Reverse order as it's more likely to be unnatural */ 3919 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3920 3921 i915_request_get(request[n]); 3922 i915_request_add(request[n]); 3923 } 3924 3925 for (n = 0; n < nsibling; n++) { 3926 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3927 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3928 __func__, ve->engine->name, 3929 request[n]->fence.context, 3930 request[n]->fence.seqno); 3931 3932 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3933 __func__, ve->engine->name, 3934 request[n]->fence.context, 3935 request[n]->fence.seqno); 3936 GEM_TRACE_DUMP(); 3937 intel_gt_set_wedged(gt); 3938 err = -EIO; 3939 goto out; 3940 } 3941 3942 if (request[n]->engine != siblings[nsibling - n - 1]) { 3943 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3944 request[n]->engine->name, 3945 siblings[nsibling - n - 1]->name); 3946 err = -EINVAL; 3947 goto out; 3948 } 3949 } 3950 3951 err = igt_live_test_end(&t); 3952 out: 3953 if (igt_flush_test(gt->i915)) 3954 err = -EIO; 3955 3956 for (n = 0; n < nsibling; n++) 3957 i915_request_put(request[n]); 3958 3959 out_unpin: 3960 intel_context_unpin(ve); 3961 out_put: 3962 intel_context_put(ve); 3963 out_close: 3964 return err; 3965 } 3966 3967 static int live_virtual_mask(void *arg) 3968 { 3969 struct intel_gt *gt = arg; 3970 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3971 unsigned int class; 3972 int err; 3973 3974 if (intel_uc_uses_guc_submission(>->uc)) 3975 return 0; 3976 3977 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3978 unsigned int nsibling; 3979 3980 nsibling = select_siblings(gt, class, siblings); 3981 if (nsibling < 2) 3982 continue; 3983 3984 err = mask_virtual_engine(gt, siblings, nsibling); 3985 if (err) 3986 return err; 3987 } 3988 3989 return 0; 3990 } 3991 3992 static int slicein_virtual_engine(struct intel_gt *gt, 3993 struct intel_engine_cs **siblings, 3994 unsigned int nsibling) 3995 { 3996 const long timeout = slice_timeout(siblings[0]); 3997 struct intel_context *ce; 3998 struct i915_request *rq; 3999 struct igt_spinner spin; 4000 unsigned int n; 4001 int err = 0; 4002 4003 /* 4004 * Virtual requests must take part in timeslicing on the target engines. 
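	 *
	 * Each sibling is loaded with a spinner, then a single virtual
	 * request is submitted; it must be granted a timeslice on one of
	 * the siblings within slice_timeout().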
4005 */ 4006 4007 if (igt_spinner_init(&spin, gt)) 4008 return -ENOMEM; 4009 4010 for (n = 0; n < nsibling; n++) { 4011 ce = intel_context_create(siblings[n]); 4012 if (IS_ERR(ce)) { 4013 err = PTR_ERR(ce); 4014 goto out; 4015 } 4016 4017 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4018 intel_context_put(ce); 4019 if (IS_ERR(rq)) { 4020 err = PTR_ERR(rq); 4021 goto out; 4022 } 4023 4024 i915_request_add(rq); 4025 } 4026 4027 ce = intel_execlists_create_virtual(siblings, nsibling); 4028 if (IS_ERR(ce)) { 4029 err = PTR_ERR(ce); 4030 goto out; 4031 } 4032 4033 rq = intel_context_create_request(ce); 4034 intel_context_put(ce); 4035 if (IS_ERR(rq)) { 4036 err = PTR_ERR(rq); 4037 goto out; 4038 } 4039 4040 i915_request_get(rq); 4041 i915_request_add(rq); 4042 if (i915_request_wait(rq, 0, timeout) < 0) { 4043 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", 4044 __func__, rq->engine->name); 4045 GEM_TRACE_DUMP(); 4046 intel_gt_set_wedged(gt); 4047 err = -EIO; 4048 } 4049 i915_request_put(rq); 4050 4051 out: 4052 igt_spinner_end(&spin); 4053 if (igt_flush_test(gt->i915)) 4054 err = -EIO; 4055 igt_spinner_fini(&spin); 4056 return err; 4057 } 4058 4059 static int sliceout_virtual_engine(struct intel_gt *gt, 4060 struct intel_engine_cs **siblings, 4061 unsigned int nsibling) 4062 { 4063 const long timeout = slice_timeout(siblings[0]); 4064 struct intel_context *ce; 4065 struct i915_request *rq; 4066 struct igt_spinner spin; 4067 unsigned int n; 4068 int err = 0; 4069 4070 /* 4071 * Virtual requests must allow others a fair timeslice. 4072 */ 4073 4074 if (igt_spinner_init(&spin, gt)) 4075 return -ENOMEM; 4076 4077 /* XXX We do not handle oversubscription and fairness with normal rq */ 4078 for (n = 0; n < nsibling; n++) { 4079 ce = intel_execlists_create_virtual(siblings, nsibling); 4080 if (IS_ERR(ce)) { 4081 err = PTR_ERR(ce); 4082 goto out; 4083 } 4084 4085 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 4086 intel_context_put(ce); 4087 if (IS_ERR(rq)) { 4088 err = PTR_ERR(rq); 4089 goto out; 4090 } 4091 4092 i915_request_add(rq); 4093 } 4094 4095 for (n = 0; !err && n < nsibling; n++) { 4096 ce = intel_context_create(siblings[n]); 4097 if (IS_ERR(ce)) { 4098 err = PTR_ERR(ce); 4099 goto out; 4100 } 4101 4102 rq = intel_context_create_request(ce); 4103 intel_context_put(ce); 4104 if (IS_ERR(rq)) { 4105 err = PTR_ERR(rq); 4106 goto out; 4107 } 4108 4109 i915_request_get(rq); 4110 i915_request_add(rq); 4111 if (i915_request_wait(rq, 0, timeout) < 0) { 4112 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", 4113 __func__, siblings[n]->name); 4114 GEM_TRACE_DUMP(); 4115 intel_gt_set_wedged(gt); 4116 err = -EIO; 4117 } 4118 i915_request_put(rq); 4119 } 4120 4121 out: 4122 igt_spinner_end(&spin); 4123 if (igt_flush_test(gt->i915)) 4124 err = -EIO; 4125 igt_spinner_fini(&spin); 4126 return err; 4127 } 4128 4129 static int live_virtual_slice(void *arg) 4130 { 4131 struct intel_gt *gt = arg; 4132 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4133 unsigned int class; 4134 int err; 4135 4136 if (intel_uc_uses_guc_submission(>->uc)) 4137 return 0; 4138 4139 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4140 unsigned int nsibling; 4141 4142 nsibling = __select_siblings(gt, class, siblings, 4143 intel_engine_has_timeslices); 4144 if (nsibling < 2) 4145 continue; 4146 4147 err = slicein_virtual_engine(gt, siblings, nsibling); 4148 if (err) 4149 return err; 4150 4151 err = sliceout_virtual_engine(gt, siblings, nsibling); 4152 if (err) 4153 
return err; 4154 } 4155 4156 return 0; 4157 } 4158 4159 static int preserved_virtual_engine(struct intel_gt *gt, 4160 struct intel_engine_cs **siblings, 4161 unsigned int nsibling) 4162 { 4163 struct i915_request *last = NULL; 4164 struct intel_context *ve; 4165 struct i915_vma *scratch; 4166 struct igt_live_test t; 4167 unsigned int n; 4168 int err = 0; 4169 u32 *cs; 4170 4171 scratch = create_scratch(siblings[0]->gt); 4172 if (IS_ERR(scratch)) 4173 return PTR_ERR(scratch); 4174 4175 err = i915_vma_sync(scratch); 4176 if (err) 4177 goto out_scratch; 4178 4179 ve = intel_execlists_create_virtual(siblings, nsibling); 4180 if (IS_ERR(ve)) { 4181 err = PTR_ERR(ve); 4182 goto out_scratch; 4183 } 4184 4185 err = intel_context_pin(ve); 4186 if (err) 4187 goto out_put; 4188 4189 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 4190 if (err) 4191 goto out_unpin; 4192 4193 for (n = 0; n < NUM_GPR_DW; n++) { 4194 struct intel_engine_cs *engine = siblings[n % nsibling]; 4195 struct i915_request *rq; 4196 4197 rq = i915_request_create(ve); 4198 if (IS_ERR(rq)) { 4199 err = PTR_ERR(rq); 4200 goto out_end; 4201 } 4202 4203 i915_request_put(last); 4204 last = i915_request_get(rq); 4205 4206 cs = intel_ring_begin(rq, 8); 4207 if (IS_ERR(cs)) { 4208 i915_request_add(rq); 4209 err = PTR_ERR(cs); 4210 goto out_end; 4211 } 4212 4213 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4214 *cs++ = CS_GPR(engine, n); 4215 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4216 *cs++ = 0; 4217 4218 *cs++ = MI_LOAD_REGISTER_IMM(1); 4219 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 4220 *cs++ = n + 1; 4221 4222 *cs++ = MI_NOOP; 4223 intel_ring_advance(rq, cs); 4224 4225 /* Restrict this request to run on a particular engine */ 4226 rq->execution_mask = engine->mask; 4227 i915_request_add(rq); 4228 } 4229 4230 if (i915_request_wait(last, 0, HZ / 5) < 0) { 4231 err = -ETIME; 4232 goto out_end; 4233 } 4234 4235 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4236 if (IS_ERR(cs)) { 4237 err = PTR_ERR(cs); 4238 goto out_end; 4239 } 4240 4241 for (n = 0; n < NUM_GPR_DW; n++) { 4242 if (cs[n] != n) { 4243 pr_err("Incorrect value[%d] found for GPR[%d]\n", 4244 cs[n], n); 4245 err = -EINVAL; 4246 break; 4247 } 4248 } 4249 4250 i915_gem_object_unpin_map(scratch->obj); 4251 4252 out_end: 4253 if (igt_live_test_end(&t)) 4254 err = -EIO; 4255 i915_request_put(last); 4256 out_unpin: 4257 intel_context_unpin(ve); 4258 out_put: 4259 intel_context_put(ve); 4260 out_scratch: 4261 i915_vma_unpin_and_release(&scratch, 0); 4262 return err; 4263 } 4264 4265 static int live_virtual_preserved(void *arg) 4266 { 4267 struct intel_gt *gt = arg; 4268 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4269 unsigned int class; 4270 4271 /* 4272 * Check that the context image retains non-privileged (user) registers 4273 * from one engine to the next. For this we check that the CS_GPR 4274 * are preserved. 4275 */ 4276 4277 if (intel_uc_uses_guc_submission(>->uc)) 4278 return 0; 4279 4280 /* As we use CS_GPR we cannot run before they existed on all engines. 
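	 * (On gen8 only the render engine has per-context GPR, see
	 * live_preempt_user above, hence the gen9+ requirement below.)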
*/ 4281 if (INTEL_GEN(gt->i915) < 9) 4282 return 0; 4283 4284 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4285 int nsibling, err; 4286 4287 nsibling = select_siblings(gt, class, siblings); 4288 if (nsibling < 2) 4289 continue; 4290 4291 err = preserved_virtual_engine(gt, siblings, nsibling); 4292 if (err) 4293 return err; 4294 } 4295 4296 return 0; 4297 } 4298 4299 static int bond_virtual_engine(struct intel_gt *gt, 4300 unsigned int class, 4301 struct intel_engine_cs **siblings, 4302 unsigned int nsibling, 4303 unsigned int flags) 4304 #define BOND_SCHEDULE BIT(0) 4305 { 4306 struct intel_engine_cs *master; 4307 struct i915_request *rq[16]; 4308 enum intel_engine_id id; 4309 struct igt_spinner spin; 4310 unsigned long n; 4311 int err; 4312 4313 /* 4314 * A set of bonded requests is intended to be run concurrently 4315 * across a number of engines. We use one request per-engine 4316 * and a magic fence to schedule each of the bonded requests 4317 * at the same time. A consequence of our current scheduler is that 4318 * we only move requests to the HW ready queue when the request 4319 * becomes ready, that is when all of its prerequisite fences have 4320 * been signaled. As one of those fences is the master submit fence, 4321 * there is a delay on all secondary fences as the HW may be 4322 * currently busy. Equally, as all the requests are independent, 4323 * they may have other fences that delay individual request 4324 * submission to HW. Ergo, we do not guarantee that all requests are 4325 * immediately submitted to HW at the same time, just that if the 4326 * rules are abided by, they are ready at the same time as the 4327 * first is submitted. Userspace can embed semaphores in its batch 4328 * to ensure parallel execution of its phases as it requires. 4329 * Though naturally it gets requested that perhaps the scheduler should 4330 * take care of parallel execution, even across preemption events on 4331 * different HW. (The proper answer is of course "lalalala".) 4332 * 4333 * With the submit-fence, we have identified three possible phases 4334 * of synchronisation depending on the master fence: queued (not 4335 * ready), executing, and signaled. The first two are quite simple 4336 * and checked below. However, the signaled master fence handling is 4337 * contentious. Currently we do not distinguish between a signaled 4338 * fence and an expired fence, as once signaled it does not convey 4339 * any information about the previous execution. It may even be freed 4340 * and hence checking later it may not exist at all. Ergo we currently 4341 * do not apply the bonding constraint for an already signaled fence, 4342 * as our expectation is that it should not constrain the secondaries 4343 * and is outside of the scope of the bonded request API (i.e. all 4344 * userspace requests are meant to be running in parallel). As 4345 * it imposes no constraint, and is effectively a no-op, we do not 4346 * check below as normal execution flows are checked extensively above. 4347 * 4348 * XXX Is the degenerate handling of signaled submit fences the 4349 * expected behaviour for userpace? 
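	 *
	 * Below we therefore only exercise the queued master (BOND_SCHEDULE,
	 * with the master's submit fence held back by an onstack fence) and
	 * the executing master (spinner already running) phases.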
4350 */ 4351 4352 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 4353 4354 if (igt_spinner_init(&spin, gt)) 4355 return -ENOMEM; 4356 4357 err = 0; 4358 rq[0] = ERR_PTR(-ENOMEM); 4359 for_each_engine(master, gt, id) { 4360 struct i915_sw_fence fence = {}; 4361 struct intel_context *ce; 4362 4363 if (master->class == class) 4364 continue; 4365 4366 ce = intel_context_create(master); 4367 if (IS_ERR(ce)) { 4368 err = PTR_ERR(ce); 4369 goto out; 4370 } 4371 4372 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 4373 4374 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 4375 intel_context_put(ce); 4376 if (IS_ERR(rq[0])) { 4377 err = PTR_ERR(rq[0]); 4378 goto out; 4379 } 4380 i915_request_get(rq[0]); 4381 4382 if (flags & BOND_SCHEDULE) { 4383 onstack_fence_init(&fence); 4384 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 4385 &fence, 4386 GFP_KERNEL); 4387 } 4388 4389 i915_request_add(rq[0]); 4390 if (err < 0) 4391 goto out; 4392 4393 if (!(flags & BOND_SCHEDULE) && 4394 !igt_wait_for_spinner(&spin, rq[0])) { 4395 err = -EIO; 4396 goto out; 4397 } 4398 4399 for (n = 0; n < nsibling; n++) { 4400 struct intel_context *ve; 4401 4402 ve = intel_execlists_create_virtual(siblings, nsibling); 4403 if (IS_ERR(ve)) { 4404 err = PTR_ERR(ve); 4405 onstack_fence_fini(&fence); 4406 goto out; 4407 } 4408 4409 err = intel_virtual_engine_attach_bond(ve->engine, 4410 master, 4411 siblings[n]); 4412 if (err) { 4413 intel_context_put(ve); 4414 onstack_fence_fini(&fence); 4415 goto out; 4416 } 4417 4418 err = intel_context_pin(ve); 4419 intel_context_put(ve); 4420 if (err) { 4421 onstack_fence_fini(&fence); 4422 goto out; 4423 } 4424 4425 rq[n + 1] = i915_request_create(ve); 4426 intel_context_unpin(ve); 4427 if (IS_ERR(rq[n + 1])) { 4428 err = PTR_ERR(rq[n + 1]); 4429 onstack_fence_fini(&fence); 4430 goto out; 4431 } 4432 i915_request_get(rq[n + 1]); 4433 4434 err = i915_request_await_execution(rq[n + 1], 4435 &rq[0]->fence, 4436 ve->engine->bond_execute); 4437 i915_request_add(rq[n + 1]); 4438 if (err < 0) { 4439 onstack_fence_fini(&fence); 4440 goto out; 4441 } 4442 } 4443 onstack_fence_fini(&fence); 4444 intel_engine_flush_submission(master); 4445 igt_spinner_end(&spin); 4446 4447 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 4448 pr_err("Master request did not execute (on %s)!\n", 4449 rq[0]->engine->name); 4450 err = -EIO; 4451 goto out; 4452 } 4453 4454 for (n = 0; n < nsibling; n++) { 4455 if (i915_request_wait(rq[n + 1], 0, 4456 MAX_SCHEDULE_TIMEOUT) < 0) { 4457 err = -EIO; 4458 goto out; 4459 } 4460 4461 if (rq[n + 1]->engine != siblings[n]) { 4462 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 4463 siblings[n]->name, 4464 rq[n + 1]->engine->name, 4465 rq[0]->engine->name); 4466 err = -EINVAL; 4467 goto out; 4468 } 4469 } 4470 4471 for (n = 0; !IS_ERR(rq[n]); n++) 4472 i915_request_put(rq[n]); 4473 rq[0] = ERR_PTR(-ENOMEM); 4474 } 4475 4476 out: 4477 for (n = 0; !IS_ERR(rq[n]); n++) 4478 i915_request_put(rq[n]); 4479 if (igt_flush_test(gt->i915)) 4480 err = -EIO; 4481 4482 igt_spinner_fini(&spin); 4483 return err; 4484 } 4485 4486 static int live_virtual_bond(void *arg) 4487 { 4488 static const struct phase { 4489 const char *name; 4490 unsigned int flags; 4491 } phases[] = { 4492 { "", 0 }, 4493 { "schedule", BOND_SCHEDULE }, 4494 { }, 4495 }; 4496 struct intel_gt *gt = arg; 4497 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4498 unsigned int class; 4499 int err; 4500 4501 if (intel_uc_uses_guc_submission(>->uc)) 4502 
return 0; 4503 4504 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4505 const struct phase *p; 4506 int nsibling; 4507 4508 nsibling = select_siblings(gt, class, siblings); 4509 if (nsibling < 2) 4510 continue; 4511 4512 for (p = phases; p->name; p++) { 4513 err = bond_virtual_engine(gt, 4514 class, siblings, nsibling, 4515 p->flags); 4516 if (err) { 4517 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 4518 __func__, p->name, class, nsibling, err); 4519 return err; 4520 } 4521 } 4522 } 4523 4524 return 0; 4525 } 4526 4527 static int reset_virtual_engine(struct intel_gt *gt, 4528 struct intel_engine_cs **siblings, 4529 unsigned int nsibling) 4530 { 4531 struct intel_engine_cs *engine; 4532 struct intel_context *ve; 4533 struct igt_spinner spin; 4534 struct i915_request *rq; 4535 unsigned int n; 4536 int err = 0; 4537 4538 /* 4539 * In order to support offline error capture for fast preempt reset, 4540 * we need to decouple the guilty request and ensure that it and its 4541 * descendents are not executed while the capture is in progress. 4542 */ 4543 4544 if (igt_spinner_init(&spin, gt)) 4545 return -ENOMEM; 4546 4547 ve = intel_execlists_create_virtual(siblings, nsibling); 4548 if (IS_ERR(ve)) { 4549 err = PTR_ERR(ve); 4550 goto out_spin; 4551 } 4552 4553 for (n = 0; n < nsibling; n++) 4554 st_engine_heartbeat_disable(siblings[n]); 4555 4556 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 4557 if (IS_ERR(rq)) { 4558 err = PTR_ERR(rq); 4559 goto out_heartbeat; 4560 } 4561 i915_request_add(rq); 4562 4563 if (!igt_wait_for_spinner(&spin, rq)) { 4564 intel_gt_set_wedged(gt); 4565 err = -ETIME; 4566 goto out_heartbeat; 4567 } 4568 4569 engine = rq->engine; 4570 GEM_BUG_ON(engine == ve->engine); 4571 4572 /* Take ownership of the reset and tasklet */ 4573 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 4574 >->reset.flags)) { 4575 intel_gt_set_wedged(gt); 4576 err = -EBUSY; 4577 goto out_heartbeat; 4578 } 4579 tasklet_disable(&engine->execlists.tasklet); 4580 4581 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 4582 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 4583 4584 /* Fake a preemption event; failed of course */ 4585 spin_lock_irq(&engine->active.lock); 4586 __unwind_incomplete_requests(engine); 4587 spin_unlock_irq(&engine->active.lock); 4588 GEM_BUG_ON(rq->engine != ve->engine); 4589 4590 /* Reset the engine while keeping our active request on hold */ 4591 execlists_hold(engine, rq); 4592 GEM_BUG_ON(!i915_request_on_hold(rq)); 4593 4594 intel_engine_reset(engine, NULL); 4595 GEM_BUG_ON(rq->fence.error != -EIO); 4596 4597 /* Release our grasp on the engine, letting CS flow again */ 4598 tasklet_enable(&engine->execlists.tasklet); 4599 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 4600 4601 /* Check that we do not resubmit the held request */ 4602 i915_request_get(rq); 4603 if (!i915_request_wait(rq, 0, HZ / 5)) { 4604 pr_err("%s: on hold request completed!\n", 4605 engine->name); 4606 intel_gt_set_wedged(gt); 4607 err = -EIO; 4608 goto out_rq; 4609 } 4610 GEM_BUG_ON(!i915_request_on_hold(rq)); 4611 4612 /* But is resubmitted on release */ 4613 execlists_unhold(engine, rq); 4614 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4615 pr_err("%s: held request did not complete!\n", 4616 engine->name); 4617 intel_gt_set_wedged(gt); 4618 err = -ETIME; 4619 } 4620 4621 out_rq: 4622 i915_request_put(rq); 4623 out_heartbeat: 4624 for (n = 0; n < nsibling; n++) 4625 st_engine_heartbeat_enable(siblings[n]); 4626 4627 
intel_context_put(ve); 4628 out_spin: 4629 igt_spinner_fini(&spin); 4630 return err; 4631 } 4632 4633 static int live_virtual_reset(void *arg) 4634 { 4635 struct intel_gt *gt = arg; 4636 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 4637 unsigned int class; 4638 4639 /* 4640 * Check that we handle a reset event within a virtual engine. 4641 * Only the physical engine is reset, but we have to check the flow 4642 * of the virtual requests around the reset, and make sure it is not 4643 * forgotten. 4644 */ 4645 4646 if (intel_uc_uses_guc_submission(>->uc)) 4647 return 0; 4648 4649 if (!intel_has_reset_engine(gt)) 4650 return 0; 4651 4652 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 4653 int nsibling, err; 4654 4655 nsibling = select_siblings(gt, class, siblings); 4656 if (nsibling < 2) 4657 continue; 4658 4659 err = reset_virtual_engine(gt, siblings, nsibling); 4660 if (err) 4661 return err; 4662 } 4663 4664 return 0; 4665 } 4666 4667 int intel_execlists_live_selftests(struct drm_i915_private *i915) 4668 { 4669 static const struct i915_subtest tests[] = { 4670 SUBTEST(live_sanitycheck), 4671 SUBTEST(live_unlite_switch), 4672 SUBTEST(live_unlite_preempt), 4673 SUBTEST(live_unlite_ring), 4674 SUBTEST(live_pin_rewind), 4675 SUBTEST(live_hold_reset), 4676 SUBTEST(live_error_interrupt), 4677 SUBTEST(live_timeslice_preempt), 4678 SUBTEST(live_timeslice_rewind), 4679 SUBTEST(live_timeslice_queue), 4680 SUBTEST(live_timeslice_nopreempt), 4681 SUBTEST(live_busywait_preempt), 4682 SUBTEST(live_preempt), 4683 SUBTEST(live_late_preempt), 4684 SUBTEST(live_nopreempt), 4685 SUBTEST(live_preempt_cancel), 4686 SUBTEST(live_suppress_self_preempt), 4687 SUBTEST(live_chain_preempt), 4688 SUBTEST(live_preempt_ring), 4689 SUBTEST(live_preempt_gang), 4690 SUBTEST(live_preempt_timeout), 4691 SUBTEST(live_preempt_user), 4692 SUBTEST(live_preempt_smoke), 4693 SUBTEST(live_virtual_engine), 4694 SUBTEST(live_virtual_mask), 4695 SUBTEST(live_virtual_preserved), 4696 SUBTEST(live_virtual_slice), 4697 SUBTEST(live_virtual_bond), 4698 SUBTEST(live_virtual_reset), 4699 }; 4700 4701 if (!HAS_EXECLISTS(i915)) 4702 return 0; 4703 4704 if (intel_gt_is_wedged(&i915->gt)) 4705 return 0; 4706 4707 return intel_gt_live_subtests(tests, &i915->gt); 4708 } 4709 4710 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 4711 { 4712 const u32 offset = 4713 i915_ggtt_offset(ce->engine->status_page.vma) + 4714 offset_in_page(slot); 4715 struct i915_request *rq; 4716 u32 *cs; 4717 4718 rq = intel_context_create_request(ce); 4719 if (IS_ERR(rq)) 4720 return PTR_ERR(rq); 4721 4722 cs = intel_ring_begin(rq, 4); 4723 if (IS_ERR(cs)) { 4724 i915_request_add(rq); 4725 return PTR_ERR(cs); 4726 } 4727 4728 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4729 *cs++ = offset; 4730 *cs++ = 0; 4731 *cs++ = 1; 4732 4733 intel_ring_advance(rq, cs); 4734 4735 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4736 i915_request_add(rq); 4737 return 0; 4738 } 4739 4740 static int context_flush(struct intel_context *ce, long timeout) 4741 { 4742 struct i915_request *rq; 4743 struct dma_fence *fence; 4744 int err = 0; 4745 4746 rq = intel_engine_create_kernel_request(ce->engine); 4747 if (IS_ERR(rq)) 4748 return PTR_ERR(rq); 4749 4750 fence = i915_active_fence_get(&ce->timeline->last_request); 4751 if (fence) { 4752 i915_request_await_dma_fence(rq, fence); 4753 dma_fence_put(fence); 4754 } 4755 4756 rq = i915_request_get(rq); 4757 i915_request_add(rq); 4758 if (i915_request_wait(rq, 0, timeout) < 0) 4759 err = -ETIME; 4760 
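	/* Success or timeout, drop the reference we took before i915_request_add() */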
i915_request_put(rq); 4761 4762 rmb(); /* We know the request is written, make sure all state is too! */ 4763 return err; 4764 } 4765 4766 static int live_lrc_layout(void *arg) 4767 { 4768 struct intel_gt *gt = arg; 4769 struct intel_engine_cs *engine; 4770 enum intel_engine_id id; 4771 u32 *lrc; 4772 int err; 4773 4774 /* 4775 * Check the registers offsets we use to create the initial reg state 4776 * match the layout saved by HW. 4777 */ 4778 4779 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4780 if (!lrc) 4781 return -ENOMEM; 4782 4783 err = 0; 4784 for_each_engine(engine, gt, id) { 4785 u32 *hw; 4786 int dw; 4787 4788 if (!engine->default_state) 4789 continue; 4790 4791 hw = shmem_pin_map(engine->default_state); 4792 if (IS_ERR(hw)) { 4793 err = PTR_ERR(hw); 4794 break; 4795 } 4796 hw += LRC_STATE_OFFSET / sizeof(*hw); 4797 4798 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4799 engine->kernel_context, 4800 engine, 4801 engine->kernel_context->ring, 4802 true); 4803 4804 dw = 0; 4805 do { 4806 u32 lri = hw[dw]; 4807 4808 if (lri == 0) { 4809 dw++; 4810 continue; 4811 } 4812 4813 if (lrc[dw] == 0) { 4814 pr_debug("%s: skipped instruction %x at dword %d\n", 4815 engine->name, lri, dw); 4816 dw++; 4817 continue; 4818 } 4819 4820 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4821 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4822 engine->name, dw, lri); 4823 err = -EINVAL; 4824 break; 4825 } 4826 4827 if (lrc[dw] != lri) { 4828 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4829 engine->name, dw, lri, lrc[dw]); 4830 err = -EINVAL; 4831 break; 4832 } 4833 4834 lri &= 0x7f; 4835 lri++; 4836 dw++; 4837 4838 while (lri) { 4839 if (hw[dw] != lrc[dw]) { 4840 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4841 engine->name, dw, hw[dw], lrc[dw]); 4842 err = -EINVAL; 4843 break; 4844 } 4845 4846 /* 4847 * Skip over the actual register value as we 4848 * expect that to differ. 4849 */ 4850 dw += 2; 4851 lri -= 2; 4852 } 4853 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4854 4855 if (err) { 4856 pr_info("%s: HW register image:\n", engine->name); 4857 igt_hexdump(hw, PAGE_SIZE); 4858 4859 pr_info("%s: SW register image:\n", engine->name); 4860 igt_hexdump(lrc, PAGE_SIZE); 4861 } 4862 4863 shmem_unpin_map(engine->default_state, hw); 4864 if (err) 4865 break; 4866 } 4867 4868 kfree(lrc); 4869 return err; 4870 } 4871 4872 static int find_offset(const u32 *lri, u32 offset) 4873 { 4874 int i; 4875 4876 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4877 if (lri[i] == offset) 4878 return i; 4879 4880 return -1; 4881 } 4882 4883 static int live_lrc_fixed(void *arg) 4884 { 4885 struct intel_gt *gt = arg; 4886 struct intel_engine_cs *engine; 4887 enum intel_engine_id id; 4888 int err = 0; 4889 4890 /* 4891 * Check the assumed register offsets match the actual locations in 4892 * the context image. 
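	 *
	 * Each table entry below pairs a register's mmio offset with the dword
	 * index we expect it to occupy in the saved register state: either a
	 * fixed CTX_* define (minus one to step back from the value dword to
	 * the preceding offset dword) or a per-engine lrc_ring_*() lookup.
	 * find_offset() then searches the HW default image for that mmio
	 * offset, so any disagreement means our software layout has drifted
	 * from what the hardware actually saves.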
4893 */ 4894 4895 for_each_engine(engine, gt, id) { 4896 const struct { 4897 u32 reg; 4898 u32 offset; 4899 const char *name; 4900 } tbl[] = { 4901 { 4902 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 4903 CTX_RING_START - 1, 4904 "RING_START" 4905 }, 4906 { 4907 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 4908 CTX_RING_CTL - 1, 4909 "RING_CTL" 4910 }, 4911 { 4912 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 4913 CTX_RING_HEAD - 1, 4914 "RING_HEAD" 4915 }, 4916 { 4917 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 4918 CTX_RING_TAIL - 1, 4919 "RING_TAIL" 4920 }, 4921 { 4922 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 4923 lrc_ring_mi_mode(engine), 4924 "RING_MI_MODE" 4925 }, 4926 { 4927 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 4928 CTX_BB_STATE - 1, 4929 "BB_STATE" 4930 }, 4931 { 4932 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), 4933 lrc_ring_wa_bb_per_ctx(engine), 4934 "RING_BB_PER_CTX_PTR" 4935 }, 4936 { 4937 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), 4938 lrc_ring_indirect_ptr(engine), 4939 "RING_INDIRECT_CTX_PTR" 4940 }, 4941 { 4942 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), 4943 lrc_ring_indirect_offset(engine), 4944 "RING_INDIRECT_CTX_OFFSET" 4945 }, 4946 { 4947 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 4948 CTX_TIMESTAMP - 1, 4949 "RING_CTX_TIMESTAMP" 4950 }, 4951 { 4952 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), 4953 lrc_ring_gpr0(engine), 4954 "RING_CS_GPR0" 4955 }, 4956 { 4957 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), 4958 lrc_ring_cmd_buf_cctl(engine), 4959 "RING_CMD_BUF_CCTL" 4960 }, 4961 { }, 4962 }, *t; 4963 u32 *hw; 4964 4965 if (!engine->default_state) 4966 continue; 4967 4968 hw = shmem_pin_map(engine->default_state); 4969 if (IS_ERR(hw)) { 4970 err = PTR_ERR(hw); 4971 break; 4972 } 4973 hw += LRC_STATE_OFFSET / sizeof(*hw); 4974 4975 for (t = tbl; t->name; t++) { 4976 int dw = find_offset(hw, t->reg); 4977 4978 if (dw != t->offset) { 4979 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 4980 engine->name, 4981 t->name, 4982 t->reg, 4983 dw, 4984 t->offset); 4985 err = -EINVAL; 4986 } 4987 } 4988 4989 shmem_unpin_map(engine->default_state, hw); 4990 } 4991 4992 return err; 4993 } 4994 4995 static int __live_lrc_state(struct intel_engine_cs *engine, 4996 struct i915_vma *scratch) 4997 { 4998 struct intel_context *ce; 4999 struct i915_request *rq; 5000 enum { 5001 RING_START_IDX = 0, 5002 RING_TAIL_IDX, 5003 MAX_IDX 5004 }; 5005 u32 expected[MAX_IDX]; 5006 u32 *cs; 5007 int err; 5008 int n; 5009 5010 ce = intel_context_create(engine); 5011 if (IS_ERR(ce)) 5012 return PTR_ERR(ce); 5013 5014 err = intel_context_pin(ce); 5015 if (err) 5016 goto err_put; 5017 5018 rq = i915_request_create(ce); 5019 if (IS_ERR(rq)) { 5020 err = PTR_ERR(rq); 5021 goto err_unpin; 5022 } 5023 5024 cs = intel_ring_begin(rq, 4 * MAX_IDX); 5025 if (IS_ERR(cs)) { 5026 err = PTR_ERR(cs); 5027 i915_request_add(rq); 5028 goto err_unpin; 5029 } 5030 5031 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5032 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 5033 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 5034 *cs++ = 0; 5035 5036 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 5037 5038 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5039 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 5040 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 5041 *cs++ = 
0; 5042 5043 i915_vma_lock(scratch); 5044 err = i915_request_await_object(rq, scratch->obj, true); 5045 if (!err) 5046 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 5047 i915_vma_unlock(scratch); 5048 5049 i915_request_get(rq); 5050 i915_request_add(rq); 5051 if (err) 5052 goto err_rq; 5053 5054 intel_engine_flush_submission(engine); 5055 expected[RING_TAIL_IDX] = ce->ring->tail; 5056 5057 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 5058 err = -ETIME; 5059 goto err_rq; 5060 } 5061 5062 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 5063 if (IS_ERR(cs)) { 5064 err = PTR_ERR(cs); 5065 goto err_rq; 5066 } 5067 5068 for (n = 0; n < MAX_IDX; n++) { 5069 if (cs[n] != expected[n]) { 5070 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 5071 engine->name, n, cs[n], expected[n]); 5072 err = -EINVAL; 5073 break; 5074 } 5075 } 5076 5077 i915_gem_object_unpin_map(scratch->obj); 5078 5079 err_rq: 5080 i915_request_put(rq); 5081 err_unpin: 5082 intel_context_unpin(ce); 5083 err_put: 5084 intel_context_put(ce); 5085 return err; 5086 } 5087 5088 static int live_lrc_state(void *arg) 5089 { 5090 struct intel_gt *gt = arg; 5091 struct intel_engine_cs *engine; 5092 struct i915_vma *scratch; 5093 enum intel_engine_id id; 5094 int err = 0; 5095 5096 /* 5097 * Check the live register state matches what we expect for this 5098 * intel_context. 5099 */ 5100 5101 scratch = create_scratch(gt); 5102 if (IS_ERR(scratch)) 5103 return PTR_ERR(scratch); 5104 5105 for_each_engine(engine, gt, id) { 5106 err = __live_lrc_state(engine, scratch); 5107 if (err) 5108 break; 5109 } 5110 5111 if (igt_flush_test(gt->i915)) 5112 err = -EIO; 5113 5114 i915_vma_unpin_and_release(&scratch, 0); 5115 return err; 5116 } 5117 5118 static int gpr_make_dirty(struct intel_context *ce) 5119 { 5120 struct i915_request *rq; 5121 u32 *cs; 5122 int n; 5123 5124 rq = intel_context_create_request(ce); 5125 if (IS_ERR(rq)) 5126 return PTR_ERR(rq); 5127 5128 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 5129 if (IS_ERR(cs)) { 5130 i915_request_add(rq); 5131 return PTR_ERR(cs); 5132 } 5133 5134 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 5135 for (n = 0; n < NUM_GPR_DW; n++) { 5136 *cs++ = CS_GPR(ce->engine, n); 5137 *cs++ = STACK_MAGIC; 5138 } 5139 *cs++ = MI_NOOP; 5140 5141 intel_ring_advance(rq, cs); 5142 5143 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 5144 i915_request_add(rq); 5145 5146 return 0; 5147 } 5148 5149 static struct i915_request * 5150 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 5151 { 5152 const u32 offset = 5153 i915_ggtt_offset(ce->engine->status_page.vma) + 5154 offset_in_page(slot); 5155 struct i915_request *rq; 5156 u32 *cs; 5157 int err; 5158 int n; 5159 5160 rq = intel_context_create_request(ce); 5161 if (IS_ERR(rq)) 5162 return rq; 5163 5164 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 5165 if (IS_ERR(cs)) { 5166 i915_request_add(rq); 5167 return ERR_CAST(cs); 5168 } 5169 5170 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5171 *cs++ = MI_NOOP; 5172 5173 *cs++ = MI_SEMAPHORE_WAIT | 5174 MI_SEMAPHORE_GLOBAL_GTT | 5175 MI_SEMAPHORE_POLL | 5176 MI_SEMAPHORE_SAD_NEQ_SDD; 5177 *cs++ = 0; 5178 *cs++ = offset; 5179 *cs++ = 0; 5180 5181 for (n = 0; n < NUM_GPR_DW; n++) { 5182 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5183 *cs++ = CS_GPR(ce->engine, n); 5184 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 5185 *cs++ = 0; 5186 } 5187 5188 i915_vma_lock(scratch); 5189 err = i915_request_await_object(rq, scratch->obj, true); 5190 if (!err) 
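		/* and track the scratch as written by this request so the SRM results are coherent once it signals */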
5191 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 5192 i915_vma_unlock(scratch); 5193 5194 i915_request_get(rq); 5195 i915_request_add(rq); 5196 if (err) { 5197 i915_request_put(rq); 5198 rq = ERR_PTR(err); 5199 } 5200 5201 return rq; 5202 } 5203 5204 static int __live_lrc_gpr(struct intel_engine_cs *engine, 5205 struct i915_vma *scratch, 5206 bool preempt) 5207 { 5208 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 5209 struct intel_context *ce; 5210 struct i915_request *rq; 5211 u32 *cs; 5212 int err; 5213 int n; 5214 5215 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 5216 return 0; /* GPR only on rcs0 for gen8 */ 5217 5218 err = gpr_make_dirty(engine->kernel_context); 5219 if (err) 5220 return err; 5221 5222 ce = intel_context_create(engine); 5223 if (IS_ERR(ce)) 5224 return PTR_ERR(ce); 5225 5226 rq = __gpr_read(ce, scratch, slot); 5227 if (IS_ERR(rq)) { 5228 err = PTR_ERR(rq); 5229 goto err_put; 5230 } 5231 5232 err = wait_for_submit(engine, rq, HZ / 2); 5233 if (err) 5234 goto err_rq; 5235 5236 if (preempt) { 5237 err = gpr_make_dirty(engine->kernel_context); 5238 if (err) 5239 goto err_rq; 5240 5241 err = emit_semaphore_signal(engine->kernel_context, slot); 5242 if (err) 5243 goto err_rq; 5244 } else { 5245 slot[0] = 1; 5246 wmb(); 5247 } 5248 5249 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 5250 err = -ETIME; 5251 goto err_rq; 5252 } 5253 5254 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 5255 if (IS_ERR(cs)) { 5256 err = PTR_ERR(cs); 5257 goto err_rq; 5258 } 5259 5260 for (n = 0; n < NUM_GPR_DW; n++) { 5261 if (cs[n]) { 5262 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 5263 engine->name, 5264 n / 2, n & 1 ? "udw" : "ldw", 5265 cs[n]); 5266 err = -EINVAL; 5267 break; 5268 } 5269 } 5270 5271 i915_gem_object_unpin_map(scratch->obj); 5272 5273 err_rq: 5274 memset32(&slot[0], -1, 4); 5275 wmb(); 5276 i915_request_put(rq); 5277 err_put: 5278 intel_context_put(ce); 5279 return err; 5280 } 5281 5282 static int live_lrc_gpr(void *arg) 5283 { 5284 struct intel_gt *gt = arg; 5285 struct intel_engine_cs *engine; 5286 struct i915_vma *scratch; 5287 enum intel_engine_id id; 5288 int err = 0; 5289 5290 /* 5291 * Check that GPR registers are cleared in new contexts as we need 5292 * to avoid leaking any information from previous contexts. 
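	 *
	 * gpr_make_dirty() first fills the GPRs from the kernel context with
	 * STACK_MAGIC; __live_lrc_gpr() then reads them back from a brand new
	 * context into the scratch page, once undisturbed and once after the
	 * kernel context has been run on top of it (released via the
	 * semaphore), expecting zeroes in every slot both times.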
5293 */ 5294 5295 scratch = create_scratch(gt); 5296 if (IS_ERR(scratch)) 5297 return PTR_ERR(scratch); 5298 5299 for_each_engine(engine, gt, id) { 5300 st_engine_heartbeat_disable(engine); 5301 5302 err = __live_lrc_gpr(engine, scratch, false); 5303 if (err) 5304 goto err; 5305 5306 err = __live_lrc_gpr(engine, scratch, true); 5307 if (err) 5308 goto err; 5309 5310 err: 5311 st_engine_heartbeat_enable(engine); 5312 if (igt_flush_test(gt->i915)) 5313 err = -EIO; 5314 if (err) 5315 break; 5316 } 5317 5318 i915_vma_unpin_and_release(&scratch, 0); 5319 return err; 5320 } 5321 5322 static struct i915_request * 5323 create_timestamp(struct intel_context *ce, void *slot, int idx) 5324 { 5325 const u32 offset = 5326 i915_ggtt_offset(ce->engine->status_page.vma) + 5327 offset_in_page(slot); 5328 struct i915_request *rq; 5329 u32 *cs; 5330 int err; 5331 5332 rq = intel_context_create_request(ce); 5333 if (IS_ERR(rq)) 5334 return rq; 5335 5336 cs = intel_ring_begin(rq, 10); 5337 if (IS_ERR(cs)) { 5338 err = PTR_ERR(cs); 5339 goto err; 5340 } 5341 5342 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5343 *cs++ = MI_NOOP; 5344 5345 *cs++ = MI_SEMAPHORE_WAIT | 5346 MI_SEMAPHORE_GLOBAL_GTT | 5347 MI_SEMAPHORE_POLL | 5348 MI_SEMAPHORE_SAD_NEQ_SDD; 5349 *cs++ = 0; 5350 *cs++ = offset; 5351 *cs++ = 0; 5352 5353 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 5354 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 5355 *cs++ = offset + idx * sizeof(u32); 5356 *cs++ = 0; 5357 5358 intel_ring_advance(rq, cs); 5359 5360 rq->sched.attr.priority = I915_PRIORITY_MASK; 5361 err = 0; 5362 err: 5363 i915_request_get(rq); 5364 i915_request_add(rq); 5365 if (err) { 5366 i915_request_put(rq); 5367 return ERR_PTR(err); 5368 } 5369 5370 return rq; 5371 } 5372 5373 struct lrc_timestamp { 5374 struct intel_engine_cs *engine; 5375 struct intel_context *ce[2]; 5376 u32 poison; 5377 }; 5378 5379 static bool timestamp_advanced(u32 start, u32 end) 5380 { 5381 return (s32)(end - start) > 0; 5382 } 5383 5384 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 5385 { 5386 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 5387 struct i915_request *rq; 5388 u32 timestamp; 5389 int err = 0; 5390 5391 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 5392 rq = create_timestamp(arg->ce[0], slot, 1); 5393 if (IS_ERR(rq)) 5394 return PTR_ERR(rq); 5395 5396 err = wait_for_submit(rq->engine, rq, HZ / 2); 5397 if (err) 5398 goto err; 5399 5400 if (preempt) { 5401 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 5402 err = emit_semaphore_signal(arg->ce[1], slot); 5403 if (err) 5404 goto err; 5405 } else { 5406 slot[0] = 1; 5407 wmb(); 5408 } 5409 5410 /* And wait for switch to kernel (to save our context to memory) */ 5411 err = context_flush(arg->ce[0], HZ / 2); 5412 if (err) 5413 goto err; 5414 5415 if (!timestamp_advanced(arg->poison, slot[1])) { 5416 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 5417 arg->engine->name, preempt ? "preempt" : "simple", 5418 arg->poison, slot[1]); 5419 err = -EINVAL; 5420 } 5421 5422 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 5423 if (!timestamp_advanced(slot[1], timestamp)) { 5424 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 5425 arg->engine->name, preempt ? 
"preempt" : "simple", 5426 slot[1], timestamp); 5427 err = -EINVAL; 5428 } 5429 5430 err: 5431 memset32(slot, -1, 4); 5432 i915_request_put(rq); 5433 return err; 5434 } 5435 5436 static int live_lrc_timestamp(void *arg) 5437 { 5438 struct lrc_timestamp data = {}; 5439 struct intel_gt *gt = arg; 5440 enum intel_engine_id id; 5441 const u32 poison[] = { 5442 0, 5443 S32_MAX, 5444 (u32)S32_MAX + 1, 5445 U32_MAX, 5446 }; 5447 5448 /* 5449 * We want to verify that the timestamp is saved and restore across 5450 * context switches and is monotonic. 5451 * 5452 * So we do this with a little bit of LRC poisoning to check various 5453 * boundary conditions, and see what happens if we preempt the context 5454 * with a second request (carrying more poison into the timestamp). 5455 */ 5456 5457 for_each_engine(data.engine, gt, id) { 5458 int i, err = 0; 5459 5460 st_engine_heartbeat_disable(data.engine); 5461 5462 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5463 struct intel_context *tmp; 5464 5465 tmp = intel_context_create(data.engine); 5466 if (IS_ERR(tmp)) { 5467 err = PTR_ERR(tmp); 5468 goto err; 5469 } 5470 5471 err = intel_context_pin(tmp); 5472 if (err) { 5473 intel_context_put(tmp); 5474 goto err; 5475 } 5476 5477 data.ce[i] = tmp; 5478 } 5479 5480 for (i = 0; i < ARRAY_SIZE(poison); i++) { 5481 data.poison = poison[i]; 5482 5483 err = __lrc_timestamp(&data, false); 5484 if (err) 5485 break; 5486 5487 err = __lrc_timestamp(&data, true); 5488 if (err) 5489 break; 5490 } 5491 5492 err: 5493 st_engine_heartbeat_enable(data.engine); 5494 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 5495 if (!data.ce[i]) 5496 break; 5497 5498 intel_context_unpin(data.ce[i]); 5499 intel_context_put(data.ce[i]); 5500 } 5501 5502 if (igt_flush_test(gt->i915)) 5503 err = -EIO; 5504 if (err) 5505 return err; 5506 } 5507 5508 return 0; 5509 } 5510 5511 static struct i915_vma * 5512 create_user_vma(struct i915_address_space *vm, unsigned long size) 5513 { 5514 struct drm_i915_gem_object *obj; 5515 struct i915_vma *vma; 5516 int err; 5517 5518 obj = i915_gem_object_create_internal(vm->i915, size); 5519 if (IS_ERR(obj)) 5520 return ERR_CAST(obj); 5521 5522 vma = i915_vma_instance(obj, vm, NULL); 5523 if (IS_ERR(vma)) { 5524 i915_gem_object_put(obj); 5525 return vma; 5526 } 5527 5528 err = i915_vma_pin(vma, 0, 0, PIN_USER); 5529 if (err) { 5530 i915_gem_object_put(obj); 5531 return ERR_PTR(err); 5532 } 5533 5534 return vma; 5535 } 5536 5537 static struct i915_vma * 5538 store_context(struct intel_context *ce, struct i915_vma *scratch) 5539 { 5540 struct i915_vma *batch; 5541 u32 dw, x, *cs, *hw; 5542 u32 *defaults; 5543 5544 batch = create_user_vma(ce->vm, SZ_64K); 5545 if (IS_ERR(batch)) 5546 return batch; 5547 5548 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5549 if (IS_ERR(cs)) { 5550 i915_vma_put(batch); 5551 return ERR_CAST(cs); 5552 } 5553 5554 defaults = shmem_pin_map(ce->engine->default_state); 5555 if (!defaults) { 5556 i915_gem_object_unpin_map(batch->obj); 5557 i915_vma_put(batch); 5558 return ERR_PTR(-ENOMEM); 5559 } 5560 5561 x = 0; 5562 dw = 0; 5563 hw = defaults; 5564 hw += LRC_STATE_OFFSET / sizeof(*hw); 5565 do { 5566 u32 len = hw[dw] & 0x7f; 5567 5568 if (hw[dw] == 0) { 5569 dw++; 5570 continue; 5571 } 5572 5573 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5574 dw += len + 2; 5575 continue; 5576 } 5577 5578 dw++; 5579 len = (len + 1) / 2; 5580 while (len--) { 5581 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 5582 *cs++ = hw[dw]; 5583 *cs++ = lower_32_bits(scratch->node.start + x); 5584 *cs++ = 
upper_32_bits(scratch->node.start + x); 5585 5586 dw += 2; 5587 x += 4; 5588 } 5589 } while (dw < PAGE_SIZE / sizeof(u32) && 5590 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5591 5592 *cs++ = MI_BATCH_BUFFER_END; 5593 5594 shmem_unpin_map(ce->engine->default_state, defaults); 5595 5596 i915_gem_object_flush_map(batch->obj); 5597 i915_gem_object_unpin_map(batch->obj); 5598 5599 return batch; 5600 } 5601 5602 static int move_to_active(struct i915_request *rq, 5603 struct i915_vma *vma, 5604 unsigned int flags) 5605 { 5606 int err; 5607 5608 i915_vma_lock(vma); 5609 err = i915_request_await_object(rq, vma->obj, flags); 5610 if (!err) 5611 err = i915_vma_move_to_active(vma, rq, flags); 5612 i915_vma_unlock(vma); 5613 5614 return err; 5615 } 5616 5617 static struct i915_request * 5618 record_registers(struct intel_context *ce, 5619 struct i915_vma *before, 5620 struct i915_vma *after, 5621 u32 *sema) 5622 { 5623 struct i915_vma *b_before, *b_after; 5624 struct i915_request *rq; 5625 u32 *cs; 5626 int err; 5627 5628 b_before = store_context(ce, before); 5629 if (IS_ERR(b_before)) 5630 return ERR_CAST(b_before); 5631 5632 b_after = store_context(ce, after); 5633 if (IS_ERR(b_after)) { 5634 rq = ERR_CAST(b_after); 5635 goto err_before; 5636 } 5637 5638 rq = intel_context_create_request(ce); 5639 if (IS_ERR(rq)) 5640 goto err_after; 5641 5642 err = move_to_active(rq, before, EXEC_OBJECT_WRITE); 5643 if (err) 5644 goto err_rq; 5645 5646 err = move_to_active(rq, b_before, 0); 5647 if (err) 5648 goto err_rq; 5649 5650 err = move_to_active(rq, after, EXEC_OBJECT_WRITE); 5651 if (err) 5652 goto err_rq; 5653 5654 err = move_to_active(rq, b_after, 0); 5655 if (err) 5656 goto err_rq; 5657 5658 cs = intel_ring_begin(rq, 14); 5659 if (IS_ERR(cs)) { 5660 err = PTR_ERR(cs); 5661 goto err_rq; 5662 } 5663 5664 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5665 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5666 *cs++ = lower_32_bits(b_before->node.start); 5667 *cs++ = upper_32_bits(b_before->node.start); 5668 5669 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5670 *cs++ = MI_SEMAPHORE_WAIT | 5671 MI_SEMAPHORE_GLOBAL_GTT | 5672 MI_SEMAPHORE_POLL | 5673 MI_SEMAPHORE_SAD_NEQ_SDD; 5674 *cs++ = 0; 5675 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5676 offset_in_page(sema); 5677 *cs++ = 0; 5678 *cs++ = MI_NOOP; 5679 5680 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5681 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5682 *cs++ = lower_32_bits(b_after->node.start); 5683 *cs++ = upper_32_bits(b_after->node.start); 5684 5685 intel_ring_advance(rq, cs); 5686 5687 WRITE_ONCE(*sema, 0); 5688 i915_request_get(rq); 5689 i915_request_add(rq); 5690 err_after: 5691 i915_vma_put(b_after); 5692 err_before: 5693 i915_vma_put(b_before); 5694 return rq; 5695 5696 err_rq: 5697 i915_request_add(rq); 5698 rq = ERR_PTR(err); 5699 goto err_after; 5700 } 5701 5702 static struct i915_vma *load_context(struct intel_context *ce, u32 poison) 5703 { 5704 struct i915_vma *batch; 5705 u32 dw, *cs, *hw; 5706 u32 *defaults; 5707 5708 batch = create_user_vma(ce->vm, SZ_64K); 5709 if (IS_ERR(batch)) 5710 return batch; 5711 5712 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 5713 if (IS_ERR(cs)) { 5714 i915_vma_put(batch); 5715 return ERR_CAST(cs); 5716 } 5717 5718 defaults = shmem_pin_map(ce->engine->default_state); 5719 if (!defaults) { 5720 i915_gem_object_unpin_map(batch->obj); 5721 i915_vma_put(batch); 5722 return ERR_PTR(-ENOMEM); 5723 } 5724 5725 dw = 0; 5726 hw = defaults; 5727 hw += LRC_STATE_OFFSET / sizeof(*hw); 5728 do { 5729 u32 len = hw[dw] 
& 0x7f; 5730 5731 if (hw[dw] == 0) { 5732 dw++; 5733 continue; 5734 } 5735 5736 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5737 dw += len + 2; 5738 continue; 5739 } 5740 5741 dw++; 5742 len = (len + 1) / 2; 5743 *cs++ = MI_LOAD_REGISTER_IMM(len); 5744 while (len--) { 5745 *cs++ = hw[dw]; 5746 *cs++ = poison; 5747 dw += 2; 5748 } 5749 } while (dw < PAGE_SIZE / sizeof(u32) && 5750 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5751 5752 *cs++ = MI_BATCH_BUFFER_END; 5753 5754 shmem_unpin_map(ce->engine->default_state, defaults); 5755 5756 i915_gem_object_flush_map(batch->obj); 5757 i915_gem_object_unpin_map(batch->obj); 5758 5759 return batch; 5760 } 5761 5762 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) 5763 { 5764 struct i915_request *rq; 5765 struct i915_vma *batch; 5766 u32 *cs; 5767 int err; 5768 5769 batch = load_context(ce, poison); 5770 if (IS_ERR(batch)) 5771 return PTR_ERR(batch); 5772 5773 rq = intel_context_create_request(ce); 5774 if (IS_ERR(rq)) { 5775 err = PTR_ERR(rq); 5776 goto err_batch; 5777 } 5778 5779 err = move_to_active(rq, batch, 0); 5780 if (err) 5781 goto err_rq; 5782 5783 cs = intel_ring_begin(rq, 8); 5784 if (IS_ERR(cs)) { 5785 err = PTR_ERR(cs); 5786 goto err_rq; 5787 } 5788 5789 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5790 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); 5791 *cs++ = lower_32_bits(batch->node.start); 5792 *cs++ = upper_32_bits(batch->node.start); 5793 5794 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 5795 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + 5796 offset_in_page(sema); 5797 *cs++ = 0; 5798 *cs++ = 1; 5799 5800 intel_ring_advance(rq, cs); 5801 5802 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 5803 err_rq: 5804 i915_request_add(rq); 5805 err_batch: 5806 i915_vma_put(batch); 5807 return err; 5808 } 5809 5810 static bool is_moving(u32 a, u32 b) 5811 { 5812 return a != b; 5813 } 5814 5815 static int compare_isolation(struct intel_engine_cs *engine, 5816 struct i915_vma *ref[2], 5817 struct i915_vma *result[2], 5818 struct intel_context *ce, 5819 u32 poison) 5820 { 5821 u32 x, dw, *hw, *lrc; 5822 u32 *A[2], *B[2]; 5823 u32 *defaults; 5824 int err = 0; 5825 5826 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); 5827 if (IS_ERR(A[0])) 5828 return PTR_ERR(A[0]); 5829 5830 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); 5831 if (IS_ERR(A[1])) { 5832 err = PTR_ERR(A[1]); 5833 goto err_A0; 5834 } 5835 5836 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); 5837 if (IS_ERR(B[0])) { 5838 err = PTR_ERR(B[0]); 5839 goto err_A1; 5840 } 5841 5842 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); 5843 if (IS_ERR(B[1])) { 5844 err = PTR_ERR(B[1]); 5845 goto err_B0; 5846 } 5847 5848 lrc = i915_gem_object_pin_map(ce->state->obj, 5849 i915_coherent_map_type(engine->i915)); 5850 if (IS_ERR(lrc)) { 5851 err = PTR_ERR(lrc); 5852 goto err_B1; 5853 } 5854 lrc += LRC_STATE_OFFSET / sizeof(*hw); 5855 5856 defaults = shmem_pin_map(ce->engine->default_state); 5857 if (!defaults) { 5858 err = -ENOMEM; 5859 goto err_lrc; 5860 } 5861 5862 x = 0; 5863 dw = 0; 5864 hw = defaults; 5865 hw += LRC_STATE_OFFSET / sizeof(*hw); 5866 do { 5867 u32 len = hw[dw] & 0x7f; 5868 5869 if (hw[dw] == 0) { 5870 dw++; 5871 continue; 5872 } 5873 5874 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 5875 dw += len + 2; 5876 continue; 5877 } 5878 5879 dw++; 5880 len = (len + 1) / 2; 5881 while (len--) { 5882 if (!is_moving(A[0][x], A[1][x]) && 5883 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { 5884 
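				/*
				 * The context image is walked as a stream of
				 * MI_LOAD_REGISTER_IMM packets, i.e. a header
				 * followed by (offset, value) pairs, and only
				 * the low 12 bits of each offset select the
				 * register within the engine's mmio page.
				 * The ring head and tail move legitimately
				 * while the batches run, so they are exempt
				 * from the comparison below.
				 */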
switch (hw[dw] & 4095) { 5885 case 0x30: /* RING_HEAD */ 5886 case 0x34: /* RING_TAIL */ 5887 break; 5888 5889 default: 5890 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", 5891 engine->name, dw, 5892 hw[dw], hw[dw + 1], 5893 A[0][x], B[0][x], B[1][x], 5894 poison, lrc[dw + 1]); 5895 err = -EINVAL; 5896 } 5897 } 5898 dw += 2; 5899 x++; 5900 } 5901 } while (dw < PAGE_SIZE / sizeof(u32) && 5902 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 5903 5904 shmem_unpin_map(ce->engine->default_state, defaults); 5905 err_lrc: 5906 i915_gem_object_unpin_map(ce->state->obj); 5907 err_B1: 5908 i915_gem_object_unpin_map(result[1]->obj); 5909 err_B0: 5910 i915_gem_object_unpin_map(result[0]->obj); 5911 err_A1: 5912 i915_gem_object_unpin_map(ref[1]->obj); 5913 err_A0: 5914 i915_gem_object_unpin_map(ref[0]->obj); 5915 return err; 5916 } 5917 5918 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) 5919 { 5920 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); 5921 struct i915_vma *ref[2], *result[2]; 5922 struct intel_context *A, *B; 5923 struct i915_request *rq; 5924 int err; 5925 5926 A = intel_context_create(engine); 5927 if (IS_ERR(A)) 5928 return PTR_ERR(A); 5929 5930 B = intel_context_create(engine); 5931 if (IS_ERR(B)) { 5932 err = PTR_ERR(B); 5933 goto err_A; 5934 } 5935 5936 ref[0] = create_user_vma(A->vm, SZ_64K); 5937 if (IS_ERR(ref[0])) { 5938 err = PTR_ERR(ref[0]); 5939 goto err_B; 5940 } 5941 5942 ref[1] = create_user_vma(A->vm, SZ_64K); 5943 if (IS_ERR(ref[1])) { 5944 err = PTR_ERR(ref[1]); 5945 goto err_ref0; 5946 } 5947 5948 rq = record_registers(A, ref[0], ref[1], sema); 5949 if (IS_ERR(rq)) { 5950 err = PTR_ERR(rq); 5951 goto err_ref1; 5952 } 5953 5954 WRITE_ONCE(*sema, 1); 5955 wmb(); 5956 5957 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5958 i915_request_put(rq); 5959 err = -ETIME; 5960 goto err_ref1; 5961 } 5962 i915_request_put(rq); 5963 5964 result[0] = create_user_vma(A->vm, SZ_64K); 5965 if (IS_ERR(result[0])) { 5966 err = PTR_ERR(result[0]); 5967 goto err_ref1; 5968 } 5969 5970 result[1] = create_user_vma(A->vm, SZ_64K); 5971 if (IS_ERR(result[1])) { 5972 err = PTR_ERR(result[1]); 5973 goto err_result0; 5974 } 5975 5976 rq = record_registers(A, result[0], result[1], sema); 5977 if (IS_ERR(rq)) { 5978 err = PTR_ERR(rq); 5979 goto err_result1; 5980 } 5981 5982 err = poison_registers(B, poison, sema); 5983 if (err) { 5984 WRITE_ONCE(*sema, -1); 5985 i915_request_put(rq); 5986 goto err_result1; 5987 } 5988 5989 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 5990 i915_request_put(rq); 5991 err = -ETIME; 5992 goto err_result1; 5993 } 5994 i915_request_put(rq); 5995 5996 err = compare_isolation(engine, ref, result, A, poison); 5997 5998 err_result1: 5999 i915_vma_put(result[1]); 6000 err_result0: 6001 i915_vma_put(result[0]); 6002 err_ref1: 6003 i915_vma_put(ref[1]); 6004 err_ref0: 6005 i915_vma_put(ref[0]); 6006 err_B: 6007 intel_context_put(B); 6008 err_A: 6009 intel_context_put(A); 6010 return err; 6011 } 6012 6013 static bool skip_isolation(const struct intel_engine_cs *engine) 6014 { 6015 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) 6016 return true; 6017 6018 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) 6019 return true; 6020 6021 return false; 6022 } 6023 6024 static int live_lrc_isolation(void *arg) 6025 { 6026 struct intel_gt *gt = arg; 6027 struct intel_engine_cs *engine; 6028 enum intel_engine_id id; 6029 const u32 poison[] = { 
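		/* recognisable patterns: the kernel's STACK_MAGIC plus repeated-byte and all/upper-ones values */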
6030 STACK_MAGIC, 6031 0x3a3a3a3a, 6032 0x5c5c5c5c, 6033 0xffffffff, 6034 0xffff0000, 6035 }; 6036 int err = 0; 6037 6038 /* 6039 * Our goal is try and verify that per-context state cannot be 6040 * tampered with by another non-privileged client. 6041 * 6042 * We take the list of context registers from the LRI in the default 6043 * context image and attempt to modify that list from a remote context. 6044 */ 6045 6046 for_each_engine(engine, gt, id) { 6047 int i; 6048 6049 /* Just don't even ask */ 6050 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && 6051 skip_isolation(engine)) 6052 continue; 6053 6054 intel_engine_pm_get(engine); 6055 for (i = 0; i < ARRAY_SIZE(poison); i++) { 6056 int result; 6057 6058 result = __lrc_isolation(engine, poison[i]); 6059 if (result && !err) 6060 err = result; 6061 6062 result = __lrc_isolation(engine, ~poison[i]); 6063 if (result && !err) 6064 err = result; 6065 } 6066 intel_engine_pm_put(engine); 6067 if (igt_flush_test(gt->i915)) { 6068 err = -EIO; 6069 break; 6070 } 6071 } 6072 6073 return err; 6074 } 6075 6076 static int indirect_ctx_submit_req(struct intel_context *ce) 6077 { 6078 struct i915_request *rq; 6079 int err = 0; 6080 6081 rq = intel_context_create_request(ce); 6082 if (IS_ERR(rq)) 6083 return PTR_ERR(rq); 6084 6085 i915_request_get(rq); 6086 i915_request_add(rq); 6087 6088 if (i915_request_wait(rq, 0, HZ / 5) < 0) 6089 err = -ETIME; 6090 6091 i915_request_put(rq); 6092 6093 return err; 6094 } 6095 6096 #define CTX_BB_CANARY_OFFSET (3 * 1024) 6097 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) 6098 6099 static u32 * 6100 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) 6101 { 6102 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | 6103 MI_SRM_LRM_GLOBAL_GTT | 6104 MI_LRI_LRM_CS_MMIO; 6105 *cs++ = i915_mmio_reg_offset(RING_START(0)); 6106 *cs++ = i915_ggtt_offset(ce->state) + 6107 context_wa_bb_offset(ce) + 6108 CTX_BB_CANARY_OFFSET; 6109 *cs++ = 0; 6110 6111 return cs; 6112 } 6113 6114 static void 6115 indirect_ctx_bb_setup(struct intel_context *ce) 6116 { 6117 u32 *cs = context_indirect_bb(ce); 6118 6119 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; 6120 6121 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); 6122 } 6123 6124 static bool check_ring_start(struct intel_context *ce) 6125 { 6126 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - 6127 LRC_STATE_OFFSET + context_wa_bb_offset(ce); 6128 6129 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) 6130 return true; 6131 6132 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n", 6133 ctx_bb[CTX_BB_CANARY_INDEX], 6134 ce->lrc_reg_state[CTX_RING_START]); 6135 6136 return false; 6137 } 6138 6139 static int indirect_ctx_bb_check(struct intel_context *ce) 6140 { 6141 int err; 6142 6143 err = indirect_ctx_submit_req(ce); 6144 if (err) 6145 return err; 6146 6147 if (!check_ring_start(ce)) 6148 return -EINVAL; 6149 6150 return 0; 6151 } 6152 6153 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) 6154 { 6155 struct intel_context *a, *b; 6156 int err; 6157 6158 a = intel_context_create(engine); 6159 if (IS_ERR(a)) 6160 return PTR_ERR(a); 6161 err = intel_context_pin(a); 6162 if (err) 6163 goto put_a; 6164 6165 b = intel_context_create(engine); 6166 if (IS_ERR(b)) { 6167 err = PTR_ERR(b); 6168 goto unpin_a; 6169 } 6170 err = intel_context_pin(b); 6171 if (err) 6172 goto put_b; 6173 6174 /* We use the already reserved extra page in context state */ 6175 if (!a->wa_bb_page) { 6176 GEM_BUG_ON(b->wa_bb_page); 6177 
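		/* Without a reserved wa_bb page there is no indirect ctx batch to hook into on this platform; gen12 is expected to provide one */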
GEM_BUG_ON(INTEL_GEN(engine->i915) == 12); 6178 goto unpin_b; 6179 } 6180 6181 /* 6182 * In order to test that our per context bb is truly per context, 6183 * and executes at the intended spot on context restoring process, 6184 * make the batch store the ring start value to memory. 6185 * As ring start is restored apriori of starting the indirect ctx bb and 6186 * as it will be different for each context, it fits to this purpose. 6187 */ 6188 indirect_ctx_bb_setup(a); 6189 indirect_ctx_bb_setup(b); 6190 6191 err = indirect_ctx_bb_check(a); 6192 if (err) 6193 goto unpin_b; 6194 6195 err = indirect_ctx_bb_check(b); 6196 6197 unpin_b: 6198 intel_context_unpin(b); 6199 put_b: 6200 intel_context_put(b); 6201 unpin_a: 6202 intel_context_unpin(a); 6203 put_a: 6204 intel_context_put(a); 6205 6206 return err; 6207 } 6208 6209 static int live_lrc_indirect_ctx_bb(void *arg) 6210 { 6211 struct intel_gt *gt = arg; 6212 struct intel_engine_cs *engine; 6213 enum intel_engine_id id; 6214 int err = 0; 6215 6216 for_each_engine(engine, gt, id) { 6217 intel_engine_pm_get(engine); 6218 err = __live_lrc_indirect_ctx_bb(engine); 6219 intel_engine_pm_put(engine); 6220 6221 if (igt_flush_test(gt->i915)) 6222 err = -EIO; 6223 6224 if (err) 6225 break; 6226 } 6227 6228 return err; 6229 } 6230 6231 static void garbage_reset(struct intel_engine_cs *engine, 6232 struct i915_request *rq) 6233 { 6234 const unsigned int bit = I915_RESET_ENGINE + engine->id; 6235 unsigned long *lock = &engine->gt->reset.flags; 6236 6237 if (test_and_set_bit(bit, lock)) 6238 return; 6239 6240 tasklet_disable(&engine->execlists.tasklet); 6241 6242 if (!rq->fence.error) 6243 intel_engine_reset(engine, NULL); 6244 6245 tasklet_enable(&engine->execlists.tasklet); 6246 clear_and_wake_up_bit(bit, lock); 6247 } 6248 6249 static struct i915_request *garbage(struct intel_context *ce, 6250 struct rnd_state *prng) 6251 { 6252 struct i915_request *rq; 6253 int err; 6254 6255 err = intel_context_pin(ce); 6256 if (err) 6257 return ERR_PTR(err); 6258 6259 prandom_bytes_state(prng, 6260 ce->lrc_reg_state, 6261 ce->engine->context_size - 6262 LRC_STATE_OFFSET); 6263 6264 rq = intel_context_create_request(ce); 6265 if (IS_ERR(rq)) { 6266 err = PTR_ERR(rq); 6267 goto err_unpin; 6268 } 6269 6270 i915_request_get(rq); 6271 i915_request_add(rq); 6272 return rq; 6273 6274 err_unpin: 6275 intel_context_unpin(ce); 6276 return ERR_PTR(err); 6277 } 6278 6279 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng) 6280 { 6281 struct intel_context *ce; 6282 struct i915_request *hang; 6283 int err = 0; 6284 6285 ce = intel_context_create(engine); 6286 if (IS_ERR(ce)) 6287 return PTR_ERR(ce); 6288 6289 hang = garbage(ce, prng); 6290 if (IS_ERR(hang)) { 6291 err = PTR_ERR(hang); 6292 goto err_ce; 6293 } 6294 6295 if (wait_for_submit(engine, hang, HZ / 2)) { 6296 i915_request_put(hang); 6297 err = -ETIME; 6298 goto err_ce; 6299 } 6300 6301 intel_context_set_banned(ce); 6302 garbage_reset(engine, hang); 6303 6304 intel_engine_flush_submission(engine); 6305 if (!hang->fence.error) { 6306 i915_request_put(hang); 6307 pr_err("%s: corrupted context was not reset\n", 6308 engine->name); 6309 err = -EINVAL; 6310 goto err_ce; 6311 } 6312 6313 if (i915_request_wait(hang, 0, HZ / 2) < 0) { 6314 pr_err("%s: corrupted context did not recover\n", 6315 engine->name); 6316 i915_request_put(hang); 6317 err = -EIO; 6318 goto err_ce; 6319 } 6320 i915_request_put(hang); 6321 6322 err_ce: 6323 intel_context_put(ce); 6324 return err; 6325 } 6326 6327 static int 
live_lrc_garbage(void *arg) 6328 { 6329 struct intel_gt *gt = arg; 6330 struct intel_engine_cs *engine; 6331 enum intel_engine_id id; 6332 6333 /* 6334 * Verify that we can recover if one context state is completely 6335 * corrupted. 6336 */ 6337 6338 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) 6339 return 0; 6340 6341 for_each_engine(engine, gt, id) { 6342 I915_RND_STATE(prng); 6343 int err = 0, i; 6344 6345 if (!intel_has_reset_engine(engine->gt)) 6346 continue; 6347 6348 intel_engine_pm_get(engine); 6349 for (i = 0; i < 3; i++) { 6350 err = __lrc_garbage(engine, &prng); 6351 if (err) 6352 break; 6353 } 6354 intel_engine_pm_put(engine); 6355 6356 if (igt_flush_test(gt->i915)) 6357 err = -EIO; 6358 if (err) 6359 return err; 6360 } 6361 6362 return 0; 6363 } 6364 6365 static int __live_pphwsp_runtime(struct intel_engine_cs *engine) 6366 { 6367 struct intel_context *ce; 6368 struct i915_request *rq; 6369 IGT_TIMEOUT(end_time); 6370 int err; 6371 6372 ce = intel_context_create(engine); 6373 if (IS_ERR(ce)) 6374 return PTR_ERR(ce); 6375 6376 ce->runtime.num_underflow = 0; 6377 ce->runtime.max_underflow = 0; 6378 6379 do { 6380 unsigned int loop = 1024; 6381 6382 while (loop) { 6383 rq = intel_context_create_request(ce); 6384 if (IS_ERR(rq)) { 6385 err = PTR_ERR(rq); 6386 goto err_rq; 6387 } 6388 6389 if (--loop == 0) 6390 i915_request_get(rq); 6391 6392 i915_request_add(rq); 6393 } 6394 6395 if (__igt_timeout(end_time, NULL)) 6396 break; 6397 6398 i915_request_put(rq); 6399 } while (1); 6400 6401 err = i915_request_wait(rq, 0, HZ / 5); 6402 if (err < 0) { 6403 pr_err("%s: request not completed!\n", engine->name); 6404 goto err_wait; 6405 } 6406 6407 igt_flush_test(engine->i915); 6408 6409 pr_info("%s: pphwsp runtime %lluns, average %lluns\n", 6410 engine->name, 6411 intel_context_get_total_runtime_ns(ce), 6412 intel_context_get_avg_runtime_ns(ce)); 6413 6414 err = 0; 6415 if (ce->runtime.num_underflow) { 6416 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", 6417 engine->name, 6418 ce->runtime.num_underflow, 6419 ce->runtime.max_underflow); 6420 GEM_TRACE_DUMP(); 6421 err = -EOVERFLOW; 6422 } 6423 6424 err_wait: 6425 i915_request_put(rq); 6426 err_rq: 6427 intel_context_put(ce); 6428 return err; 6429 } 6430 6431 static int live_pphwsp_runtime(void *arg) 6432 { 6433 struct intel_gt *gt = arg; 6434 struct intel_engine_cs *engine; 6435 enum intel_engine_id id; 6436 int err = 0; 6437 6438 /* 6439 * Check that cumulative context runtime as stored in the pphwsp[16] 6440 * is monotonic. 6441 */ 6442 6443 for_each_engine(engine, gt, id) { 6444 err = __live_pphwsp_runtime(engine); 6445 if (err) 6446 break; 6447 } 6448 6449 if (igt_flush_test(gt->i915)) 6450 err = -EIO; 6451 6452 return err; 6453 } 6454 6455 int intel_lrc_live_selftests(struct drm_i915_private *i915) 6456 { 6457 static const struct i915_subtest tests[] = { 6458 SUBTEST(live_lrc_layout), 6459 SUBTEST(live_lrc_fixed), 6460 SUBTEST(live_lrc_state), 6461 SUBTEST(live_lrc_gpr), 6462 SUBTEST(live_lrc_isolation), 6463 SUBTEST(live_lrc_timestamp), 6464 SUBTEST(live_lrc_garbage), 6465 SUBTEST(live_pphwsp_runtime), 6466 SUBTEST(live_lrc_indirect_ctx_bb), 6467 }; 6468 6469 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 6470 return 0; 6471 6472 return intel_gt_live_subtests(tests, &i915->gt); 6473 } 6474