1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2018 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_engine_heartbeat.h" 11 #include "gt/intel_reset.h" 12 13 #include "i915_selftest.h" 14 #include "selftests/i915_random.h" 15 #include "selftests/igt_flush_test.h" 16 #include "selftests/igt_live_test.h" 17 #include "selftests/igt_spinner.h" 18 #include "selftests/lib_sw_fence.h" 19 20 #include "gem/selftests/igt_gem_utils.h" 21 #include "gem/selftests/mock_context.h" 22 23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) 24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ 25 26 static struct i915_vma *create_scratch(struct intel_gt *gt) 27 { 28 struct drm_i915_gem_object *obj; 29 struct i915_vma *vma; 30 int err; 31 32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 33 if (IS_ERR(obj)) 34 return ERR_CAST(obj); 35 36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); 37 38 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 39 if (IS_ERR(vma)) { 40 i915_gem_object_put(obj); 41 return vma; 42 } 43 44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 45 if (err) { 46 i915_gem_object_put(obj); 47 return ERR_PTR(err); 48 } 49 50 return vma; 51 } 52 53 static void engine_heartbeat_disable(struct intel_engine_cs *engine, 54 unsigned long *saved) 55 { 56 *saved = engine->props.heartbeat_interval_ms; 57 engine->props.heartbeat_interval_ms = 0; 58 59 intel_engine_pm_get(engine); 60 intel_engine_park_heartbeat(engine); 61 } 62 63 static void engine_heartbeat_enable(struct intel_engine_cs *engine, 64 unsigned long saved) 65 { 66 intel_engine_pm_put(engine); 67 68 engine->props.heartbeat_interval_ms = saved; 69 } 70 71 static int wait_for_submit(struct intel_engine_cs *engine, 72 struct i915_request *rq, 73 unsigned long timeout) 74 { 75 timeout += jiffies; 76 do { 77 cond_resched(); 78 intel_engine_flush_submission(engine); 79 80 if (READ_ONCE(engine->execlists.pending[0])) 81 continue; 82 83 if (i915_request_is_active(rq)) 84 return 0; 85 86 if (i915_request_started(rq)) /* that was quick! 
*/ 87 return 0; 88 } while (time_before(jiffies, timeout)); 89 90 return -ETIME; 91 } 92 93 static int wait_for_reset(struct intel_engine_cs *engine, 94 struct i915_request *rq, 95 unsigned long timeout) 96 { 97 timeout += jiffies; 98 99 do { 100 cond_resched(); 101 intel_engine_flush_submission(engine); 102 103 if (READ_ONCE(engine->execlists.pending[0])) 104 continue; 105 106 if (i915_request_completed(rq)) 107 break; 108 109 if (READ_ONCE(rq->fence.error)) 110 break; 111 } while (time_before(jiffies, timeout)); 112 113 flush_scheduled_work(); 114 115 if (rq->fence.error != -EIO) { 116 pr_err("%s: hanging request %llx:%lld not reset\n", 117 engine->name, 118 rq->fence.context, 119 rq->fence.seqno); 120 return -EINVAL; 121 } 122 123 /* Give the request a jiffie to complete after flushing the worker */ 124 if (i915_request_wait(rq, 0, 125 max(0l, (long)(timeout - jiffies)) + 1) < 0) { 126 pr_err("%s: hanging request %llx:%lld did not complete\n", 127 engine->name, 128 rq->fence.context, 129 rq->fence.seqno); 130 return -ETIME; 131 } 132 133 return 0; 134 } 135 136 static int live_sanitycheck(void *arg) 137 { 138 struct intel_gt *gt = arg; 139 struct intel_engine_cs *engine; 140 enum intel_engine_id id; 141 struct igt_spinner spin; 142 int err = 0; 143 144 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) 145 return 0; 146 147 if (igt_spinner_init(&spin, gt)) 148 return -ENOMEM; 149 150 for_each_engine(engine, gt, id) { 151 struct intel_context *ce; 152 struct i915_request *rq; 153 154 ce = intel_context_create(engine); 155 if (IS_ERR(ce)) { 156 err = PTR_ERR(ce); 157 break; 158 } 159 160 rq = igt_spinner_create_request(&spin, ce, MI_NOOP); 161 if (IS_ERR(rq)) { 162 err = PTR_ERR(rq); 163 goto out_ctx; 164 } 165 166 i915_request_add(rq); 167 if (!igt_wait_for_spinner(&spin, rq)) { 168 GEM_TRACE("spinner failed to start\n"); 169 GEM_TRACE_DUMP(); 170 intel_gt_set_wedged(gt); 171 err = -EIO; 172 goto out_ctx; 173 } 174 175 igt_spinner_end(&spin); 176 if (igt_flush_test(gt->i915)) { 177 err = -EIO; 178 goto out_ctx; 179 } 180 181 out_ctx: 182 intel_context_put(ce); 183 if (err) 184 break; 185 } 186 187 igt_spinner_fini(&spin); 188 return err; 189 } 190 191 static int live_unlite_restore(struct intel_gt *gt, int prio) 192 { 193 struct intel_engine_cs *engine; 194 enum intel_engine_id id; 195 struct igt_spinner spin; 196 int err = -ENOMEM; 197 198 /* 199 * Check that we can correctly context switch between 2 instances 200 * on the same engine from the same parent context. 201 */ 202 203 if (igt_spinner_init(&spin, gt)) 204 return err; 205 206 err = 0; 207 for_each_engine(engine, gt, id) { 208 struct intel_context *ce[2] = {}; 209 struct i915_request *rq[2]; 210 struct igt_live_test t; 211 unsigned long saved; 212 int n; 213 214 if (prio && !intel_engine_has_preemption(engine)) 215 continue; 216 217 if (!intel_engine_can_store_dword(engine)) 218 continue; 219 220 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 221 err = -EIO; 222 break; 223 } 224 engine_heartbeat_disable(engine, &saved); 225 226 for (n = 0; n < ARRAY_SIZE(ce); n++) { 227 struct intel_context *tmp; 228 229 tmp = intel_context_create(engine); 230 if (IS_ERR(tmp)) { 231 err = PTR_ERR(tmp); 232 goto err_ce; 233 } 234 235 err = intel_context_pin(tmp); 236 if (err) { 237 intel_context_put(tmp); 238 goto err_ce; 239 } 240 241 /* 242 * Setup the pair of contexts such that if we 243 * lite-restore using the RING_TAIL from ce[1] it 244 * will execute garbage from ce[0]->ring. 
245 */ 246 memset(tmp->ring->vaddr, 247 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 248 tmp->ring->vma->size); 249 250 ce[n] = tmp; 251 } 252 GEM_BUG_ON(!ce[1]->ring->size); 253 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 254 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); 255 256 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 257 if (IS_ERR(rq[0])) { 258 err = PTR_ERR(rq[0]); 259 goto err_ce; 260 } 261 262 i915_request_get(rq[0]); 263 i915_request_add(rq[0]); 264 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 265 266 if (!igt_wait_for_spinner(&spin, rq[0])) { 267 i915_request_put(rq[0]); 268 goto err_ce; 269 } 270 271 rq[1] = i915_request_create(ce[1]); 272 if (IS_ERR(rq[1])) { 273 err = PTR_ERR(rq[1]); 274 i915_request_put(rq[0]); 275 goto err_ce; 276 } 277 278 if (!prio) { 279 /* 280 * Ensure we do the switch to ce[1] on completion. 281 * 282 * rq[0] is already submitted, so this should reduce 283 * to a no-op (a wait on a request on the same engine 284 * uses the submit fence, not the completion fence), 285 * but it will install a dependency on rq[1] for rq[0] 286 * that will prevent the pair being reordered by 287 * timeslicing. 288 */ 289 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 290 } 291 292 i915_request_get(rq[1]); 293 i915_request_add(rq[1]); 294 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 295 i915_request_put(rq[0]); 296 297 if (prio) { 298 struct i915_sched_attr attr = { 299 .priority = prio, 300 }; 301 302 /* Alternatively preempt the spinner with ce[1] */ 303 engine->schedule(rq[1], &attr); 304 } 305 306 /* And switch back to ce[0] for good measure */ 307 rq[0] = i915_request_create(ce[0]); 308 if (IS_ERR(rq[0])) { 309 err = PTR_ERR(rq[0]); 310 i915_request_put(rq[1]); 311 goto err_ce; 312 } 313 314 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 315 i915_request_get(rq[0]); 316 i915_request_add(rq[0]); 317 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 318 i915_request_put(rq[1]); 319 i915_request_put(rq[0]); 320 321 err_ce: 322 tasklet_kill(&engine->execlists.tasklet); /* flush submission */ 323 igt_spinner_end(&spin); 324 for (n = 0; n < ARRAY_SIZE(ce); n++) { 325 if (IS_ERR_OR_NULL(ce[n])) 326 break; 327 328 intel_context_unpin(ce[n]); 329 intel_context_put(ce[n]); 330 } 331 332 engine_heartbeat_enable(engine, saved); 333 if (igt_live_test_end(&t)) 334 err = -EIO; 335 if (err) 336 break; 337 } 338 339 igt_spinner_fini(&spin); 340 return err; 341 } 342 343 static int live_unlite_switch(void *arg) 344 { 345 return live_unlite_restore(arg, 0); 346 } 347 348 static int live_unlite_preempt(void *arg) 349 { 350 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 351 } 352 353 static int live_pin_rewind(void *arg) 354 { 355 struct intel_gt *gt = arg; 356 struct intel_engine_cs *engine; 357 enum intel_engine_id id; 358 int err = 0; 359 360 /* 361 * We have to be careful not to trust intel_ring too much, for example 362 * ring->head is updated upon retire which is out of sync with pinning 363 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD, 364 * or else we risk writing an older, stale value. 365 * 366 * To simulate this, let's apply a bit of deliberate sabotague. 
367 */ 368 369 for_each_engine(engine, gt, id) { 370 struct intel_context *ce; 371 struct i915_request *rq; 372 struct intel_ring *ring; 373 struct igt_live_test t; 374 375 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 376 err = -EIO; 377 break; 378 } 379 380 ce = intel_context_create(engine); 381 if (IS_ERR(ce)) { 382 err = PTR_ERR(ce); 383 break; 384 } 385 386 err = intel_context_pin(ce); 387 if (err) { 388 intel_context_put(ce); 389 break; 390 } 391 392 /* Keep the context awake while we play games */ 393 err = i915_active_acquire(&ce->active); 394 if (err) { 395 intel_context_unpin(ce); 396 intel_context_put(ce); 397 break; 398 } 399 ring = ce->ring; 400 401 /* Poison the ring, and offset the next request from HEAD */ 402 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32)); 403 ring->emit = ring->size / 2; 404 ring->tail = ring->emit; 405 GEM_BUG_ON(ring->head); 406 407 intel_context_unpin(ce); 408 409 /* Submit a simple nop request */ 410 GEM_BUG_ON(intel_context_is_pinned(ce)); 411 rq = intel_context_create_request(ce); 412 i915_active_release(&ce->active); /* e.g. async retire */ 413 intel_context_put(ce); 414 if (IS_ERR(rq)) { 415 err = PTR_ERR(rq); 416 break; 417 } 418 GEM_BUG_ON(!rq->head); 419 i915_request_add(rq); 420 421 /* Expect not to hang! */ 422 if (igt_live_test_end(&t)) { 423 err = -EIO; 424 break; 425 } 426 } 427 428 return err; 429 } 430 431 static int live_hold_reset(void *arg) 432 { 433 struct intel_gt *gt = arg; 434 struct intel_engine_cs *engine; 435 enum intel_engine_id id; 436 struct igt_spinner spin; 437 int err = 0; 438 439 /* 440 * In order to support offline error capture for fast preempt reset, 441 * we need to decouple the guilty request and ensure that it and its 442 * descendents are not executed while the capture is in progress. 
443 */ 444 445 if (!intel_has_reset_engine(gt)) 446 return 0; 447 448 if (igt_spinner_init(&spin, gt)) 449 return -ENOMEM; 450 451 for_each_engine(engine, gt, id) { 452 struct intel_context *ce; 453 unsigned long heartbeat; 454 struct i915_request *rq; 455 456 ce = intel_context_create(engine); 457 if (IS_ERR(ce)) { 458 err = PTR_ERR(ce); 459 break; 460 } 461 462 engine_heartbeat_disable(engine, &heartbeat); 463 464 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 465 if (IS_ERR(rq)) { 466 err = PTR_ERR(rq); 467 goto out; 468 } 469 i915_request_add(rq); 470 471 if (!igt_wait_for_spinner(&spin, rq)) { 472 intel_gt_set_wedged(gt); 473 err = -ETIME; 474 goto out; 475 } 476 477 /* We have our request executing, now remove it and reset */ 478 479 if (test_and_set_bit(I915_RESET_ENGINE + id, 480 >->reset.flags)) { 481 intel_gt_set_wedged(gt); 482 err = -EBUSY; 483 goto out; 484 } 485 tasklet_disable(&engine->execlists.tasklet); 486 487 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 488 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 489 490 i915_request_get(rq); 491 execlists_hold(engine, rq); 492 GEM_BUG_ON(!i915_request_on_hold(rq)); 493 494 intel_engine_reset(engine, NULL); 495 GEM_BUG_ON(rq->fence.error != -EIO); 496 497 tasklet_enable(&engine->execlists.tasklet); 498 clear_and_wake_up_bit(I915_RESET_ENGINE + id, 499 >->reset.flags); 500 501 /* Check that we do not resubmit the held request */ 502 if (!i915_request_wait(rq, 0, HZ / 5)) { 503 pr_err("%s: on hold request completed!\n", 504 engine->name); 505 i915_request_put(rq); 506 err = -EIO; 507 goto out; 508 } 509 GEM_BUG_ON(!i915_request_on_hold(rq)); 510 511 /* But is resubmitted on release */ 512 execlists_unhold(engine, rq); 513 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 514 pr_err("%s: held request did not complete!\n", 515 engine->name); 516 intel_gt_set_wedged(gt); 517 err = -ETIME; 518 } 519 i915_request_put(rq); 520 521 out: 522 engine_heartbeat_enable(engine, heartbeat); 523 intel_context_put(ce); 524 if (err) 525 break; 526 } 527 528 igt_spinner_fini(&spin); 529 return err; 530 } 531 532 static const char *error_repr(int err) 533 { 534 return err ? "bad" : "good"; 535 } 536 537 static int live_error_interrupt(void *arg) 538 { 539 static const struct error_phase { 540 enum { GOOD = 0, BAD = -EIO } error[2]; 541 } phases[] = { 542 { { BAD, GOOD } }, 543 { { BAD, BAD } }, 544 { { BAD, GOOD } }, 545 { { GOOD, GOOD } }, /* sentinel */ 546 }; 547 struct intel_gt *gt = arg; 548 struct intel_engine_cs *engine; 549 enum intel_engine_id id; 550 551 /* 552 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning 553 * of invalid commands in user batches that will cause a GPU hang. 554 * This is a faster mechanism than using hangcheck/heartbeats, but 555 * only detects problems the HW knows about -- it will not warn when 556 * we kill the HW! 557 * 558 * To verify our detection and reset, we throw some invalid commands 559 * at the HW and wait for the interrupt. 
560 */ 561 562 if (!intel_has_reset_engine(gt)) 563 return 0; 564 565 for_each_engine(engine, gt, id) { 566 const struct error_phase *p; 567 unsigned long heartbeat; 568 int err = 0; 569 570 engine_heartbeat_disable(engine, &heartbeat); 571 572 for (p = phases; p->error[0] != GOOD; p++) { 573 struct i915_request *client[ARRAY_SIZE(phases->error)]; 574 u32 *cs; 575 int i; 576 577 memset(client, 0, sizeof(*client)); 578 for (i = 0; i < ARRAY_SIZE(client); i++) { 579 struct intel_context *ce; 580 struct i915_request *rq; 581 582 ce = intel_context_create(engine); 583 if (IS_ERR(ce)) { 584 err = PTR_ERR(ce); 585 goto out; 586 } 587 588 rq = intel_context_create_request(ce); 589 intel_context_put(ce); 590 if (IS_ERR(rq)) { 591 err = PTR_ERR(rq); 592 goto out; 593 } 594 595 if (rq->engine->emit_init_breadcrumb) { 596 err = rq->engine->emit_init_breadcrumb(rq); 597 if (err) { 598 i915_request_add(rq); 599 goto out; 600 } 601 } 602 603 cs = intel_ring_begin(rq, 2); 604 if (IS_ERR(cs)) { 605 i915_request_add(rq); 606 err = PTR_ERR(cs); 607 goto out; 608 } 609 610 if (p->error[i]) { 611 *cs++ = 0xdeadbeef; 612 *cs++ = 0xdeadbeef; 613 } else { 614 *cs++ = MI_NOOP; 615 *cs++ = MI_NOOP; 616 } 617 618 client[i] = i915_request_get(rq); 619 i915_request_add(rq); 620 } 621 622 err = wait_for_submit(engine, client[0], HZ / 2); 623 if (err) { 624 pr_err("%s: first request did not start within time!\n", 625 engine->name); 626 err = -ETIME; 627 goto out; 628 } 629 630 for (i = 0; i < ARRAY_SIZE(client); i++) { 631 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 632 pr_debug("%s: %s request incomplete!\n", 633 engine->name, 634 error_repr(p->error[i])); 635 636 if (!i915_request_started(client[i])) { 637 pr_debug("%s: %s request not stated!\n", 638 engine->name, 639 error_repr(p->error[i])); 640 err = -ETIME; 641 goto out; 642 } 643 644 /* Kick the tasklet to process the error */ 645 intel_engine_flush_submission(engine); 646 if (client[i]->fence.error != p->error[i]) { 647 pr_err("%s: %s request completed with wrong error code: %d\n", 648 engine->name, 649 error_repr(p->error[i]), 650 client[i]->fence.error); 651 err = -EINVAL; 652 goto out; 653 } 654 } 655 656 out: 657 for (i = 0; i < ARRAY_SIZE(client); i++) 658 if (client[i]) 659 i915_request_put(client[i]); 660 if (err) { 661 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 662 engine->name, p - phases, 663 p->error[0], p->error[1]); 664 break; 665 } 666 } 667 668 engine_heartbeat_enable(engine, heartbeat); 669 if (err) { 670 intel_gt_set_wedged(gt); 671 return err; 672 } 673 } 674 675 return 0; 676 } 677 678 static int 679 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 680 { 681 u32 *cs; 682 683 cs = intel_ring_begin(rq, 10); 684 if (IS_ERR(cs)) 685 return PTR_ERR(cs); 686 687 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 688 689 *cs++ = MI_SEMAPHORE_WAIT | 690 MI_SEMAPHORE_GLOBAL_GTT | 691 MI_SEMAPHORE_POLL | 692 MI_SEMAPHORE_SAD_NEQ_SDD; 693 *cs++ = 0; 694 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 695 *cs++ = 0; 696 697 if (idx > 0) { 698 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 699 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 700 *cs++ = 0; 701 *cs++ = 1; 702 } else { 703 *cs++ = MI_NOOP; 704 *cs++ = MI_NOOP; 705 *cs++ = MI_NOOP; 706 *cs++ = MI_NOOP; 707 } 708 709 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 710 711 intel_ring_advance(rq, cs); 712 return 0; 713 } 714 715 static struct i915_request * 716 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 717 { 718 struct intel_context *ce; 719 struct 
i915_request *rq; 720 int err; 721 722 ce = intel_context_create(engine); 723 if (IS_ERR(ce)) 724 return ERR_CAST(ce); 725 726 rq = intel_context_create_request(ce); 727 if (IS_ERR(rq)) 728 goto out_ce; 729 730 err = 0; 731 if (rq->engine->emit_init_breadcrumb) 732 err = rq->engine->emit_init_breadcrumb(rq); 733 if (err == 0) 734 err = emit_semaphore_chain(rq, vma, idx); 735 if (err == 0) 736 i915_request_get(rq); 737 i915_request_add(rq); 738 if (err) 739 rq = ERR_PTR(err); 740 741 out_ce: 742 intel_context_put(ce); 743 return rq; 744 } 745 746 static int 747 release_queue(struct intel_engine_cs *engine, 748 struct i915_vma *vma, 749 int idx, int prio) 750 { 751 struct i915_sched_attr attr = { 752 .priority = prio, 753 }; 754 struct i915_request *rq; 755 u32 *cs; 756 757 rq = intel_engine_create_kernel_request(engine); 758 if (IS_ERR(rq)) 759 return PTR_ERR(rq); 760 761 cs = intel_ring_begin(rq, 4); 762 if (IS_ERR(cs)) { 763 i915_request_add(rq); 764 return PTR_ERR(cs); 765 } 766 767 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 768 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 769 *cs++ = 0; 770 *cs++ = 1; 771 772 intel_ring_advance(rq, cs); 773 774 i915_request_get(rq); 775 i915_request_add(rq); 776 777 local_bh_disable(); 778 engine->schedule(rq, &attr); 779 local_bh_enable(); /* kick tasklet */ 780 781 i915_request_put(rq); 782 783 return 0; 784 } 785 786 static int 787 slice_semaphore_queue(struct intel_engine_cs *outer, 788 struct i915_vma *vma, 789 int count) 790 { 791 struct intel_engine_cs *engine; 792 struct i915_request *head; 793 enum intel_engine_id id; 794 int err, i, n = 0; 795 796 head = semaphore_queue(outer, vma, n++); 797 if (IS_ERR(head)) 798 return PTR_ERR(head); 799 800 for_each_engine(engine, outer->gt, id) { 801 for (i = 0; i < count; i++) { 802 struct i915_request *rq; 803 804 rq = semaphore_queue(engine, vma, n++); 805 if (IS_ERR(rq)) { 806 err = PTR_ERR(rq); 807 goto out; 808 } 809 810 i915_request_put(rq); 811 } 812 } 813 814 err = release_queue(outer, vma, n, INT_MAX); 815 if (err) 816 goto out; 817 818 if (i915_request_wait(head, 0, 819 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { 820 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 821 count, n); 822 GEM_TRACE_DUMP(); 823 intel_gt_set_wedged(outer->gt); 824 err = -EIO; 825 } 826 827 out: 828 i915_request_put(head); 829 return err; 830 } 831 832 static int live_timeslice_preempt(void *arg) 833 { 834 struct intel_gt *gt = arg; 835 struct drm_i915_gem_object *obj; 836 struct i915_vma *vma; 837 void *vaddr; 838 int err = 0; 839 int count; 840 841 /* 842 * If a request takes too long, we would like to give other users 843 * a fair go on the GPU. In particular, users may create batches 844 * that wait upon external input, where that input may even be 845 * supplied by another GPU job. To avoid blocking forever, we 846 * need to preempt the current task and replace it with another 847 * ready task. 
848 */ 849 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 850 return 0; 851 852 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 853 if (IS_ERR(obj)) 854 return PTR_ERR(obj); 855 856 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 857 if (IS_ERR(vma)) { 858 err = PTR_ERR(vma); 859 goto err_obj; 860 } 861 862 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 863 if (IS_ERR(vaddr)) { 864 err = PTR_ERR(vaddr); 865 goto err_obj; 866 } 867 868 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 869 if (err) 870 goto err_map; 871 872 err = i915_vma_sync(vma); 873 if (err) 874 goto err_pin; 875 876 for_each_prime_number_from(count, 1, 16) { 877 struct intel_engine_cs *engine; 878 enum intel_engine_id id; 879 880 for_each_engine(engine, gt, id) { 881 unsigned long saved; 882 883 if (!intel_engine_has_preemption(engine)) 884 continue; 885 886 memset(vaddr, 0, PAGE_SIZE); 887 888 engine_heartbeat_disable(engine, &saved); 889 err = slice_semaphore_queue(engine, vma, count); 890 engine_heartbeat_enable(engine, saved); 891 if (err) 892 goto err_pin; 893 894 if (igt_flush_test(gt->i915)) { 895 err = -EIO; 896 goto err_pin; 897 } 898 } 899 } 900 901 err_pin: 902 i915_vma_unpin(vma); 903 err_map: 904 i915_gem_object_unpin_map(obj); 905 err_obj: 906 i915_gem_object_put(obj); 907 return err; 908 } 909 910 static struct i915_request * 911 create_rewinder(struct intel_context *ce, 912 struct i915_request *wait, 913 void *slot, int idx) 914 { 915 const u32 offset = 916 i915_ggtt_offset(ce->engine->status_page.vma) + 917 offset_in_page(slot); 918 struct i915_request *rq; 919 u32 *cs; 920 int err; 921 922 rq = intel_context_create_request(ce); 923 if (IS_ERR(rq)) 924 return rq; 925 926 if (wait) { 927 err = i915_request_await_dma_fence(rq, &wait->fence); 928 if (err) 929 goto err; 930 } 931 932 cs = intel_ring_begin(rq, 10); 933 if (IS_ERR(cs)) { 934 err = PTR_ERR(cs); 935 goto err; 936 } 937 938 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 939 *cs++ = MI_NOOP; 940 941 *cs++ = MI_SEMAPHORE_WAIT | 942 MI_SEMAPHORE_GLOBAL_GTT | 943 MI_SEMAPHORE_POLL | 944 MI_SEMAPHORE_SAD_NEQ_SDD; 945 *cs++ = 0; 946 *cs++ = offset; 947 *cs++ = 0; 948 949 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 950 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 951 *cs++ = offset + idx * sizeof(u32); 952 *cs++ = 0; 953 954 intel_ring_advance(rq, cs); 955 956 rq->sched.attr.priority = I915_PRIORITY_MASK; 957 err = 0; 958 err: 959 i915_request_get(rq); 960 i915_request_add(rq); 961 if (err) { 962 i915_request_put(rq); 963 return ERR_PTR(err); 964 } 965 966 return rq; 967 } 968 969 static int live_timeslice_rewind(void *arg) 970 { 971 struct intel_gt *gt = arg; 972 struct intel_engine_cs *engine; 973 enum intel_engine_id id; 974 975 /* 976 * The usual presumption on timeslice expiration is that we replace 977 * the active context with another. However, given a chain of 978 * dependencies we may end up with replacing the context with itself, 979 * but only a few of those requests, forcing us to rewind the 980 * RING_TAIL of the original request. 
981 */ 982 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 983 return 0; 984 985 for_each_engine(engine, gt, id) { 986 enum { A1, A2, B1 }; 987 enum { X = 1, Y, Z }; 988 struct i915_request *rq[3] = {}; 989 struct intel_context *ce; 990 unsigned long heartbeat; 991 unsigned long timeslice; 992 int i, err = 0; 993 u32 *slot; 994 995 if (!intel_engine_has_timeslices(engine)) 996 continue; 997 998 /* 999 * A:rq1 -- semaphore wait, timestamp X 1000 * A:rq2 -- write timestamp Y 1001 * 1002 * B:rq1 [await A:rq1] -- write timestamp Z 1003 * 1004 * Force timeslice, release semaphore. 1005 * 1006 * Expect execution/evaluation order XZY 1007 */ 1008 1009 engine_heartbeat_disable(engine, &heartbeat); 1010 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1011 1012 slot = memset32(engine->status_page.addr + 1000, 0, 4); 1013 1014 ce = intel_context_create(engine); 1015 if (IS_ERR(ce)) { 1016 err = PTR_ERR(ce); 1017 goto err; 1018 } 1019 1020 rq[0] = create_rewinder(ce, NULL, slot, 1); 1021 if (IS_ERR(rq[0])) { 1022 intel_context_put(ce); 1023 goto err; 1024 } 1025 1026 rq[1] = create_rewinder(ce, NULL, slot, 2); 1027 intel_context_put(ce); 1028 if (IS_ERR(rq[1])) 1029 goto err; 1030 1031 err = wait_for_submit(engine, rq[1], HZ / 2); 1032 if (err) { 1033 pr_err("%s: failed to submit first context\n", 1034 engine->name); 1035 goto err; 1036 } 1037 1038 ce = intel_context_create(engine); 1039 if (IS_ERR(ce)) { 1040 err = PTR_ERR(ce); 1041 goto err; 1042 } 1043 1044 rq[2] = create_rewinder(ce, rq[0], slot, 3); 1045 intel_context_put(ce); 1046 if (IS_ERR(rq[2])) 1047 goto err; 1048 1049 err = wait_for_submit(engine, rq[2], HZ / 2); 1050 if (err) { 1051 pr_err("%s: failed to submit second context\n", 1052 engine->name); 1053 goto err; 1054 } 1055 GEM_BUG_ON(!timer_pending(&engine->execlists.timer)); 1056 1057 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1058 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1059 GEM_BUG_ON(!i915_request_is_active(rq[A2])); 1060 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1061 1062 /* Wait for the timeslice to kick in */ 1063 del_timer(&engine->execlists.timer); 1064 tasklet_hi_schedule(&engine->execlists.tasklet); 1065 intel_engine_flush_submission(engine); 1066 1067 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1068 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1069 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1070 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1071 1072 /* Release the hounds! 
*/ 1073 slot[0] = 1; 1074 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1075 1076 for (i = 1; i <= 3; i++) { 1077 unsigned long timeout = jiffies + HZ / 2; 1078 1079 while (!READ_ONCE(slot[i]) && 1080 time_before(jiffies, timeout)) 1081 ; 1082 1083 if (!time_before(jiffies, timeout)) { 1084 pr_err("%s: rq[%d] timed out\n", 1085 engine->name, i - 1); 1086 err = -ETIME; 1087 goto err; 1088 } 1089 1090 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1091 } 1092 1093 /* XZY: XZ < XY */ 1094 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1095 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1096 engine->name, 1097 slot[Z] - slot[X], 1098 slot[Y] - slot[X]); 1099 err = -EINVAL; 1100 } 1101 1102 err: 1103 memset32(&slot[0], -1, 4); 1104 wmb(); 1105 1106 engine->props.timeslice_duration_ms = timeslice; 1107 engine_heartbeat_enable(engine, heartbeat); 1108 for (i = 0; i < 3; i++) 1109 i915_request_put(rq[i]); 1110 if (igt_flush_test(gt->i915)) 1111 err = -EIO; 1112 if (err) 1113 return err; 1114 } 1115 1116 return 0; 1117 } 1118 1119 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1120 { 1121 struct i915_request *rq; 1122 1123 rq = intel_engine_create_kernel_request(engine); 1124 if (IS_ERR(rq)) 1125 return rq; 1126 1127 i915_request_get(rq); 1128 i915_request_add(rq); 1129 1130 return rq; 1131 } 1132 1133 static long timeslice_threshold(const struct intel_engine_cs *engine) 1134 { 1135 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; 1136 } 1137 1138 static int live_timeslice_queue(void *arg) 1139 { 1140 struct intel_gt *gt = arg; 1141 struct drm_i915_gem_object *obj; 1142 struct intel_engine_cs *engine; 1143 enum intel_engine_id id; 1144 struct i915_vma *vma; 1145 void *vaddr; 1146 int err = 0; 1147 1148 /* 1149 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1150 * timeslicing between them disabled, we *do* enable timeslicing 1151 * if the queue demands it. (Normally, we do not submit if 1152 * ELSP[1] is already occupied, so must rely on timeslicing to 1153 * eject ELSP[0] in favour of the queue.) 
1154 */ 1155 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1156 return 0; 1157 1158 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1159 if (IS_ERR(obj)) 1160 return PTR_ERR(obj); 1161 1162 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1163 if (IS_ERR(vma)) { 1164 err = PTR_ERR(vma); 1165 goto err_obj; 1166 } 1167 1168 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1169 if (IS_ERR(vaddr)) { 1170 err = PTR_ERR(vaddr); 1171 goto err_obj; 1172 } 1173 1174 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1175 if (err) 1176 goto err_map; 1177 1178 err = i915_vma_sync(vma); 1179 if (err) 1180 goto err_pin; 1181 1182 for_each_engine(engine, gt, id) { 1183 struct i915_sched_attr attr = { 1184 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1185 }; 1186 struct i915_request *rq, *nop; 1187 unsigned long saved; 1188 1189 if (!intel_engine_has_preemption(engine)) 1190 continue; 1191 1192 engine_heartbeat_disable(engine, &saved); 1193 memset(vaddr, 0, PAGE_SIZE); 1194 1195 /* ELSP[0]: semaphore wait */ 1196 rq = semaphore_queue(engine, vma, 0); 1197 if (IS_ERR(rq)) { 1198 err = PTR_ERR(rq); 1199 goto err_heartbeat; 1200 } 1201 engine->schedule(rq, &attr); 1202 err = wait_for_submit(engine, rq, HZ / 2); 1203 if (err) { 1204 pr_err("%s: Timed out trying to submit semaphores\n", 1205 engine->name); 1206 goto err_rq; 1207 } 1208 1209 /* ELSP[1]: nop request */ 1210 nop = nop_request(engine); 1211 if (IS_ERR(nop)) { 1212 err = PTR_ERR(nop); 1213 goto err_rq; 1214 } 1215 err = wait_for_submit(engine, nop, HZ / 2); 1216 i915_request_put(nop); 1217 if (err) { 1218 pr_err("%s: Timed out trying to submit nop\n", 1219 engine->name); 1220 goto err_rq; 1221 } 1222 1223 GEM_BUG_ON(i915_request_completed(rq)); 1224 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1225 1226 /* Queue: semaphore signal, matching priority as semaphore */ 1227 err = release_queue(engine, vma, 1, effective_prio(rq)); 1228 if (err) 1229 goto err_rq; 1230 1231 intel_engine_flush_submission(engine); 1232 if (!READ_ONCE(engine->execlists.timer.expires) && 1233 !i915_request_completed(rq)) { 1234 struct drm_printer p = 1235 drm_info_printer(gt->i915->drm.dev); 1236 1237 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", 1238 engine->name); 1239 intel_engine_dump(engine, &p, 1240 "%s\n", engine->name); 1241 GEM_TRACE_DUMP(); 1242 1243 memset(vaddr, 0xff, PAGE_SIZE); 1244 err = -EINVAL; 1245 } 1246 1247 /* Timeslice every jiffy, so within 2 we should signal */ 1248 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { 1249 struct drm_printer p = 1250 drm_info_printer(gt->i915->drm.dev); 1251 1252 pr_err("%s: Failed to timeslice into queue\n", 1253 engine->name); 1254 intel_engine_dump(engine, &p, 1255 "%s\n", engine->name); 1256 1257 memset(vaddr, 0xff, PAGE_SIZE); 1258 err = -EIO; 1259 } 1260 err_rq: 1261 i915_request_put(rq); 1262 err_heartbeat: 1263 engine_heartbeat_enable(engine, saved); 1264 if (err) 1265 break; 1266 } 1267 1268 err_pin: 1269 i915_vma_unpin(vma); 1270 err_map: 1271 i915_gem_object_unpin_map(obj); 1272 err_obj: 1273 i915_gem_object_put(obj); 1274 return err; 1275 } 1276 1277 static int live_busywait_preempt(void *arg) 1278 { 1279 struct intel_gt *gt = arg; 1280 struct i915_gem_context *ctx_hi, *ctx_lo; 1281 struct intel_engine_cs *engine; 1282 struct drm_i915_gem_object *obj; 1283 struct i915_vma *vma; 1284 enum intel_engine_id id; 1285 int err = -ENOMEM; 1286 u32 *map; 1287 1288 /* 1289 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1290 * preempt the busywaits used 
to synchronise between rings. 1291 */ 1292 1293 ctx_hi = kernel_context(gt->i915); 1294 if (!ctx_hi) 1295 return -ENOMEM; 1296 ctx_hi->sched.priority = 1297 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1298 1299 ctx_lo = kernel_context(gt->i915); 1300 if (!ctx_lo) 1301 goto err_ctx_hi; 1302 ctx_lo->sched.priority = 1303 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1304 1305 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1306 if (IS_ERR(obj)) { 1307 err = PTR_ERR(obj); 1308 goto err_ctx_lo; 1309 } 1310 1311 map = i915_gem_object_pin_map(obj, I915_MAP_WC); 1312 if (IS_ERR(map)) { 1313 err = PTR_ERR(map); 1314 goto err_obj; 1315 } 1316 1317 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1318 if (IS_ERR(vma)) { 1319 err = PTR_ERR(vma); 1320 goto err_map; 1321 } 1322 1323 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1324 if (err) 1325 goto err_map; 1326 1327 err = i915_vma_sync(vma); 1328 if (err) 1329 goto err_vma; 1330 1331 for_each_engine(engine, gt, id) { 1332 struct i915_request *lo, *hi; 1333 struct igt_live_test t; 1334 u32 *cs; 1335 1336 if (!intel_engine_has_preemption(engine)) 1337 continue; 1338 1339 if (!intel_engine_can_store_dword(engine)) 1340 continue; 1341 1342 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1343 err = -EIO; 1344 goto err_vma; 1345 } 1346 1347 /* 1348 * We create two requests. The low priority request 1349 * busywaits on a semaphore (inside the ringbuffer where 1350 * is should be preemptible) and the high priority requests 1351 * uses a MI_STORE_DWORD_IMM to update the semaphore value 1352 * allowing the first request to complete. If preemption 1353 * fails, we hang instead. 1354 */ 1355 1356 lo = igt_request_alloc(ctx_lo, engine); 1357 if (IS_ERR(lo)) { 1358 err = PTR_ERR(lo); 1359 goto err_vma; 1360 } 1361 1362 cs = intel_ring_begin(lo, 8); 1363 if (IS_ERR(cs)) { 1364 err = PTR_ERR(cs); 1365 i915_request_add(lo); 1366 goto err_vma; 1367 } 1368 1369 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1370 *cs++ = i915_ggtt_offset(vma); 1371 *cs++ = 0; 1372 *cs++ = 1; 1373 1374 /* XXX Do we need a flush + invalidate here? 
*/ 1375 1376 *cs++ = MI_SEMAPHORE_WAIT | 1377 MI_SEMAPHORE_GLOBAL_GTT | 1378 MI_SEMAPHORE_POLL | 1379 MI_SEMAPHORE_SAD_EQ_SDD; 1380 *cs++ = 0; 1381 *cs++ = i915_ggtt_offset(vma); 1382 *cs++ = 0; 1383 1384 intel_ring_advance(lo, cs); 1385 1386 i915_request_get(lo); 1387 i915_request_add(lo); 1388 1389 if (wait_for(READ_ONCE(*map), 10)) { 1390 i915_request_put(lo); 1391 err = -ETIMEDOUT; 1392 goto err_vma; 1393 } 1394 1395 /* Low priority request should be busywaiting now */ 1396 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1397 i915_request_put(lo); 1398 pr_err("%s: Busywaiting request did not!\n", 1399 engine->name); 1400 err = -EIO; 1401 goto err_vma; 1402 } 1403 1404 hi = igt_request_alloc(ctx_hi, engine); 1405 if (IS_ERR(hi)) { 1406 err = PTR_ERR(hi); 1407 i915_request_put(lo); 1408 goto err_vma; 1409 } 1410 1411 cs = intel_ring_begin(hi, 4); 1412 if (IS_ERR(cs)) { 1413 err = PTR_ERR(cs); 1414 i915_request_add(hi); 1415 i915_request_put(lo); 1416 goto err_vma; 1417 } 1418 1419 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1420 *cs++ = i915_ggtt_offset(vma); 1421 *cs++ = 0; 1422 *cs++ = 0; 1423 1424 intel_ring_advance(hi, cs); 1425 i915_request_add(hi); 1426 1427 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1428 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1429 1430 pr_err("%s: Failed to preempt semaphore busywait!\n", 1431 engine->name); 1432 1433 intel_engine_dump(engine, &p, "%s\n", engine->name); 1434 GEM_TRACE_DUMP(); 1435 1436 i915_request_put(lo); 1437 intel_gt_set_wedged(gt); 1438 err = -EIO; 1439 goto err_vma; 1440 } 1441 GEM_BUG_ON(READ_ONCE(*map)); 1442 i915_request_put(lo); 1443 1444 if (igt_live_test_end(&t)) { 1445 err = -EIO; 1446 goto err_vma; 1447 } 1448 } 1449 1450 err = 0; 1451 err_vma: 1452 i915_vma_unpin(vma); 1453 err_map: 1454 i915_gem_object_unpin_map(obj); 1455 err_obj: 1456 i915_gem_object_put(obj); 1457 err_ctx_lo: 1458 kernel_context_close(ctx_lo); 1459 err_ctx_hi: 1460 kernel_context_close(ctx_hi); 1461 return err; 1462 } 1463 1464 static struct i915_request * 1465 spinner_create_request(struct igt_spinner *spin, 1466 struct i915_gem_context *ctx, 1467 struct intel_engine_cs *engine, 1468 u32 arb) 1469 { 1470 struct intel_context *ce; 1471 struct i915_request *rq; 1472 1473 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1474 if (IS_ERR(ce)) 1475 return ERR_CAST(ce); 1476 1477 rq = igt_spinner_create_request(spin, ce, arb); 1478 intel_context_put(ce); 1479 return rq; 1480 } 1481 1482 static int live_preempt(void *arg) 1483 { 1484 struct intel_gt *gt = arg; 1485 struct i915_gem_context *ctx_hi, *ctx_lo; 1486 struct igt_spinner spin_hi, spin_lo; 1487 struct intel_engine_cs *engine; 1488 enum intel_engine_id id; 1489 int err = -ENOMEM; 1490 1491 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1492 return 0; 1493 1494 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1495 pr_err("Logical preemption supported, but not exposed\n"); 1496 1497 if (igt_spinner_init(&spin_hi, gt)) 1498 return -ENOMEM; 1499 1500 if (igt_spinner_init(&spin_lo, gt)) 1501 goto err_spin_hi; 1502 1503 ctx_hi = kernel_context(gt->i915); 1504 if (!ctx_hi) 1505 goto err_spin_lo; 1506 ctx_hi->sched.priority = 1507 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1508 1509 ctx_lo = kernel_context(gt->i915); 1510 if (!ctx_lo) 1511 goto err_ctx_hi; 1512 ctx_lo->sched.priority = 1513 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1514 1515 for_each_engine(engine, gt, id) { 1516 struct igt_live_test t; 1517 struct i915_request *rq; 1518 1519 if 
(!intel_engine_has_preemption(engine)) 1520 continue; 1521 1522 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1523 err = -EIO; 1524 goto err_ctx_lo; 1525 } 1526 1527 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1528 MI_ARB_CHECK); 1529 if (IS_ERR(rq)) { 1530 err = PTR_ERR(rq); 1531 goto err_ctx_lo; 1532 } 1533 1534 i915_request_add(rq); 1535 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1536 GEM_TRACE("lo spinner failed to start\n"); 1537 GEM_TRACE_DUMP(); 1538 intel_gt_set_wedged(gt); 1539 err = -EIO; 1540 goto err_ctx_lo; 1541 } 1542 1543 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1544 MI_ARB_CHECK); 1545 if (IS_ERR(rq)) { 1546 igt_spinner_end(&spin_lo); 1547 err = PTR_ERR(rq); 1548 goto err_ctx_lo; 1549 } 1550 1551 i915_request_add(rq); 1552 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1553 GEM_TRACE("hi spinner failed to start\n"); 1554 GEM_TRACE_DUMP(); 1555 intel_gt_set_wedged(gt); 1556 err = -EIO; 1557 goto err_ctx_lo; 1558 } 1559 1560 igt_spinner_end(&spin_hi); 1561 igt_spinner_end(&spin_lo); 1562 1563 if (igt_live_test_end(&t)) { 1564 err = -EIO; 1565 goto err_ctx_lo; 1566 } 1567 } 1568 1569 err = 0; 1570 err_ctx_lo: 1571 kernel_context_close(ctx_lo); 1572 err_ctx_hi: 1573 kernel_context_close(ctx_hi); 1574 err_spin_lo: 1575 igt_spinner_fini(&spin_lo); 1576 err_spin_hi: 1577 igt_spinner_fini(&spin_hi); 1578 return err; 1579 } 1580 1581 static int live_late_preempt(void *arg) 1582 { 1583 struct intel_gt *gt = arg; 1584 struct i915_gem_context *ctx_hi, *ctx_lo; 1585 struct igt_spinner spin_hi, spin_lo; 1586 struct intel_engine_cs *engine; 1587 struct i915_sched_attr attr = {}; 1588 enum intel_engine_id id; 1589 int err = -ENOMEM; 1590 1591 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1592 return 0; 1593 1594 if (igt_spinner_init(&spin_hi, gt)) 1595 return -ENOMEM; 1596 1597 if (igt_spinner_init(&spin_lo, gt)) 1598 goto err_spin_hi; 1599 1600 ctx_hi = kernel_context(gt->i915); 1601 if (!ctx_hi) 1602 goto err_spin_lo; 1603 1604 ctx_lo = kernel_context(gt->i915); 1605 if (!ctx_lo) 1606 goto err_ctx_hi; 1607 1608 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1609 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1610 1611 for_each_engine(engine, gt, id) { 1612 struct igt_live_test t; 1613 struct i915_request *rq; 1614 1615 if (!intel_engine_has_preemption(engine)) 1616 continue; 1617 1618 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1619 err = -EIO; 1620 goto err_ctx_lo; 1621 } 1622 1623 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1624 MI_ARB_CHECK); 1625 if (IS_ERR(rq)) { 1626 err = PTR_ERR(rq); 1627 goto err_ctx_lo; 1628 } 1629 1630 i915_request_add(rq); 1631 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1632 pr_err("First context failed to start\n"); 1633 goto err_wedged; 1634 } 1635 1636 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1637 MI_NOOP); 1638 if (IS_ERR(rq)) { 1639 igt_spinner_end(&spin_lo); 1640 err = PTR_ERR(rq); 1641 goto err_ctx_lo; 1642 } 1643 1644 i915_request_add(rq); 1645 if (igt_wait_for_spinner(&spin_hi, rq)) { 1646 pr_err("Second context overtook first?\n"); 1647 goto err_wedged; 1648 } 1649 1650 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1651 engine->schedule(rq, &attr); 1652 1653 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1654 pr_err("High priority context failed to preempt the low priority context\n"); 1655 GEM_TRACE_DUMP(); 1656 goto err_wedged; 1657 } 1658 1659 igt_spinner_end(&spin_hi); 1660 igt_spinner_end(&spin_lo); 1661 1662 if (igt_live_test_end(&t)) { 1663 err = -EIO; 1664 goto err_ctx_lo; 1665 } 1666 } 1667 1668 err = 0; 1669 err_ctx_lo: 1670 kernel_context_close(ctx_lo); 1671 err_ctx_hi: 1672 kernel_context_close(ctx_hi); 1673 err_spin_lo: 1674 igt_spinner_fini(&spin_lo); 1675 err_spin_hi: 1676 igt_spinner_fini(&spin_hi); 1677 return err; 1678 1679 err_wedged: 1680 igt_spinner_end(&spin_hi); 1681 igt_spinner_end(&spin_lo); 1682 intel_gt_set_wedged(gt); 1683 err = -EIO; 1684 goto err_ctx_lo; 1685 } 1686 1687 struct preempt_client { 1688 struct igt_spinner spin; 1689 struct i915_gem_context *ctx; 1690 }; 1691 1692 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1693 { 1694 c->ctx = kernel_context(gt->i915); 1695 if (!c->ctx) 1696 return -ENOMEM; 1697 1698 if (igt_spinner_init(&c->spin, gt)) 1699 goto err_ctx; 1700 1701 return 0; 1702 1703 err_ctx: 1704 kernel_context_close(c->ctx); 1705 return -ENOMEM; 1706 } 1707 1708 static void preempt_client_fini(struct preempt_client *c) 1709 { 1710 igt_spinner_fini(&c->spin); 1711 kernel_context_close(c->ctx); 1712 } 1713 1714 static int live_nopreempt(void *arg) 1715 { 1716 struct intel_gt *gt = arg; 1717 struct intel_engine_cs *engine; 1718 struct preempt_client a, b; 1719 enum intel_engine_id id; 1720 int err = -ENOMEM; 1721 1722 /* 1723 * Verify that we can disable preemption for an individual request 1724 * that may be being observed and not want to be interrupted. 1725 */ 1726 1727 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1728 return 0; 1729 1730 if (preempt_client_init(gt, &a)) 1731 return -ENOMEM; 1732 if (preempt_client_init(gt, &b)) 1733 goto err_client_a; 1734 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1735 1736 for_each_engine(engine, gt, id) { 1737 struct i915_request *rq_a, *rq_b; 1738 1739 if (!intel_engine_has_preemption(engine)) 1740 continue; 1741 1742 engine->execlists.preempt_hang.count = 0; 1743 1744 rq_a = spinner_create_request(&a.spin, 1745 a.ctx, engine, 1746 MI_ARB_CHECK); 1747 if (IS_ERR(rq_a)) { 1748 err = PTR_ERR(rq_a); 1749 goto err_client_b; 1750 } 1751 1752 /* Low priority client, but unpreemptable! 
*/ 1753 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 1754 1755 i915_request_add(rq_a); 1756 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1757 pr_err("First client failed to start\n"); 1758 goto err_wedged; 1759 } 1760 1761 rq_b = spinner_create_request(&b.spin, 1762 b.ctx, engine, 1763 MI_ARB_CHECK); 1764 if (IS_ERR(rq_b)) { 1765 err = PTR_ERR(rq_b); 1766 goto err_client_b; 1767 } 1768 1769 i915_request_add(rq_b); 1770 1771 /* B is much more important than A! (But A is unpreemptable.) */ 1772 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 1773 1774 /* Wait long enough for preemption and timeslicing */ 1775 if (igt_wait_for_spinner(&b.spin, rq_b)) { 1776 pr_err("Second client started too early!\n"); 1777 goto err_wedged; 1778 } 1779 1780 igt_spinner_end(&a.spin); 1781 1782 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1783 pr_err("Second client failed to start\n"); 1784 goto err_wedged; 1785 } 1786 1787 igt_spinner_end(&b.spin); 1788 1789 if (engine->execlists.preempt_hang.count) { 1790 pr_err("Preemption recorded x%d; should have been suppressed!\n", 1791 engine->execlists.preempt_hang.count); 1792 err = -EINVAL; 1793 goto err_wedged; 1794 } 1795 1796 if (igt_flush_test(gt->i915)) 1797 goto err_wedged; 1798 } 1799 1800 err = 0; 1801 err_client_b: 1802 preempt_client_fini(&b); 1803 err_client_a: 1804 preempt_client_fini(&a); 1805 return err; 1806 1807 err_wedged: 1808 igt_spinner_end(&b.spin); 1809 igt_spinner_end(&a.spin); 1810 intel_gt_set_wedged(gt); 1811 err = -EIO; 1812 goto err_client_b; 1813 } 1814 1815 struct live_preempt_cancel { 1816 struct intel_engine_cs *engine; 1817 struct preempt_client a, b; 1818 }; 1819 1820 static int __cancel_active0(struct live_preempt_cancel *arg) 1821 { 1822 struct i915_request *rq; 1823 struct igt_live_test t; 1824 int err; 1825 1826 /* Preempt cancel of ELSP0 */ 1827 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1828 if (igt_live_test_begin(&t, arg->engine->i915, 1829 __func__, arg->engine->name)) 1830 return -EIO; 1831 1832 rq = spinner_create_request(&arg->a.spin, 1833 arg->a.ctx, arg->engine, 1834 MI_ARB_CHECK); 1835 if (IS_ERR(rq)) 1836 return PTR_ERR(rq); 1837 1838 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1839 i915_request_get(rq); 1840 i915_request_add(rq); 1841 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1842 err = -EIO; 1843 goto out; 1844 } 1845 1846 intel_context_set_banned(rq->context); 1847 err = intel_engine_pulse(arg->engine); 1848 if (err) 1849 goto out; 1850 1851 err = wait_for_reset(arg->engine, rq, HZ / 2); 1852 if (err) { 1853 pr_err("Cancelled inflight0 request did not reset\n"); 1854 goto out; 1855 } 1856 1857 out: 1858 i915_request_put(rq); 1859 if (igt_live_test_end(&t)) 1860 err = -EIO; 1861 return err; 1862 } 1863 1864 static int __cancel_active1(struct live_preempt_cancel *arg) 1865 { 1866 struct i915_request *rq[2] = {}; 1867 struct igt_live_test t; 1868 int err; 1869 1870 /* Preempt cancel of ELSP1 */ 1871 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1872 if (igt_live_test_begin(&t, arg->engine->i915, 1873 __func__, arg->engine->name)) 1874 return -EIO; 1875 1876 rq[0] = spinner_create_request(&arg->a.spin, 1877 arg->a.ctx, arg->engine, 1878 MI_NOOP); /* no preemption */ 1879 if (IS_ERR(rq[0])) 1880 return PTR_ERR(rq[0]); 1881 1882 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1883 i915_request_get(rq[0]); 1884 i915_request_add(rq[0]); 1885 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1886 err = -EIO; 1887 goto out; 1888 } 1889 1890 rq[1] = spinner_create_request(&arg->b.spin, 1891 
arg->b.ctx, arg->engine, 1892 MI_ARB_CHECK); 1893 if (IS_ERR(rq[1])) { 1894 err = PTR_ERR(rq[1]); 1895 goto out; 1896 } 1897 1898 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1899 i915_request_get(rq[1]); 1900 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1901 i915_request_add(rq[1]); 1902 if (err) 1903 goto out; 1904 1905 intel_context_set_banned(rq[1]->context); 1906 err = intel_engine_pulse(arg->engine); 1907 if (err) 1908 goto out; 1909 1910 igt_spinner_end(&arg->a.spin); 1911 err = wait_for_reset(arg->engine, rq[1], HZ / 2); 1912 if (err) 1913 goto out; 1914 1915 if (rq[0]->fence.error != 0) { 1916 pr_err("Normal inflight0 request did not complete\n"); 1917 err = -EINVAL; 1918 goto out; 1919 } 1920 1921 if (rq[1]->fence.error != -EIO) { 1922 pr_err("Cancelled inflight1 request did not report -EIO\n"); 1923 err = -EINVAL; 1924 goto out; 1925 } 1926 1927 out: 1928 i915_request_put(rq[1]); 1929 i915_request_put(rq[0]); 1930 if (igt_live_test_end(&t)) 1931 err = -EIO; 1932 return err; 1933 } 1934 1935 static int __cancel_queued(struct live_preempt_cancel *arg) 1936 { 1937 struct i915_request *rq[3] = {}; 1938 struct igt_live_test t; 1939 int err; 1940 1941 /* Full ELSP and one in the wings */ 1942 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1943 if (igt_live_test_begin(&t, arg->engine->i915, 1944 __func__, arg->engine->name)) 1945 return -EIO; 1946 1947 rq[0] = spinner_create_request(&arg->a.spin, 1948 arg->a.ctx, arg->engine, 1949 MI_ARB_CHECK); 1950 if (IS_ERR(rq[0])) 1951 return PTR_ERR(rq[0]); 1952 1953 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1954 i915_request_get(rq[0]); 1955 i915_request_add(rq[0]); 1956 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1957 err = -EIO; 1958 goto out; 1959 } 1960 1961 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 1962 if (IS_ERR(rq[1])) { 1963 err = PTR_ERR(rq[1]); 1964 goto out; 1965 } 1966 1967 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1968 i915_request_get(rq[1]); 1969 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1970 i915_request_add(rq[1]); 1971 if (err) 1972 goto out; 1973 1974 rq[2] = spinner_create_request(&arg->b.spin, 1975 arg->a.ctx, arg->engine, 1976 MI_ARB_CHECK); 1977 if (IS_ERR(rq[2])) { 1978 err = PTR_ERR(rq[2]); 1979 goto out; 1980 } 1981 1982 i915_request_get(rq[2]); 1983 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 1984 i915_request_add(rq[2]); 1985 if (err) 1986 goto out; 1987 1988 intel_context_set_banned(rq[2]->context); 1989 err = intel_engine_pulse(arg->engine); 1990 if (err) 1991 goto out; 1992 1993 err = wait_for_reset(arg->engine, rq[2], HZ / 2); 1994 if (err) 1995 goto out; 1996 1997 if (rq[0]->fence.error != -EIO) { 1998 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1999 err = -EINVAL; 2000 goto out; 2001 } 2002 2003 if (rq[1]->fence.error != 0) { 2004 pr_err("Normal inflight1 request did not complete\n"); 2005 err = -EINVAL; 2006 goto out; 2007 } 2008 2009 if (rq[2]->fence.error != -EIO) { 2010 pr_err("Cancelled queued request did not report -EIO\n"); 2011 err = -EINVAL; 2012 goto out; 2013 } 2014 2015 out: 2016 i915_request_put(rq[2]); 2017 i915_request_put(rq[1]); 2018 i915_request_put(rq[0]); 2019 if (igt_live_test_end(&t)) 2020 err = -EIO; 2021 return err; 2022 } 2023 2024 static int __cancel_hostile(struct live_preempt_cancel *arg) 2025 { 2026 struct i915_request *rq; 2027 int err; 2028 2029 /* Preempt cancel non-preemptible spinner in ELSP0 */ 2030 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2031 return 0; 2032 2033 
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2034 rq = spinner_create_request(&arg->a.spin, 2035 arg->a.ctx, arg->engine, 2036 MI_NOOP); /* preemption disabled */ 2037 if (IS_ERR(rq)) 2038 return PTR_ERR(rq); 2039 2040 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2041 i915_request_get(rq); 2042 i915_request_add(rq); 2043 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2044 err = -EIO; 2045 goto out; 2046 } 2047 2048 intel_context_set_banned(rq->context); 2049 err = intel_engine_pulse(arg->engine); /* force reset */ 2050 if (err) 2051 goto out; 2052 2053 err = wait_for_reset(arg->engine, rq, HZ / 2); 2054 if (err) { 2055 pr_err("Cancelled inflight0 request did not reset\n"); 2056 goto out; 2057 } 2058 2059 out: 2060 i915_request_put(rq); 2061 if (igt_flush_test(arg->engine->i915)) 2062 err = -EIO; 2063 return err; 2064 } 2065 2066 static int live_preempt_cancel(void *arg) 2067 { 2068 struct intel_gt *gt = arg; 2069 struct live_preempt_cancel data; 2070 enum intel_engine_id id; 2071 int err = -ENOMEM; 2072 2073 /* 2074 * To cancel an inflight context, we need to first remove it from the 2075 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 2076 */ 2077 2078 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2079 return 0; 2080 2081 if (preempt_client_init(gt, &data.a)) 2082 return -ENOMEM; 2083 if (preempt_client_init(gt, &data.b)) 2084 goto err_client_a; 2085 2086 for_each_engine(data.engine, gt, id) { 2087 if (!intel_engine_has_preemption(data.engine)) 2088 continue; 2089 2090 err = __cancel_active0(&data); 2091 if (err) 2092 goto err_wedged; 2093 2094 err = __cancel_active1(&data); 2095 if (err) 2096 goto err_wedged; 2097 2098 err = __cancel_queued(&data); 2099 if (err) 2100 goto err_wedged; 2101 2102 err = __cancel_hostile(&data); 2103 if (err) 2104 goto err_wedged; 2105 } 2106 2107 err = 0; 2108 err_client_b: 2109 preempt_client_fini(&data.b); 2110 err_client_a: 2111 preempt_client_fini(&data.a); 2112 return err; 2113 2114 err_wedged: 2115 GEM_TRACE_DUMP(); 2116 igt_spinner_end(&data.b.spin); 2117 igt_spinner_end(&data.a.spin); 2118 intel_gt_set_wedged(gt); 2119 goto err_client_b; 2120 } 2121 2122 static int live_suppress_self_preempt(void *arg) 2123 { 2124 struct intel_gt *gt = arg; 2125 struct intel_engine_cs *engine; 2126 struct i915_sched_attr attr = { 2127 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 2128 }; 2129 struct preempt_client a, b; 2130 enum intel_engine_id id; 2131 int err = -ENOMEM; 2132 2133 /* 2134 * Verify that if a preemption request does not cause a change in 2135 * the current execution order, the preempt-to-idle injection is 2136 * skipped and that we do not accidentally apply it after the CS 2137 * completion event. 
2138 */ 2139 2140 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2141 return 0; 2142 2143 if (intel_uc_uses_guc_submission(>->uc)) 2144 return 0; /* presume black blox */ 2145 2146 if (intel_vgpu_active(gt->i915)) 2147 return 0; /* GVT forces single port & request submission */ 2148 2149 if (preempt_client_init(gt, &a)) 2150 return -ENOMEM; 2151 if (preempt_client_init(gt, &b)) 2152 goto err_client_a; 2153 2154 for_each_engine(engine, gt, id) { 2155 struct i915_request *rq_a, *rq_b; 2156 int depth; 2157 2158 if (!intel_engine_has_preemption(engine)) 2159 continue; 2160 2161 if (igt_flush_test(gt->i915)) 2162 goto err_wedged; 2163 2164 intel_engine_pm_get(engine); 2165 engine->execlists.preempt_hang.count = 0; 2166 2167 rq_a = spinner_create_request(&a.spin, 2168 a.ctx, engine, 2169 MI_NOOP); 2170 if (IS_ERR(rq_a)) { 2171 err = PTR_ERR(rq_a); 2172 intel_engine_pm_put(engine); 2173 goto err_client_b; 2174 } 2175 2176 i915_request_add(rq_a); 2177 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2178 pr_err("First client failed to start\n"); 2179 intel_engine_pm_put(engine); 2180 goto err_wedged; 2181 } 2182 2183 /* Keep postponing the timer to avoid premature slicing */ 2184 mod_timer(&engine->execlists.timer, jiffies + HZ); 2185 for (depth = 0; depth < 8; depth++) { 2186 rq_b = spinner_create_request(&b.spin, 2187 b.ctx, engine, 2188 MI_NOOP); 2189 if (IS_ERR(rq_b)) { 2190 err = PTR_ERR(rq_b); 2191 intel_engine_pm_put(engine); 2192 goto err_client_b; 2193 } 2194 i915_request_add(rq_b); 2195 2196 GEM_BUG_ON(i915_request_completed(rq_a)); 2197 engine->schedule(rq_a, &attr); 2198 igt_spinner_end(&a.spin); 2199 2200 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2201 pr_err("Second client failed to start\n"); 2202 intel_engine_pm_put(engine); 2203 goto err_wedged; 2204 } 2205 2206 swap(a, b); 2207 rq_a = rq_b; 2208 } 2209 igt_spinner_end(&a.spin); 2210 2211 if (engine->execlists.preempt_hang.count) { 2212 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2213 engine->name, 2214 engine->execlists.preempt_hang.count, 2215 depth); 2216 intel_engine_pm_put(engine); 2217 err = -EINVAL; 2218 goto err_client_b; 2219 } 2220 2221 intel_engine_pm_put(engine); 2222 if (igt_flush_test(gt->i915)) 2223 goto err_wedged; 2224 } 2225 2226 err = 0; 2227 err_client_b: 2228 preempt_client_fini(&b); 2229 err_client_a: 2230 preempt_client_fini(&a); 2231 return err; 2232 2233 err_wedged: 2234 igt_spinner_end(&b.spin); 2235 igt_spinner_end(&a.spin); 2236 intel_gt_set_wedged(gt); 2237 err = -EIO; 2238 goto err_client_b; 2239 } 2240 2241 static int __i915_sw_fence_call 2242 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) 2243 { 2244 return NOTIFY_DONE; 2245 } 2246 2247 static struct i915_request *dummy_request(struct intel_engine_cs *engine) 2248 { 2249 struct i915_request *rq; 2250 2251 rq = kzalloc(sizeof(*rq), GFP_KERNEL); 2252 if (!rq) 2253 return NULL; 2254 2255 rq->engine = engine; 2256 2257 spin_lock_init(&rq->lock); 2258 INIT_LIST_HEAD(&rq->fence.cb_list); 2259 rq->fence.lock = &rq->lock; 2260 rq->fence.ops = &i915_fence_ops; 2261 2262 i915_sched_node_init(&rq->sched); 2263 2264 /* mark this request as permanently incomplete */ 2265 rq->fence.seqno = 1; 2266 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ 2267 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; 2268 GEM_BUG_ON(i915_request_completed(rq)); 2269 2270 i915_sw_fence_init(&rq->submit, dummy_notify); 2271 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 2272 2273 spin_lock_init(&rq->lock); 
2274 rq->fence.lock = &rq->lock; 2275 INIT_LIST_HEAD(&rq->fence.cb_list); 2276 2277 return rq; 2278 } 2279 2280 static void dummy_request_free(struct i915_request *dummy) 2281 { 2282 /* We have to fake the CS interrupt to kick the next request */ 2283 i915_sw_fence_commit(&dummy->submit); 2284 2285 i915_request_mark_complete(dummy); 2286 dma_fence_signal(&dummy->fence); 2287 2288 i915_sched_node_fini(&dummy->sched); 2289 i915_sw_fence_fini(&dummy->submit); 2290 2291 dma_fence_free(&dummy->fence); 2292 } 2293 2294 static int live_suppress_wait_preempt(void *arg) 2295 { 2296 struct intel_gt *gt = arg; 2297 struct preempt_client client[4]; 2298 struct i915_request *rq[ARRAY_SIZE(client)] = {}; 2299 struct intel_engine_cs *engine; 2300 enum intel_engine_id id; 2301 int err = -ENOMEM; 2302 int i; 2303 2304 /* 2305 * Waiters are given a little priority nudge, but not enough 2306 * to actually cause any preemption. Double check that we do 2307 * not needlessly generate preempt-to-idle cycles. 2308 */ 2309 2310 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2311 return 0; 2312 2313 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ 2314 return -ENOMEM; 2315 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ 2316 goto err_client_0; 2317 if (preempt_client_init(gt, &client[2])) /* head of queue */ 2318 goto err_client_1; 2319 if (preempt_client_init(gt, &client[3])) /* bystander */ 2320 goto err_client_2; 2321 2322 for_each_engine(engine, gt, id) { 2323 int depth; 2324 2325 if (!intel_engine_has_preemption(engine)) 2326 continue; 2327 2328 if (!engine->emit_init_breadcrumb) 2329 continue; 2330 2331 for (depth = 0; depth < ARRAY_SIZE(client); depth++) { 2332 struct i915_request *dummy; 2333 2334 engine->execlists.preempt_hang.count = 0; 2335 2336 dummy = dummy_request(engine); 2337 if (!dummy) 2338 goto err_client_3; 2339 2340 for (i = 0; i < ARRAY_SIZE(client); i++) { 2341 struct i915_request *this; 2342 2343 this = spinner_create_request(&client[i].spin, 2344 client[i].ctx, engine, 2345 MI_NOOP); 2346 if (IS_ERR(this)) { 2347 err = PTR_ERR(this); 2348 goto err_wedged; 2349 } 2350 2351 /* Disable NEWCLIENT promotion */ 2352 __i915_active_fence_set(&i915_request_timeline(this)->last_request, 2353 &dummy->fence); 2354 2355 rq[i] = i915_request_get(this); 2356 i915_request_add(this); 2357 } 2358 2359 dummy_request_free(dummy); 2360 2361 GEM_BUG_ON(i915_request_completed(rq[0])); 2362 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { 2363 pr_err("%s: First client failed to start\n", 2364 engine->name); 2365 goto err_wedged; 2366 } 2367 GEM_BUG_ON(!i915_request_started(rq[0])); 2368 2369 if (i915_request_wait(rq[depth], 2370 I915_WAIT_PRIORITY, 2371 1) != -ETIME) { 2372 pr_err("%s: Waiter depth:%d completed!\n", 2373 engine->name, depth); 2374 goto err_wedged; 2375 } 2376 2377 for (i = 0; i < ARRAY_SIZE(client); i++) { 2378 igt_spinner_end(&client[i].spin); 2379 i915_request_put(rq[i]); 2380 rq[i] = NULL; 2381 } 2382 2383 if (igt_flush_test(gt->i915)) 2384 goto err_wedged; 2385 2386 if (engine->execlists.preempt_hang.count) { 2387 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", 2388 engine->name, 2389 engine->execlists.preempt_hang.count, 2390 depth); 2391 err = -EINVAL; 2392 goto err_client_3; 2393 } 2394 } 2395 } 2396 2397 err = 0; 2398 err_client_3: 2399 preempt_client_fini(&client[3]); 2400 err_client_2: 2401 preempt_client_fini(&client[2]); 2402 err_client_1: 2403 preempt_client_fini(&client[1]); 2404 err_client_0: 2405 preempt_client_fini(&client[0]); 2406 
return err; 2407 2408 err_wedged: 2409 for (i = 0; i < ARRAY_SIZE(client); i++) { 2410 igt_spinner_end(&client[i].spin); 2411 i915_request_put(rq[i]); 2412 } 2413 intel_gt_set_wedged(gt); 2414 err = -EIO; 2415 goto err_client_3; 2416 } 2417 2418 static int live_chain_preempt(void *arg) 2419 { 2420 struct intel_gt *gt = arg; 2421 struct intel_engine_cs *engine; 2422 struct preempt_client hi, lo; 2423 enum intel_engine_id id; 2424 int err = -ENOMEM; 2425 2426 /* 2427 * Build a chain AB...BA between two contexts (A, B) and request 2428 * preemption of the last request. It should then complete before 2429 * the previously submitted spinner in B. 2430 */ 2431 2432 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2433 return 0; 2434 2435 if (preempt_client_init(gt, &hi)) 2436 return -ENOMEM; 2437 2438 if (preempt_client_init(gt, &lo)) 2439 goto err_client_hi; 2440 2441 for_each_engine(engine, gt, id) { 2442 struct i915_sched_attr attr = { 2443 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2444 }; 2445 struct igt_live_test t; 2446 struct i915_request *rq; 2447 int ring_size, count, i; 2448 2449 if (!intel_engine_has_preemption(engine)) 2450 continue; 2451 2452 rq = spinner_create_request(&lo.spin, 2453 lo.ctx, engine, 2454 MI_ARB_CHECK); 2455 if (IS_ERR(rq)) 2456 goto err_wedged; 2457 2458 i915_request_get(rq); 2459 i915_request_add(rq); 2460 2461 ring_size = rq->wa_tail - rq->head; 2462 if (ring_size < 0) 2463 ring_size += rq->ring->size; 2464 ring_size = rq->ring->size / ring_size; 2465 pr_debug("%s(%s): Using maximum of %d requests\n", 2466 __func__, engine->name, ring_size); 2467 2468 igt_spinner_end(&lo.spin); 2469 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2470 pr_err("Timed out waiting to flush %s\n", engine->name); 2471 i915_request_put(rq); 2472 goto err_wedged; 2473 } 2474 i915_request_put(rq); 2475 2476 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2477 err = -EIO; 2478 goto err_wedged; 2479 } 2480 2481 for_each_prime_number_from(count, 1, ring_size) { 2482 rq = spinner_create_request(&hi.spin, 2483 hi.ctx, engine, 2484 MI_ARB_CHECK); 2485 if (IS_ERR(rq)) 2486 goto err_wedged; 2487 i915_request_add(rq); 2488 if (!igt_wait_for_spinner(&hi.spin, rq)) 2489 goto err_wedged; 2490 2491 rq = spinner_create_request(&lo.spin, 2492 lo.ctx, engine, 2493 MI_ARB_CHECK); 2494 if (IS_ERR(rq)) 2495 goto err_wedged; 2496 i915_request_add(rq); 2497 2498 for (i = 0; i < count; i++) { 2499 rq = igt_request_alloc(lo.ctx, engine); 2500 if (IS_ERR(rq)) 2501 goto err_wedged; 2502 i915_request_add(rq); 2503 } 2504 2505 rq = igt_request_alloc(hi.ctx, engine); 2506 if (IS_ERR(rq)) 2507 goto err_wedged; 2508 2509 i915_request_get(rq); 2510 i915_request_add(rq); 2511 engine->schedule(rq, &attr); 2512 2513 igt_spinner_end(&hi.spin); 2514 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2515 struct drm_printer p = 2516 drm_info_printer(gt->i915->drm.dev); 2517 2518 pr_err("Failed to preempt over chain of %d\n", 2519 count); 2520 intel_engine_dump(engine, &p, 2521 "%s\n", engine->name); 2522 i915_request_put(rq); 2523 goto err_wedged; 2524 } 2525 igt_spinner_end(&lo.spin); 2526 i915_request_put(rq); 2527 2528 rq = igt_request_alloc(lo.ctx, engine); 2529 if (IS_ERR(rq)) 2530 goto err_wedged; 2531 2532 i915_request_get(rq); 2533 i915_request_add(rq); 2534 2535 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2536 struct drm_printer p = 2537 drm_info_printer(gt->i915->drm.dev); 2538 2539 pr_err("Failed to flush low priority chain of %d requests\n", 2540 count); 2541 intel_engine_dump(engine, &p, 2542 "%s\n", 
engine->name); 2543 2544 i915_request_put(rq); 2545 goto err_wedged; 2546 } 2547 i915_request_put(rq); 2548 } 2549 2550 if (igt_live_test_end(&t)) { 2551 err = -EIO; 2552 goto err_wedged; 2553 } 2554 } 2555 2556 err = 0; 2557 err_client_lo: 2558 preempt_client_fini(&lo); 2559 err_client_hi: 2560 preempt_client_fini(&hi); 2561 return err; 2562 2563 err_wedged: 2564 igt_spinner_end(&hi.spin); 2565 igt_spinner_end(&lo.spin); 2566 intel_gt_set_wedged(gt); 2567 err = -EIO; 2568 goto err_client_lo; 2569 } 2570 2571 static int create_gang(struct intel_engine_cs *engine, 2572 struct i915_request **prev) 2573 { 2574 struct drm_i915_gem_object *obj; 2575 struct intel_context *ce; 2576 struct i915_request *rq; 2577 struct i915_vma *vma; 2578 u32 *cs; 2579 int err; 2580 2581 ce = intel_context_create(engine); 2582 if (IS_ERR(ce)) 2583 return PTR_ERR(ce); 2584 2585 obj = i915_gem_object_create_internal(engine->i915, 4096); 2586 if (IS_ERR(obj)) { 2587 err = PTR_ERR(obj); 2588 goto err_ce; 2589 } 2590 2591 vma = i915_vma_instance(obj, ce->vm, NULL); 2592 if (IS_ERR(vma)) { 2593 err = PTR_ERR(vma); 2594 goto err_obj; 2595 } 2596 2597 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2598 if (err) 2599 goto err_obj; 2600 2601 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2602 if (IS_ERR(cs)) { err = PTR_ERR(cs); 2603 goto err_obj; } 2604 2605 /* Semaphore target: spin until zero */ 2606 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2607 2608 *cs++ = MI_SEMAPHORE_WAIT | 2609 MI_SEMAPHORE_POLL | 2610 MI_SEMAPHORE_SAD_EQ_SDD; 2611 *cs++ = 0; 2612 *cs++ = lower_32_bits(vma->node.start); 2613 *cs++ = upper_32_bits(vma->node.start); 2614 2615 if (*prev) { 2616 u64 offset = (*prev)->batch->node.start; 2617 2618 /* Terminate the spinner in the next lower priority batch. */ 2619 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2620 *cs++ = lower_32_bits(offset); 2621 *cs++ = upper_32_bits(offset); 2622 *cs++ = 0; 2623 } 2624 2625 *cs++ = MI_BATCH_BUFFER_END; 2626 i915_gem_object_flush_map(obj); 2627 i915_gem_object_unpin_map(obj); 2628 2629 rq = intel_context_create_request(ce); 2630 if (IS_ERR(rq)) { err = PTR_ERR(rq); 2631 goto err_obj; } 2632 2633 rq->batch = vma; 2634 i915_request_get(rq); 2635 2636 i915_vma_lock(vma); 2637 err = i915_request_await_object(rq, vma->obj, false); 2638 if (!err) 2639 err = i915_vma_move_to_active(vma, rq, 0); 2640 if (!err) 2641 err = rq->engine->emit_bb_start(rq, 2642 vma->node.start, 2643 PAGE_SIZE, 0); 2644 i915_vma_unlock(vma); 2645 i915_request_add(rq); 2646 if (err) 2647 goto err_rq; 2648 2649 i915_gem_object_put(obj); 2650 intel_context_put(ce); 2651 2652 rq->client_link.next = &(*prev)->client_link; 2653 *prev = rq; 2654 return 0; 2655 2656 err_rq: 2657 i915_request_put(rq); 2658 err_obj: 2659 i915_gem_object_put(obj); 2660 err_ce: 2661 intel_context_put(ce); 2662 return err; 2663 } 2664 2665 static int live_preempt_gang(void *arg) 2666 { 2667 struct intel_gt *gt = arg; 2668 struct intel_engine_cs *engine; 2669 enum intel_engine_id id; 2670 2671 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2672 return 0; 2673 2674 /* 2675 * Build as long a chain of preempters as we can, with each 2676 * request higher priority than the last. Once we are ready, we release 2677 * the last batch which then percolates down the chain, each releasing 2678 * the next oldest in turn. The intent is to simply push as hard as we 2679 * can with the number of preemptions, trying to exceed narrow HW 2680 * limits. At a minimum, we insist that we can sort all the user 2681 * high priority levels into execution order.
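 * (The "release" is create_gang()'s semaphore trick: each batch spins on a
 * dword at the start of its own buffer, and each later batch writes zero
 * into the previous batch's dword, so clearing the final batch's semaphore
 * below unwinds the whole chain from highest to lowest priority.)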
2682 */ 2683 2684 for_each_engine(engine, gt, id) { 2685 struct i915_request *rq = NULL; 2686 struct igt_live_test t; 2687 IGT_TIMEOUT(end_time); 2688 int prio = 0; 2689 int err = 0; 2690 u32 *cs; 2691 2692 if (!intel_engine_has_preemption(engine)) 2693 continue; 2694 2695 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2696 return -EIO; 2697 2698 do { 2699 struct i915_sched_attr attr = { 2700 .priority = I915_USER_PRIORITY(prio++), 2701 }; 2702 2703 err = create_gang(engine, &rq); 2704 if (err) 2705 break; 2706 2707 /* Submit each spinner at increasing priority */ 2708 engine->schedule(rq, &attr); 2709 2710 if (prio <= I915_PRIORITY_MAX) 2711 continue; 2712 2713 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) 2714 break; 2715 2716 if (__igt_timeout(end_time, NULL)) 2717 break; 2718 } while (1); 2719 pr_debug("%s: Preempt chain of %d requests\n", 2720 engine->name, prio); 2721 2722 /* 2723 * Such that the last spinner is the highest priority and 2724 * should execute first. When that spinner completes, 2725 * it will terminate the next lowest spinner until there 2726 * are no more spinners and the gang is complete. 2727 */ 2728 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2729 if (!IS_ERR(cs)) { 2730 *cs = 0; 2731 i915_gem_object_unpin_map(rq->batch->obj); 2732 } else { 2733 err = PTR_ERR(cs); 2734 intel_gt_set_wedged(gt); 2735 } 2736 2737 while (rq) { /* wait for each rq from highest to lowest prio */ 2738 struct i915_request *n = 2739 list_next_entry(rq, client_link); 2740 2741 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2742 struct drm_printer p = 2743 drm_info_printer(engine->i915->drm.dev); 2744 2745 pr_err("Failed to flush chain of %d requests, at %d\n", 2746 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2747 intel_engine_dump(engine, &p, 2748 "%s\n", engine->name); 2749 2750 err = -ETIME; 2751 } 2752 2753 i915_request_put(rq); 2754 rq = n; 2755 } 2756 2757 if (igt_live_test_end(&t)) 2758 err = -EIO; 2759 if (err) 2760 return err; 2761 } 2762 2763 return 0; 2764 } 2765 2766 static int live_preempt_timeout(void *arg) 2767 { 2768 struct intel_gt *gt = arg; 2769 struct i915_gem_context *ctx_hi, *ctx_lo; 2770 struct igt_spinner spin_lo; 2771 struct intel_engine_cs *engine; 2772 enum intel_engine_id id; 2773 int err = -ENOMEM; 2774 2775 /* 2776 * Check that we force preemption to occur by cancelling the previous 2777 * context if it refuses to yield the GPU. 
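 * (Below, preempt_timeout_ms is temporarily dropped to 1ms so that the
 * non-preemptible spinner is forcibly evicted almost immediately once the
 * high priority request is queued behind it.)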
2778 */ 2779 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2780 return 0; 2781 2782 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2783 return 0; 2784 2785 if (!intel_has_reset_engine(gt)) 2786 return 0; 2787 2788 if (igt_spinner_init(&spin_lo, gt)) 2789 return -ENOMEM; 2790 2791 ctx_hi = kernel_context(gt->i915); 2792 if (!ctx_hi) 2793 goto err_spin_lo; 2794 ctx_hi->sched.priority = 2795 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 2796 2797 ctx_lo = kernel_context(gt->i915); 2798 if (!ctx_lo) 2799 goto err_ctx_hi; 2800 ctx_lo->sched.priority = 2801 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 2802 2803 for_each_engine(engine, gt, id) { 2804 unsigned long saved_timeout; 2805 struct i915_request *rq; 2806 2807 if (!intel_engine_has_preemption(engine)) 2808 continue; 2809 2810 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 2811 MI_NOOP); /* preemption disabled */ 2812 if (IS_ERR(rq)) { 2813 err = PTR_ERR(rq); 2814 goto err_ctx_lo; 2815 } 2816 2817 i915_request_add(rq); 2818 if (!igt_wait_for_spinner(&spin_lo, rq)) { 2819 intel_gt_set_wedged(gt); 2820 err = -EIO; 2821 goto err_ctx_lo; 2822 } 2823 2824 rq = igt_request_alloc(ctx_hi, engine); 2825 if (IS_ERR(rq)) { 2826 igt_spinner_end(&spin_lo); 2827 err = PTR_ERR(rq); 2828 goto err_ctx_lo; 2829 } 2830 2831 /* Flush the previous CS ack before changing timeouts */ 2832 while (READ_ONCE(engine->execlists.pending[0])) 2833 cpu_relax(); 2834 2835 saved_timeout = engine->props.preempt_timeout_ms; 2836 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 2837 2838 i915_request_get(rq); 2839 i915_request_add(rq); 2840 2841 intel_engine_flush_submission(engine); 2842 engine->props.preempt_timeout_ms = saved_timeout; 2843 2844 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 2845 intel_gt_set_wedged(gt); 2846 i915_request_put(rq); 2847 err = -ETIME; 2848 goto err_ctx_lo; 2849 } 2850 2851 igt_spinner_end(&spin_lo); 2852 i915_request_put(rq); 2853 } 2854 2855 err = 0; 2856 err_ctx_lo: 2857 kernel_context_close(ctx_lo); 2858 err_ctx_hi: 2859 kernel_context_close(ctx_hi); 2860 err_spin_lo: 2861 igt_spinner_fini(&spin_lo); 2862 return err; 2863 } 2864 2865 static int random_range(struct rnd_state *rnd, int min, int max) 2866 { 2867 return i915_prandom_u32_max_state(max - min, rnd) + min; 2868 } 2869 2870 static int random_priority(struct rnd_state *rnd) 2871 { 2872 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 2873 } 2874 2875 struct preempt_smoke { 2876 struct intel_gt *gt; 2877 struct i915_gem_context **contexts; 2878 struct intel_engine_cs *engine; 2879 struct drm_i915_gem_object *batch; 2880 unsigned int ncontext; 2881 struct rnd_state prng; 2882 unsigned long count; 2883 }; 2884 2885 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 2886 { 2887 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 2888 &smoke->prng)]; 2889 } 2890 2891 static int smoke_submit(struct preempt_smoke *smoke, 2892 struct i915_gem_context *ctx, int prio, 2893 struct drm_i915_gem_object *batch) 2894 { 2895 struct i915_request *rq; 2896 struct i915_vma *vma = NULL; 2897 int err = 0; 2898 2899 if (batch) { 2900 struct i915_address_space *vm; 2901 2902 vm = i915_gem_context_get_vm_rcu(ctx); 2903 vma = i915_vma_instance(batch, vm, NULL); 2904 i915_vm_put(vm); 2905 if (IS_ERR(vma)) 2906 return PTR_ERR(vma); 2907 2908 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2909 if (err) 2910 return err; 2911 } 2912 2913 ctx->sched.priority = prio; 2914 2915 rq = igt_request_alloc(ctx, smoke->engine); 2916 if 
(IS_ERR(rq)) { 2917 err = PTR_ERR(rq); 2918 goto unpin; 2919 } 2920 2921 if (vma) { 2922 i915_vma_lock(vma); 2923 err = i915_request_await_object(rq, vma->obj, false); 2924 if (!err) 2925 err = i915_vma_move_to_active(vma, rq, 0); 2926 if (!err) 2927 err = rq->engine->emit_bb_start(rq, 2928 vma->node.start, 2929 PAGE_SIZE, 0); 2930 i915_vma_unlock(vma); 2931 } 2932 2933 i915_request_add(rq); 2934 2935 unpin: 2936 if (vma) 2937 i915_vma_unpin(vma); 2938 2939 return err; 2940 } 2941 2942 static int smoke_crescendo_thread(void *arg) 2943 { 2944 struct preempt_smoke *smoke = arg; 2945 IGT_TIMEOUT(end_time); 2946 unsigned long count; 2947 2948 count = 0; 2949 do { 2950 struct i915_gem_context *ctx = smoke_context(smoke); 2951 int err; 2952 2953 err = smoke_submit(smoke, 2954 ctx, count % I915_PRIORITY_MAX, 2955 smoke->batch); 2956 if (err) 2957 return err; 2958 2959 count++; 2960 } while (!__igt_timeout(end_time, NULL)); 2961 2962 smoke->count = count; 2963 return 0; 2964 } 2965 2966 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 2967 #define BATCH BIT(0) 2968 { 2969 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 2970 struct preempt_smoke arg[I915_NUM_ENGINES]; 2971 struct intel_engine_cs *engine; 2972 enum intel_engine_id id; 2973 unsigned long count; 2974 int err = 0; 2975 2976 for_each_engine(engine, smoke->gt, id) { 2977 arg[id] = *smoke; 2978 arg[id].engine = engine; 2979 if (!(flags & BATCH)) 2980 arg[id].batch = NULL; 2981 arg[id].count = 0; 2982 2983 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id], 2984 "igt/smoke:%d", id); 2985 if (IS_ERR(tsk[id])) { 2986 err = PTR_ERR(tsk[id]); 2987 break; 2988 } 2989 get_task_struct(tsk[id]); 2990 } 2991 2992 yield(); /* start all threads before we kthread_stop() */ 2993 2994 count = 0; 2995 for_each_engine(engine, smoke->gt, id) { 2996 int status; 2997 2998 if (IS_ERR_OR_NULL(tsk[id])) 2999 continue; 3000 3001 status = kthread_stop(tsk[id]); 3002 if (status && !err) 3003 err = status; 3004 3005 count += arg[id].count; 3006 3007 put_task_struct(tsk[id]); 3008 } 3009 3010 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3011 count, flags, 3012 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3013 return err; 3014 } 3015 3016 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3017 { 3018 enum intel_engine_id id; 3019 IGT_TIMEOUT(end_time); 3020 unsigned long count; 3021 3022 count = 0; 3023 do { 3024 for_each_engine(smoke->engine, smoke->gt, id) { 3025 struct i915_gem_context *ctx = smoke_context(smoke); 3026 int err; 3027 3028 err = smoke_submit(smoke, 3029 ctx, random_priority(&smoke->prng), 3030 flags & BATCH ?
smoke->batch : NULL); 3031 if (err) 3032 return err; 3033 3034 count++; 3035 } 3036 } while (!__igt_timeout(end_time, NULL)); 3037 3038 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3039 count, flags, 3040 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3041 return 0; 3042 } 3043 3044 static int live_preempt_smoke(void *arg) 3045 { 3046 struct preempt_smoke smoke = { 3047 .gt = arg, 3048 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3049 .ncontext = 1024, 3050 }; 3051 const unsigned int phase[] = { 0, BATCH }; 3052 struct igt_live_test t; 3053 int err = -ENOMEM; 3054 u32 *cs; 3055 int n; 3056 3057 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3058 return 0; 3059 3060 smoke.contexts = kmalloc_array(smoke.ncontext, 3061 sizeof(*smoke.contexts), 3062 GFP_KERNEL); 3063 if (!smoke.contexts) 3064 return -ENOMEM; 3065 3066 smoke.batch = 3067 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3068 if (IS_ERR(smoke.batch)) { 3069 err = PTR_ERR(smoke.batch); 3070 goto err_free; 3071 } 3072 3073 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3074 if (IS_ERR(cs)) { 3075 err = PTR_ERR(cs); 3076 goto err_batch; 3077 } 3078 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3079 cs[n] = MI_ARB_CHECK; 3080 cs[n] = MI_BATCH_BUFFER_END; 3081 i915_gem_object_flush_map(smoke.batch); 3082 i915_gem_object_unpin_map(smoke.batch); 3083 3084 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3085 err = -EIO; 3086 goto err_batch; 3087 } 3088 3089 for (n = 0; n < smoke.ncontext; n++) { 3090 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3091 if (!smoke.contexts[n]) 3092 goto err_ctx; 3093 } 3094 3095 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3096 err = smoke_crescendo(&smoke, phase[n]); 3097 if (err) 3098 goto err_ctx; 3099 3100 err = smoke_random(&smoke, phase[n]); 3101 if (err) 3102 goto err_ctx; 3103 } 3104 3105 err_ctx: 3106 if (igt_live_test_end(&t)) 3107 err = -EIO; 3108 3109 for (n = 0; n < smoke.ncontext; n++) { 3110 if (!smoke.contexts[n]) 3111 break; 3112 kernel_context_close(smoke.contexts[n]); 3113 } 3114 3115 err_batch: 3116 i915_gem_object_put(smoke.batch); 3117 err_free: 3118 kfree(smoke.contexts); 3119 3120 return err; 3121 } 3122 3123 static int nop_virtual_engine(struct intel_gt *gt, 3124 struct intel_engine_cs **siblings, 3125 unsigned int nsibling, 3126 unsigned int nctx, 3127 unsigned int flags) 3128 #define CHAIN BIT(0) 3129 { 3130 IGT_TIMEOUT(end_time); 3131 struct i915_request *request[16] = {}; 3132 struct intel_context *ve[16]; 3133 unsigned long n, prime, nc; 3134 struct igt_live_test t; 3135 ktime_t times[2] = {}; 3136 int err; 3137 3138 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3139 3140 for (n = 0; n < nctx; n++) { 3141 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3142 if (IS_ERR(ve[n])) { 3143 err = PTR_ERR(ve[n]); 3144 nctx = n; 3145 goto out; 3146 } 3147 3148 err = intel_context_pin(ve[n]); 3149 if (err) { 3150 intel_context_put(ve[n]); 3151 nctx = n; 3152 goto out; 3153 } 3154 } 3155 3156 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3157 if (err) 3158 goto out; 3159 3160 for_each_prime_number_from(prime, 1, 8192) { 3161 times[1] = ktime_get_raw(); 3162 3163 if (flags & CHAIN) { 3164 for (nc = 0; nc < nctx; nc++) { 3165 for (n = 0; n < prime; n++) { 3166 struct i915_request *rq; 3167 3168 rq = i915_request_create(ve[nc]); 3169 if (IS_ERR(rq)) { 3170 err = PTR_ERR(rq); 3171 goto out; 3172 } 3173 3174 if (request[nc]) 3175 
i915_request_put(request[nc]); 3176 request[nc] = i915_request_get(rq); 3177 i915_request_add(rq); 3178 } 3179 } 3180 } else { 3181 for (n = 0; n < prime; n++) { 3182 for (nc = 0; nc < nctx; nc++) { 3183 struct i915_request *rq; 3184 3185 rq = i915_request_create(ve[nc]); 3186 if (IS_ERR(rq)) { 3187 err = PTR_ERR(rq); 3188 goto out; 3189 } 3190 3191 if (request[nc]) 3192 i915_request_put(request[nc]); 3193 request[nc] = i915_request_get(rq); 3194 i915_request_add(rq); 3195 } 3196 } 3197 } 3198 3199 for (nc = 0; nc < nctx; nc++) { 3200 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3201 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3202 __func__, ve[0]->engine->name, 3203 request[nc]->fence.context, 3204 request[nc]->fence.seqno); 3205 3206 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3207 __func__, ve[0]->engine->name, 3208 request[nc]->fence.context, 3209 request[nc]->fence.seqno); 3210 GEM_TRACE_DUMP(); 3211 intel_gt_set_wedged(gt); 3212 break; 3213 } 3214 } 3215 3216 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3217 if (prime == 1) 3218 times[0] = times[1]; 3219 3220 for (nc = 0; nc < nctx; nc++) { 3221 i915_request_put(request[nc]); 3222 request[nc] = NULL; 3223 } 3224 3225 if (__igt_timeout(end_time, NULL)) 3226 break; 3227 } 3228 3229 err = igt_live_test_end(&t); 3230 if (err) 3231 goto out; 3232 3233 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3234 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3235 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3236 3237 out: 3238 if (igt_flush_test(gt->i915)) 3239 err = -EIO; 3240 3241 for (nc = 0; nc < nctx; nc++) { 3242 i915_request_put(request[nc]); 3243 intel_context_unpin(ve[nc]); 3244 intel_context_put(ve[nc]); 3245 } 3246 return err; 3247 } 3248 3249 static int live_virtual_engine(void *arg) 3250 { 3251 struct intel_gt *gt = arg; 3252 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3253 struct intel_engine_cs *engine; 3254 enum intel_engine_id id; 3255 unsigned int class, inst; 3256 int err; 3257 3258 if (intel_uc_uses_guc_submission(>->uc)) 3259 return 0; 3260 3261 for_each_engine(engine, gt, id) { 3262 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3263 if (err) { 3264 pr_err("Failed to wrap engine %s: err=%d\n", 3265 engine->name, err); 3266 return err; 3267 } 3268 } 3269 3270 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3271 int nsibling, n; 3272 3273 nsibling = 0; 3274 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3275 if (!gt->engine_class[class][inst]) 3276 continue; 3277 3278 siblings[nsibling++] = gt->engine_class[class][inst]; 3279 } 3280 if (nsibling < 2) 3281 continue; 3282 3283 for (n = 1; n <= nsibling + 1; n++) { 3284 err = nop_virtual_engine(gt, siblings, nsibling, 3285 n, 0); 3286 if (err) 3287 return err; 3288 } 3289 3290 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3291 if (err) 3292 return err; 3293 } 3294 3295 return 0; 3296 } 3297 3298 static int mask_virtual_engine(struct intel_gt *gt, 3299 struct intel_engine_cs **siblings, 3300 unsigned int nsibling) 3301 { 3302 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3303 struct intel_context *ve; 3304 struct igt_live_test t; 3305 unsigned int n; 3306 int err; 3307 3308 /* 3309 * Check that by setting the execution mask on a request, we can 3310 * restrict it to our desired engine within the virtual engine. 
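 * (Each request below is pinned to a single sibling via its execution_mask,
 * in reverse order, and we then verify that rq->engine matches the sibling
 * that was requested.)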
3311 */ 3312 3313 ve = intel_execlists_create_virtual(siblings, nsibling); 3314 if (IS_ERR(ve)) { 3315 err = PTR_ERR(ve); 3316 goto out_close; 3317 } 3318 3319 err = intel_context_pin(ve); 3320 if (err) 3321 goto out_put; 3322 3323 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3324 if (err) 3325 goto out_unpin; 3326 3327 for (n = 0; n < nsibling; n++) { 3328 request[n] = i915_request_create(ve); 3329 if (IS_ERR(request[n])) { 3330 err = PTR_ERR(request[n]); 3331 nsibling = n; 3332 goto out; 3333 } 3334 3335 /* Reverse order as it's more likely to be unnatural */ 3336 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3337 3338 i915_request_get(request[n]); 3339 i915_request_add(request[n]); 3340 } 3341 3342 for (n = 0; n < nsibling; n++) { 3343 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3344 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3345 __func__, ve->engine->name, 3346 request[n]->fence.context, 3347 request[n]->fence.seqno); 3348 3349 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3350 __func__, ve->engine->name, 3351 request[n]->fence.context, 3352 request[n]->fence.seqno); 3353 GEM_TRACE_DUMP(); 3354 intel_gt_set_wedged(gt); 3355 err = -EIO; 3356 goto out; 3357 } 3358 3359 if (request[n]->engine != siblings[nsibling - n - 1]) { 3360 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3361 request[n]->engine->name, 3362 siblings[nsibling - n - 1]->name); 3363 err = -EINVAL; 3364 goto out; 3365 } 3366 } 3367 3368 err = igt_live_test_end(&t); 3369 out: 3370 if (igt_flush_test(gt->i915)) 3371 err = -EIO; 3372 3373 for (n = 0; n < nsibling; n++) 3374 i915_request_put(request[n]); 3375 3376 out_unpin: 3377 intel_context_unpin(ve); 3378 out_put: 3379 intel_context_put(ve); 3380 out_close: 3381 return err; 3382 } 3383 3384 static int live_virtual_mask(void *arg) 3385 { 3386 struct intel_gt *gt = arg; 3387 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3388 unsigned int class, inst; 3389 int err; 3390 3391 if (intel_uc_uses_guc_submission(>->uc)) 3392 return 0; 3393 3394 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3395 unsigned int nsibling; 3396 3397 nsibling = 0; 3398 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3399 if (!gt->engine_class[class][inst]) 3400 break; 3401 3402 siblings[nsibling++] = gt->engine_class[class][inst]; 3403 } 3404 if (nsibling < 2) 3405 continue; 3406 3407 err = mask_virtual_engine(gt, siblings, nsibling); 3408 if (err) 3409 return err; 3410 } 3411 3412 return 0; 3413 } 3414 3415 static int preserved_virtual_engine(struct intel_gt *gt, 3416 struct intel_engine_cs **siblings, 3417 unsigned int nsibling) 3418 { 3419 struct i915_request *last = NULL; 3420 struct intel_context *ve; 3421 struct i915_vma *scratch; 3422 struct igt_live_test t; 3423 unsigned int n; 3424 int err = 0; 3425 u32 *cs; 3426 3427 scratch = create_scratch(siblings[0]->gt); 3428 if (IS_ERR(scratch)) 3429 return PTR_ERR(scratch); 3430 3431 err = i915_vma_sync(scratch); 3432 if (err) 3433 goto out_scratch; 3434 3435 ve = intel_execlists_create_virtual(siblings, nsibling); 3436 if (IS_ERR(ve)) { 3437 err = PTR_ERR(ve); 3438 goto out_scratch; 3439 } 3440 3441 err = intel_context_pin(ve); 3442 if (err) 3443 goto out_put; 3444 3445 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3446 if (err) 3447 goto out_unpin; 3448 3449 for (n = 0; n < NUM_GPR_DW; n++) { 3450 struct intel_engine_cs *engine = siblings[n % nsibling]; 3451 struct i915_request *rq; 3452 3453 rq = i915_request_create(ve); 3454 if 
(IS_ERR(rq)) { 3455 err = PTR_ERR(rq); 3456 goto out_end; 3457 } 3458 3459 i915_request_put(last); 3460 last = i915_request_get(rq); 3461 3462 cs = intel_ring_begin(rq, 8); 3463 if (IS_ERR(cs)) { 3464 i915_request_add(rq); 3465 err = PTR_ERR(cs); 3466 goto out_end; 3467 } 3468 3469 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3470 *cs++ = CS_GPR(engine, n); 3471 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 3472 *cs++ = 0; 3473 3474 *cs++ = MI_LOAD_REGISTER_IMM(1); 3475 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 3476 *cs++ = n + 1; 3477 3478 *cs++ = MI_NOOP; 3479 intel_ring_advance(rq, cs); 3480 3481 /* Restrict this request to run on a particular engine */ 3482 rq->execution_mask = engine->mask; 3483 i915_request_add(rq); 3484 } 3485 3486 if (i915_request_wait(last, 0, HZ / 5) < 0) { 3487 err = -ETIME; 3488 goto out_end; 3489 } 3490 3491 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3492 if (IS_ERR(cs)) { 3493 err = PTR_ERR(cs); 3494 goto out_end; 3495 } 3496 3497 for (n = 0; n < NUM_GPR_DW; n++) { 3498 if (cs[n] != n) { 3499 pr_err("Incorrect value[%d] found for GPR[%d]\n", 3500 cs[n], n); 3501 err = -EINVAL; 3502 break; 3503 } 3504 } 3505 3506 i915_gem_object_unpin_map(scratch->obj); 3507 3508 out_end: 3509 if (igt_live_test_end(&t)) 3510 err = -EIO; 3511 i915_request_put(last); 3512 out_unpin: 3513 intel_context_unpin(ve); 3514 out_put: 3515 intel_context_put(ve); 3516 out_scratch: 3517 i915_vma_unpin_and_release(&scratch, 0); 3518 return err; 3519 } 3520 3521 static int live_virtual_preserved(void *arg) 3522 { 3523 struct intel_gt *gt = arg; 3524 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3525 unsigned int class, inst; 3526 3527 /* 3528 * Check that the context image retains non-privileged (user) registers 3529 * from one engine to the next. For this we check that the CS_GPR 3530 * are preserved. 3531 */ 3532 3533 if (intel_uc_uses_guc_submission(>->uc)) 3534 return 0; 3535 3536 /* As we use CS_GPR we cannot run before they existed on all engines. */ 3537 if (INTEL_GEN(gt->i915) < 9) 3538 return 0; 3539 3540 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3541 int nsibling, err; 3542 3543 nsibling = 0; 3544 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3545 if (!gt->engine_class[class][inst]) 3546 continue; 3547 3548 siblings[nsibling++] = gt->engine_class[class][inst]; 3549 } 3550 if (nsibling < 2) 3551 continue; 3552 3553 err = preserved_virtual_engine(gt, siblings, nsibling); 3554 if (err) 3555 return err; 3556 } 3557 3558 return 0; 3559 } 3560 3561 static int bond_virtual_engine(struct intel_gt *gt, 3562 unsigned int class, 3563 struct intel_engine_cs **siblings, 3564 unsigned int nsibling, 3565 unsigned int flags) 3566 #define BOND_SCHEDULE BIT(0) 3567 { 3568 struct intel_engine_cs *master; 3569 struct i915_request *rq[16]; 3570 enum intel_engine_id id; 3571 struct igt_spinner spin; 3572 unsigned long n; 3573 int err; 3574 3575 /* 3576 * A set of bonded requests is intended to be run concurrently 3577 * across a number of engines. We use one request per-engine 3578 * and a magic fence to schedule each of the bonded requests 3579 * at the same time. A consequence of our current scheduler is that 3580 * we only move requests to the HW ready queue when the request 3581 * becomes ready, that is when all of its prerequisite fences have 3582 * been signaled. As one of those fences is the master submit fence, 3583 * there is a delay on all secondary fences as the HW may be 3584 * currently busy. 
Equally, as all the requests are independent, 3585 * they may have other fences that delay individual request 3586 * submission to HW. Ergo, we do not guarantee that all requests are 3587 * immediately submitted to HW at the same time, just that if the 3588 * rules are abided by, they are ready at the same time as the 3589 * first is submitted. Userspace can embed semaphores in its batch 3590 * to ensure parallel execution of its phases as it requires. 3591 * Though naturally it gets requested that perhaps the scheduler should 3592 * take care of parallel execution, even across preemption events on 3593 * different HW. (The proper answer is of course "lalalala".) 3594 * 3595 * With the submit-fence, we have identified three possible phases 3596 * of synchronisation depending on the master fence: queued (not 3597 * ready), executing, and signaled. The first two are quite simple 3598 * and checked below. However, the signaled master fence handling is 3599 * contentious. Currently we do not distinguish between a signaled 3600 * fence and an expired fence, as once signaled it does not convey 3601 * any information about the previous execution. It may even be freed 3602 * and hence checking later it may not exist at all. Ergo we currently 3603 * do not apply the bonding constraint for an already signaled fence, 3604 * as our expectation is that it should not constrain the secondaries 3605 * and is outside of the scope of the bonded request API (i.e. all 3606 * userspace requests are meant to be running in parallel). As 3607 * it imposes no constraint, and is effectively a no-op, we do not 3608 * check below as normal execution flows are checked extensively above. 3609 * 3610 * XXX Is the degenerate handling of signaled submit fences the 3611 * expected behaviour for userspace?
3612 */ 3613 3614 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 3615 3616 if (igt_spinner_init(&spin, gt)) 3617 return -ENOMEM; 3618 3619 err = 0; 3620 rq[0] = ERR_PTR(-ENOMEM); 3621 for_each_engine(master, gt, id) { 3622 struct i915_sw_fence fence = {}; 3623 struct intel_context *ce; 3624 3625 if (master->class == class) 3626 continue; 3627 3628 ce = intel_context_create(master); 3629 if (IS_ERR(ce)) { 3630 err = PTR_ERR(ce); 3631 goto out; 3632 } 3633 3634 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 3635 3636 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 3637 intel_context_put(ce); 3638 if (IS_ERR(rq[0])) { 3639 err = PTR_ERR(rq[0]); 3640 goto out; 3641 } 3642 i915_request_get(rq[0]); 3643 3644 if (flags & BOND_SCHEDULE) { 3645 onstack_fence_init(&fence); 3646 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 3647 &fence, 3648 GFP_KERNEL); 3649 } 3650 3651 i915_request_add(rq[0]); 3652 if (err < 0) 3653 goto out; 3654 3655 if (!(flags & BOND_SCHEDULE) && 3656 !igt_wait_for_spinner(&spin, rq[0])) { 3657 err = -EIO; 3658 goto out; 3659 } 3660 3661 for (n = 0; n < nsibling; n++) { 3662 struct intel_context *ve; 3663 3664 ve = intel_execlists_create_virtual(siblings, nsibling); 3665 if (IS_ERR(ve)) { 3666 err = PTR_ERR(ve); 3667 onstack_fence_fini(&fence); 3668 goto out; 3669 } 3670 3671 err = intel_virtual_engine_attach_bond(ve->engine, 3672 master, 3673 siblings[n]); 3674 if (err) { 3675 intel_context_put(ve); 3676 onstack_fence_fini(&fence); 3677 goto out; 3678 } 3679 3680 err = intel_context_pin(ve); 3681 intel_context_put(ve); 3682 if (err) { 3683 onstack_fence_fini(&fence); 3684 goto out; 3685 } 3686 3687 rq[n + 1] = i915_request_create(ve); 3688 intel_context_unpin(ve); 3689 if (IS_ERR(rq[n + 1])) { 3690 err = PTR_ERR(rq[n + 1]); 3691 onstack_fence_fini(&fence); 3692 goto out; 3693 } 3694 i915_request_get(rq[n + 1]); 3695 3696 err = i915_request_await_execution(rq[n + 1], 3697 &rq[0]->fence, 3698 ve->engine->bond_execute); 3699 i915_request_add(rq[n + 1]); 3700 if (err < 0) { 3701 onstack_fence_fini(&fence); 3702 goto out; 3703 } 3704 } 3705 onstack_fence_fini(&fence); 3706 intel_engine_flush_submission(master); 3707 igt_spinner_end(&spin); 3708 3709 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 3710 pr_err("Master request did not execute (on %s)!\n", 3711 rq[0]->engine->name); 3712 err = -EIO; 3713 goto out; 3714 } 3715 3716 for (n = 0; n < nsibling; n++) { 3717 if (i915_request_wait(rq[n + 1], 0, 3718 MAX_SCHEDULE_TIMEOUT) < 0) { 3719 err = -EIO; 3720 goto out; 3721 } 3722 3723 if (rq[n + 1]->engine != siblings[n]) { 3724 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 3725 siblings[n]->name, 3726 rq[n + 1]->engine->name, 3727 rq[0]->engine->name); 3728 err = -EINVAL; 3729 goto out; 3730 } 3731 } 3732 3733 for (n = 0; !IS_ERR(rq[n]); n++) 3734 i915_request_put(rq[n]); 3735 rq[0] = ERR_PTR(-ENOMEM); 3736 } 3737 3738 out: 3739 for (n = 0; !IS_ERR(rq[n]); n++) 3740 i915_request_put(rq[n]); 3741 if (igt_flush_test(gt->i915)) 3742 err = -EIO; 3743 3744 igt_spinner_fini(&spin); 3745 return err; 3746 } 3747 3748 static int live_virtual_bond(void *arg) 3749 { 3750 static const struct phase { 3751 const char *name; 3752 unsigned int flags; 3753 } phases[] = { 3754 { "", 0 }, 3755 { "schedule", BOND_SCHEDULE }, 3756 { }, 3757 }; 3758 struct intel_gt *gt = arg; 3759 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3760 unsigned int class, inst; 3761 int err; 3762 3763 if (intel_uc_uses_guc_submission(>->uc)) 
) 3764 return 0; 3765 3766 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3767 const struct phase *p; 3768 int nsibling; 3769 3770 nsibling = 0; 3771 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3772 if (!gt->engine_class[class][inst]) 3773 break; 3774 3775 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); 3776 siblings[nsibling++] = gt->engine_class[class][inst]; 3777 } 3778 if (nsibling < 2) 3779 continue; 3780 3781 for (p = phases; p->name; p++) { 3782 err = bond_virtual_engine(gt, 3783 class, siblings, nsibling, 3784 p->flags); 3785 if (err) { 3786 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 3787 __func__, p->name, class, nsibling, err); 3788 return err; 3789 } 3790 } 3791 } 3792 3793 return 0; 3794 } 3795 3796 static int reset_virtual_engine(struct intel_gt *gt, 3797 struct intel_engine_cs **siblings, 3798 unsigned int nsibling) 3799 { 3800 struct intel_engine_cs *engine; 3801 struct intel_context *ve; 3802 unsigned long *heartbeat; 3803 struct igt_spinner spin; 3804 struct i915_request *rq; 3805 unsigned int n; 3806 int err = 0; 3807 3808 /* 3809 * In order to support offline error capture for fast preempt reset, 3810 * we need to decouple the guilty request and ensure that it and its 3811 * descendants are not executed while the capture is in progress. 3812 */ 3813 3814 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); 3815 if (!heartbeat) 3816 return -ENOMEM; 3817 3818 if (igt_spinner_init(&spin, gt)) { 3819 err = -ENOMEM; 3820 goto out_free; 3821 } 3822 3823 ve = intel_execlists_create_virtual(siblings, nsibling); 3824 if (IS_ERR(ve)) { 3825 err = PTR_ERR(ve); 3826 goto out_spin; 3827 } 3828 3829 for (n = 0; n < nsibling; n++) 3830 engine_heartbeat_disable(siblings[n], &heartbeat[n]); 3831 3832 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 3833 if (IS_ERR(rq)) { 3834 err = PTR_ERR(rq); 3835 goto out_heartbeat; 3836 } 3837 i915_request_add(rq); 3838 3839 if (!igt_wait_for_spinner(&spin, rq)) { 3840 intel_gt_set_wedged(gt); 3841 err = -ETIME; 3842 goto out_heartbeat; 3843 } 3844 3845 engine = rq->engine; 3846 GEM_BUG_ON(engine == ve->engine); 3847 3848 /* Take ownership of the reset and tasklet */ 3849 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 3850 &gt->reset.flags)) { 3851 intel_gt_set_wedged(gt); 3852 err = -EBUSY; 3853 goto out_heartbeat; 3854 } 3855 tasklet_disable(&engine->execlists.tasklet); 3856 3857 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 3858 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 3859 3860 /* Fake a preemption event; failed of course */ 3861 spin_lock_irq(&engine->active.lock); 3862 __unwind_incomplete_requests(engine); 3863 spin_unlock_irq(&engine->active.lock); 3864 GEM_BUG_ON(rq->engine != ve->engine); 3865 3866 /* Reset the engine while keeping our active request on hold */ 3867 execlists_hold(engine, rq); 3868 GEM_BUG_ON(!i915_request_on_hold(rq)); 3869 3870 intel_engine_reset(engine, NULL); 3871 GEM_BUG_ON(rq->fence.error != -EIO); 3872 3873 /* Release our grasp on the engine, letting CS flow again */ 3874 tasklet_enable(&engine->execlists.tasklet); 3875 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags); 3876 3877 /* Check that we do not resubmit the held request */ 3878 i915_request_get(rq); 3879 if (!i915_request_wait(rq, 0, HZ / 5)) { 3880 pr_err("%s: on hold request completed!\n", 3881 engine->name); 3882 intel_gt_set_wedged(gt); 3883 err = -EIO; 3884 goto out_rq; 3885 } 3886 GEM_BUG_ON(!i915_request_on_hold(rq)); 3887 3888 /* But is
resubmitted on release */ 3889 execlists_unhold(engine, rq); 3890 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 3891 pr_err("%s: held request did not complete!\n", 3892 engine->name); 3893 intel_gt_set_wedged(gt); 3894 err = -ETIME; 3895 } 3896 3897 out_rq: 3898 i915_request_put(rq); 3899 out_heartbeat: 3900 for (n = 0; n < nsibling; n++) 3901 engine_heartbeat_enable(siblings[n], heartbeat[n]); 3902 3903 intel_context_put(ve); 3904 out_spin: 3905 igt_spinner_fini(&spin); 3906 out_free: 3907 kfree(heartbeat); 3908 return err; 3909 } 3910 3911 static int live_virtual_reset(void *arg) 3912 { 3913 struct intel_gt *gt = arg; 3914 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3915 unsigned int class, inst; 3916 3917 /* 3918 * Check that we handle a reset event within a virtual engine. 3919 * Only the physical engine is reset, but we have to check the flow 3920 * of the virtual requests around the reset, and make sure it is not 3921 * forgotten. 3922 */ 3923 3924 if (intel_uc_uses_guc_submission(>->uc)) 3925 return 0; 3926 3927 if (!intel_has_reset_engine(gt)) 3928 return 0; 3929 3930 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3931 int nsibling, err; 3932 3933 nsibling = 0; 3934 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3935 if (!gt->engine_class[class][inst]) 3936 continue; 3937 3938 siblings[nsibling++] = gt->engine_class[class][inst]; 3939 } 3940 if (nsibling < 2) 3941 continue; 3942 3943 err = reset_virtual_engine(gt, siblings, nsibling); 3944 if (err) 3945 return err; 3946 } 3947 3948 return 0; 3949 } 3950 3951 int intel_execlists_live_selftests(struct drm_i915_private *i915) 3952 { 3953 static const struct i915_subtest tests[] = { 3954 SUBTEST(live_sanitycheck), 3955 SUBTEST(live_unlite_switch), 3956 SUBTEST(live_unlite_preempt), 3957 SUBTEST(live_pin_rewind), 3958 SUBTEST(live_hold_reset), 3959 SUBTEST(live_error_interrupt), 3960 SUBTEST(live_timeslice_preempt), 3961 SUBTEST(live_timeslice_rewind), 3962 SUBTEST(live_timeslice_queue), 3963 SUBTEST(live_busywait_preempt), 3964 SUBTEST(live_preempt), 3965 SUBTEST(live_late_preempt), 3966 SUBTEST(live_nopreempt), 3967 SUBTEST(live_preempt_cancel), 3968 SUBTEST(live_suppress_self_preempt), 3969 SUBTEST(live_suppress_wait_preempt), 3970 SUBTEST(live_chain_preempt), 3971 SUBTEST(live_preempt_gang), 3972 SUBTEST(live_preempt_timeout), 3973 SUBTEST(live_preempt_smoke), 3974 SUBTEST(live_virtual_engine), 3975 SUBTEST(live_virtual_mask), 3976 SUBTEST(live_virtual_preserved), 3977 SUBTEST(live_virtual_bond), 3978 SUBTEST(live_virtual_reset), 3979 }; 3980 3981 if (!HAS_EXECLISTS(i915)) 3982 return 0; 3983 3984 if (intel_gt_is_wedged(&i915->gt)) 3985 return 0; 3986 3987 return intel_gt_live_subtests(tests, &i915->gt); 3988 } 3989 3990 static void hexdump(const void *buf, size_t len) 3991 { 3992 const size_t rowsize = 8 * sizeof(u32); 3993 const void *prev = NULL; 3994 bool skip = false; 3995 size_t pos; 3996 3997 for (pos = 0; pos < len; pos += rowsize) { 3998 char line[128]; 3999 4000 if (prev && !memcmp(prev, buf + pos, rowsize)) { 4001 if (!skip) { 4002 pr_info("*\n"); 4003 skip = true; 4004 } 4005 continue; 4006 } 4007 4008 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, 4009 rowsize, sizeof(u32), 4010 line, sizeof(line), 4011 false) >= sizeof(line)); 4012 pr_info("[%04zx] %s\n", pos, line); 4013 4014 prev = buf + pos; 4015 skip = false; 4016 } 4017 } 4018 4019 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 4020 { 4021 const u32 offset = 4022 
i915_ggtt_offset(ce->engine->status_page.vma) + 4023 offset_in_page(slot); 4024 struct i915_request *rq; 4025 u32 *cs; 4026 4027 rq = intel_context_create_request(ce); 4028 if (IS_ERR(rq)) 4029 return PTR_ERR(rq); 4030 4031 cs = intel_ring_begin(rq, 4); 4032 if (IS_ERR(cs)) { 4033 i915_request_add(rq); 4034 return PTR_ERR(cs); 4035 } 4036 4037 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4038 *cs++ = offset; 4039 *cs++ = 0; 4040 *cs++ = 1; 4041 4042 intel_ring_advance(rq, cs); 4043 4044 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4045 i915_request_add(rq); 4046 return 0; 4047 } 4048 4049 static int context_flush(struct intel_context *ce, long timeout) 4050 { 4051 struct i915_request *rq; 4052 struct dma_fence *fence; 4053 int err = 0; 4054 4055 rq = intel_engine_create_kernel_request(ce->engine); 4056 if (IS_ERR(rq)) 4057 return PTR_ERR(rq); 4058 4059 fence = i915_active_fence_get(&ce->timeline->last_request); 4060 if (fence) { 4061 i915_request_await_dma_fence(rq, fence); 4062 dma_fence_put(fence); 4063 } 4064 4065 rq = i915_request_get(rq); 4066 i915_request_add(rq); 4067 if (i915_request_wait(rq, 0, timeout) < 0) 4068 err = -ETIME; 4069 i915_request_put(rq); 4070 4071 rmb(); /* We know the request is written, make sure all state is too! */ 4072 return err; 4073 } 4074 4075 static int live_lrc_layout(void *arg) 4076 { 4077 struct intel_gt *gt = arg; 4078 struct intel_engine_cs *engine; 4079 enum intel_engine_id id; 4080 u32 *lrc; 4081 int err; 4082 4083 /* 4084 * Check the registers offsets we use to create the initial reg state 4085 * match the layout saved by HW. 4086 */ 4087 4088 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4089 if (!lrc) 4090 return -ENOMEM; 4091 4092 err = 0; 4093 for_each_engine(engine, gt, id) { 4094 u32 *hw; 4095 int dw; 4096 4097 if (!engine->default_state) 4098 continue; 4099 4100 hw = i915_gem_object_pin_map(engine->default_state, 4101 I915_MAP_WB); 4102 if (IS_ERR(hw)) { 4103 err = PTR_ERR(hw); 4104 break; 4105 } 4106 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 4107 4108 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4109 engine->kernel_context, 4110 engine, 4111 engine->kernel_context->ring, 4112 true); 4113 4114 dw = 0; 4115 do { 4116 u32 lri = hw[dw]; 4117 4118 if (lri == 0) { 4119 dw++; 4120 continue; 4121 } 4122 4123 if (lrc[dw] == 0) { 4124 pr_debug("%s: skipped instruction %x at dword %d\n", 4125 engine->name, lri, dw); 4126 dw++; 4127 continue; 4128 } 4129 4130 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4131 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4132 engine->name, dw, lri); 4133 err = -EINVAL; 4134 break; 4135 } 4136 4137 if (lrc[dw] != lri) { 4138 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4139 engine->name, dw, lri, lrc[dw]); 4140 err = -EINVAL; 4141 break; 4142 } 4143 4144 lri &= 0x7f; 4145 lri++; 4146 dw++; 4147 4148 while (lri) { 4149 if (hw[dw] != lrc[dw]) { 4150 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4151 engine->name, dw, hw[dw], lrc[dw]); 4152 err = -EINVAL; 4153 break; 4154 } 4155 4156 /* 4157 * Skip over the actual register value as we 4158 * expect that to differ. 
4159 */ 4160 dw += 2; 4161 lri -= 2; 4162 } 4163 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4164 4165 if (err) { 4166 pr_info("%s: HW register image:\n", engine->name); 4167 hexdump(hw, PAGE_SIZE); 4168 4169 pr_info("%s: SW register image:\n", engine->name); 4170 hexdump(lrc, PAGE_SIZE); 4171 } 4172 4173 i915_gem_object_unpin_map(engine->default_state); 4174 if (err) 4175 break; 4176 } 4177 4178 kfree(lrc); 4179 return err; 4180 } 4181 4182 static int find_offset(const u32 *lri, u32 offset) 4183 { 4184 int i; 4185 4186 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4187 if (lri[i] == offset) 4188 return i; 4189 4190 return -1; 4191 } 4192 4193 static int live_lrc_fixed(void *arg) 4194 { 4195 struct intel_gt *gt = arg; 4196 struct intel_engine_cs *engine; 4197 enum intel_engine_id id; 4198 int err = 0; 4199 4200 /* 4201 * Check the assumed register offsets match the actual locations in 4202 * the context image. 4203 */ 4204 4205 for_each_engine(engine, gt, id) { 4206 const struct { 4207 u32 reg; 4208 u32 offset; 4209 const char *name; 4210 } tbl[] = { 4211 { 4212 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 4213 CTX_RING_START - 1, 4214 "RING_START" 4215 }, 4216 { 4217 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 4218 CTX_RING_CTL - 1, 4219 "RING_CTL" 4220 }, 4221 { 4222 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 4223 CTX_RING_HEAD - 1, 4224 "RING_HEAD" 4225 }, 4226 { 4227 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 4228 CTX_RING_TAIL - 1, 4229 "RING_TAIL" 4230 }, 4231 { 4232 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 4233 lrc_ring_mi_mode(engine), 4234 "RING_MI_MODE" 4235 }, 4236 { 4237 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 4238 CTX_BB_STATE - 1, 4239 "BB_STATE" 4240 }, 4241 { 4242 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 4243 CTX_TIMESTAMP - 1, 4244 "RING_CTX_TIMESTAMP" 4245 }, 4246 { }, 4247 }, *t; 4248 u32 *hw; 4249 4250 if (!engine->default_state) 4251 continue; 4252 4253 hw = i915_gem_object_pin_map(engine->default_state, 4254 I915_MAP_WB); 4255 if (IS_ERR(hw)) { 4256 err = PTR_ERR(hw); 4257 break; 4258 } 4259 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 4260 4261 for (t = tbl; t->name; t++) { 4262 int dw = find_offset(hw, t->reg); 4263 4264 if (dw != t->offset) { 4265 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 4266 engine->name, 4267 t->name, 4268 t->reg, 4269 dw, 4270 t->offset); 4271 err = -EINVAL; 4272 } 4273 } 4274 4275 i915_gem_object_unpin_map(engine->default_state); 4276 } 4277 4278 return err; 4279 } 4280 4281 static int __live_lrc_state(struct intel_engine_cs *engine, 4282 struct i915_vma *scratch) 4283 { 4284 struct intel_context *ce; 4285 struct i915_request *rq; 4286 enum { 4287 RING_START_IDX = 0, 4288 RING_TAIL_IDX, 4289 MAX_IDX 4290 }; 4291 u32 expected[MAX_IDX]; 4292 u32 *cs; 4293 int err; 4294 int n; 4295 4296 ce = intel_context_create(engine); 4297 if (IS_ERR(ce)) 4298 return PTR_ERR(ce); 4299 4300 err = intel_context_pin(ce); 4301 if (err) 4302 goto err_put; 4303 4304 rq = i915_request_create(ce); 4305 if (IS_ERR(rq)) { 4306 err = PTR_ERR(rq); 4307 goto err_unpin; 4308 } 4309 4310 cs = intel_ring_begin(rq, 4 * MAX_IDX); 4311 if (IS_ERR(cs)) { 4312 err = PTR_ERR(cs); 4313 i915_request_add(rq); 4314 goto err_unpin; 4315 } 4316 4317 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4318 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 4319 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 4320 *cs++ = 0; 4321 4322 
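/* The SRM above copies the live RING_START register into scratch[0]; record the value we expect to read back, i.e. the GGTT offset of this context's ring. */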
expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 4323 4324 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4325 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 4326 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 4327 *cs++ = 0; 4328 4329 i915_vma_lock(scratch); 4330 err = i915_request_await_object(rq, scratch->obj, true); 4331 if (!err) 4332 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4333 i915_vma_unlock(scratch); 4334 4335 i915_request_get(rq); 4336 i915_request_add(rq); 4337 if (err) 4338 goto err_rq; 4339 4340 intel_engine_flush_submission(engine); 4341 expected[RING_TAIL_IDX] = ce->ring->tail; 4342 4343 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4344 err = -ETIME; 4345 goto err_rq; 4346 } 4347 4348 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4349 if (IS_ERR(cs)) { 4350 err = PTR_ERR(cs); 4351 goto err_rq; 4352 } 4353 4354 for (n = 0; n < MAX_IDX; n++) { 4355 if (cs[n] != expected[n]) { 4356 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 4357 engine->name, n, cs[n], expected[n]); 4358 err = -EINVAL; 4359 break; 4360 } 4361 } 4362 4363 i915_gem_object_unpin_map(scratch->obj); 4364 4365 err_rq: 4366 i915_request_put(rq); 4367 err_unpin: 4368 intel_context_unpin(ce); 4369 err_put: 4370 intel_context_put(ce); 4371 return err; 4372 } 4373 4374 static int live_lrc_state(void *arg) 4375 { 4376 struct intel_gt *gt = arg; 4377 struct intel_engine_cs *engine; 4378 struct i915_vma *scratch; 4379 enum intel_engine_id id; 4380 int err = 0; 4381 4382 /* 4383 * Check the live register state matches what we expect for this 4384 * intel_context. 4385 */ 4386 4387 scratch = create_scratch(gt); 4388 if (IS_ERR(scratch)) 4389 return PTR_ERR(scratch); 4390 4391 for_each_engine(engine, gt, id) { 4392 err = __live_lrc_state(engine, scratch); 4393 if (err) 4394 break; 4395 } 4396 4397 if (igt_flush_test(gt->i915)) 4398 err = -EIO; 4399 4400 i915_vma_unpin_and_release(&scratch, 0); 4401 return err; 4402 } 4403 4404 static int gpr_make_dirty(struct intel_context *ce) 4405 { 4406 struct i915_request *rq; 4407 u32 *cs; 4408 int n; 4409 4410 rq = intel_context_create_request(ce); 4411 if (IS_ERR(rq)) 4412 return PTR_ERR(rq); 4413 4414 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 4415 if (IS_ERR(cs)) { 4416 i915_request_add(rq); 4417 return PTR_ERR(cs); 4418 } 4419 4420 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 4421 for (n = 0; n < NUM_GPR_DW; n++) { 4422 *cs++ = CS_GPR(ce->engine, n); 4423 *cs++ = STACK_MAGIC; 4424 } 4425 *cs++ = MI_NOOP; 4426 4427 intel_ring_advance(rq, cs); 4428 4429 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4430 i915_request_add(rq); 4431 4432 return 0; 4433 } 4434 4435 static struct i915_request * 4436 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 4437 { 4438 const u32 offset = 4439 i915_ggtt_offset(ce->engine->status_page.vma) + 4440 offset_in_page(slot); 4441 struct i915_request *rq; 4442 u32 *cs; 4443 int err; 4444 int n; 4445 4446 rq = intel_context_create_request(ce); 4447 if (IS_ERR(rq)) 4448 return rq; 4449 4450 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 4451 if (IS_ERR(cs)) { 4452 i915_request_add(rq); 4453 return ERR_CAST(cs); 4454 } 4455 4456 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4457 *cs++ = MI_NOOP; 4458 4459 *cs++ = MI_SEMAPHORE_WAIT | 4460 MI_SEMAPHORE_GLOBAL_GTT | 4461 MI_SEMAPHORE_POLL | 4462 MI_SEMAPHORE_SAD_NEQ_SDD; 4463 *cs++ = 0; 4464 *cs++ = offset; 4465 *cs++ = 0; 4466 4467 for (n = 0; n < NUM_GPR_DW; n++) { 4468 *cs++ = 
MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4469 *cs++ = CS_GPR(ce->engine, n); 4470 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4471 *cs++ = 0; 4472 } 4473 4474 i915_vma_lock(scratch); 4475 err = i915_request_await_object(rq, scratch->obj, true); 4476 if (!err) 4477 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4478 i915_vma_unlock(scratch); 4479 4480 i915_request_get(rq); 4481 i915_request_add(rq); 4482 if (err) { 4483 i915_request_put(rq); 4484 rq = ERR_PTR(err); 4485 } 4486 4487 return rq; 4488 } 4489 4490 static int __live_lrc_gpr(struct intel_engine_cs *engine, 4491 struct i915_vma *scratch, 4492 bool preempt) 4493 { 4494 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 4495 struct intel_context *ce; 4496 struct i915_request *rq; 4497 u32 *cs; 4498 int err; 4499 int n; 4500 4501 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 4502 return 0; /* GPR only on rcs0 for gen8 */ 4503 4504 err = gpr_make_dirty(engine->kernel_context); 4505 if (err) 4506 return err; 4507 4508 ce = intel_context_create(engine); 4509 if (IS_ERR(ce)) 4510 return PTR_ERR(ce); 4511 4512 rq = __gpr_read(ce, scratch, slot); 4513 if (IS_ERR(rq)) { 4514 err = PTR_ERR(rq); 4515 goto err_put; 4516 } 4517 4518 err = wait_for_submit(engine, rq, HZ / 2); 4519 if (err) 4520 goto err_rq; 4521 4522 if (preempt) { 4523 err = gpr_make_dirty(engine->kernel_context); 4524 if (err) 4525 goto err_rq; 4526 4527 err = emit_semaphore_signal(engine->kernel_context, slot); 4528 if (err) 4529 goto err_rq; 4530 } else { 4531 slot[0] = 1; 4532 wmb(); 4533 } 4534 4535 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4536 err = -ETIME; 4537 goto err_rq; 4538 } 4539 4540 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4541 if (IS_ERR(cs)) { 4542 err = PTR_ERR(cs); 4543 goto err_rq; 4544 } 4545 4546 for (n = 0; n < NUM_GPR_DW; n++) { 4547 if (cs[n]) { 4548 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 4549 engine->name, 4550 n / 2, n & 1 ? "udw" : "ldw", 4551 cs[n]); 4552 err = -EINVAL; 4553 break; 4554 } 4555 } 4556 4557 i915_gem_object_unpin_map(scratch->obj); 4558 4559 err_rq: 4560 memset32(&slot[0], -1, 4); 4561 wmb(); 4562 i915_request_put(rq); 4563 err_put: 4564 intel_context_put(ce); 4565 return err; 4566 } 4567 4568 static int live_lrc_gpr(void *arg) 4569 { 4570 struct intel_gt *gt = arg; 4571 struct intel_engine_cs *engine; 4572 struct i915_vma *scratch; 4573 enum intel_engine_id id; 4574 int err = 0; 4575 4576 /* 4577 * Check that GPR registers are cleared in new contexts as we need 4578 * to avoid leaking any information from previous contexts. 
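 * (The kernel context is deliberately dirtied via gpr_make_dirty() before
 * each pass, so a leak from the previous context would show up as a
 * non-zero GPR value in the scratch readback.)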
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	for_each_engine(engine, gt, id) {
		unsigned long heartbeat;

		engine_heartbeat_disable(engine, &heartbeat);

		err = __live_lrc_gpr(engine, scratch, false);
		if (err)
			goto err;

		err = __live_lrc_gpr(engine, scratch, true);
		if (err)
			goto err;

err:
		engine_heartbeat_enable(engine, heartbeat);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}

static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
{
	const u32 offset =
		i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_MASK;
	err = 0;
err:
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(err);
	}

	return rq;
}

struct lrc_timestamp {
	struct intel_engine_cs *engine;
	struct intel_context *ce[2];
	u32 poison;
};

static bool timestamp_advanced(u32 start, u32 end)
{
	return (s32)(end - start) > 0;
}

static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
	struct i915_request *rq;
	u32 timestamp;
	int err = 0;

	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(arg->ce[0], slot, 1);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	err = wait_for_submit(rq->engine, rq, HZ / 2);
	if (err)
		goto err;

	if (preempt) {
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(arg->ce[1], slot);
		if (err)
			goto err;
	} else {
		slot[0] = 1;
		wmb();
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(arg->ce[0], HZ / 2);
	if (err)
		goto err;

	if (!timestamp_advanced(arg->poison, slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(slot[1], timestamp)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       slot[1], timestamp);
		err = -EINVAL;
	}

err:
	memset32(slot, -1, 4);
	i915_request_put(rq);
	return err;
}

static int live_lrc_timestamp(void *arg)
{
	struct lrc_timestamp data = {};
	struct intel_gt *gt = arg;
	enum intel_engine_id id;
	const u32 poison[] = {
		0,
		S32_MAX,
		(u32)S32_MAX + 1,
		U32_MAX,
	};

	/*
	 * We want to verify that the timestamp is saved and restored across
	 * context switches and is monotonic.
	 *
	 * So we do this with a little bit of LRC poisoning to check various
	 * boundary conditions, and see what happens if we preempt the context
	 * with a second request (carrying more poison into the timestamp).
	 */

	for_each_engine(data.engine, gt, id) {
		unsigned long heartbeat;
		int i, err = 0;

		engine_heartbeat_disable(data.engine, &heartbeat);

		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			struct intel_context *tmp;

			tmp = intel_context_create(data.engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err;
			}

			data.ce[i] = tmp;
		}

		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			data.poison = poison[i];

			err = __lrc_timestamp(&data, false);
			if (err)
				break;

			err = __lrc_timestamp(&data, true);
			if (err)
				break;
		}

err:
		engine_heartbeat_enable(data.engine, heartbeat);
		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
			if (!data.ce[i])
				break;

			intel_context_unpin(data.ce[i]);
			intel_context_put(data.ce[i]);
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static struct i915_vma *
create_user_vma(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

/*
 * Build a batch that walks the LRI lists in the default context image and
 * emits an SRM for each register named there, copying its current value out
 * to the scratch vma.
 */
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	x = 0;
	dw = 0;
	hw = ce->engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(scratch->node.start + x);
			*cs++ = upper_32_bits(scratch->node.start + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, flags);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

/*
 * Capture the context registers twice from within ce: once into @before,
 * then wait on the semaphore, then once more into @after.
 */
static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}

/*
 * Build a batch that rewrites every register named in the LRI lists of the
 * default context image with the poison value.
 */
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	dw = 0;
	hw = ce->engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = poison;
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

/*
 * Run the poisoning batch from ce, then raise the semaphore to release the
 * waiter in record_registers().
 */
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(batch->node.start);
	*cs++ = upper_32_bits(batch->node.start);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}

static bool is_moving(u32 a, u32 b)
{
	return a != b;
}

/*
 * Compare the two captures: any register that was stable across the
 * reference run must also be unchanged in the poisoned run, with the
 * exception of RING_HEAD/RING_TAIL which the HW advances by itself.
 */
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	int err = 0;

	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	lrc = i915_gem_object_pin_map(ce->state->obj,
				      i915_coherent_map_type(engine->i915));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

	x = 0;
	dw = 0;
	hw = engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
					break;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	err = poison_registers(B, poison, sema);
	if (err) {
		WRITE_ONCE(*sema, -1);
		i915_request_put(rq);
		goto err_result1;
	}

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_result1;
	}
	i915_request_put(rq);

	err = compare_isolation(engine, ref, result, A, poison);

err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}

static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
		return true;

	return false;
}

static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};

	/*
	 * Our goal is to try to verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
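	 *
	 * Concretely: record the registers from context A into a reference
	 * buffer, repeat the capture while context B rewrites its own copy
	 * of those registers with a poison value, and then compare the two
	 * snapshots. Any register that was stable in the reference capture
	 * but changed in the second one (other than RING_HEAD/RING_TAIL)
	 * indicates state leaking between the contexts.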
	 */

	for_each_engine(engine, gt, id) {
		int err = 0;
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		if (engine->pinned_default_state) {
			for (i = 0; i < ARRAY_SIZE(poison); i++) {
				err = __lrc_isolation(engine, poison[i]);
				if (err)
					break;

				err = __lrc_isolation(engine, ~poison[i]);
				if (err)
					break;
			}
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	if (test_and_set_bit(bit, lock))
		return;

	tasklet_disable(&engine->execlists.tasklet);

	if (!rq->fence.error)
		intel_engine_reset(engine, NULL);

	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(bit, lock);
}

static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_PN * PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}

static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
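	 *
	 * The corruption is induced by filling the pinned context image
	 * with random bytes before submitting a request on it; once that
	 * request reaches the HW, the engine is reset and the request is
	 * expected to be flagged with a fence error and to complete.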
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->runtime.num_underflow = 0;
	ce->runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->runtime.num_underflow,
		       ce->runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that cumulative context runtime as stored in the pphwsp[16]
	 * is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}