1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2018 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_engine_heartbeat.h" 11 #include "gt/intel_reset.h" 12 13 #include "i915_selftest.h" 14 #include "selftests/i915_random.h" 15 #include "selftests/igt_flush_test.h" 16 #include "selftests/igt_live_test.h" 17 #include "selftests/igt_spinner.h" 18 #include "selftests/lib_sw_fence.h" 19 20 #include "gem/selftests/igt_gem_utils.h" 21 #include "gem/selftests/mock_context.h" 22 23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) 24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ 25 26 static struct i915_vma *create_scratch(struct intel_gt *gt) 27 { 28 struct drm_i915_gem_object *obj; 29 struct i915_vma *vma; 30 int err; 31 32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 33 if (IS_ERR(obj)) 34 return ERR_CAST(obj); 35 36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); 37 38 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 39 if (IS_ERR(vma)) { 40 i915_gem_object_put(obj); 41 return vma; 42 } 43 44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 45 if (err) { 46 i915_gem_object_put(obj); 47 return ERR_PTR(err); 48 } 49 50 return vma; 51 } 52 53 static void engine_heartbeat_disable(struct intel_engine_cs *engine, 54 unsigned long *saved) 55 { 56 *saved = engine->props.heartbeat_interval_ms; 57 engine->props.heartbeat_interval_ms = 0; 58 59 intel_engine_pm_get(engine); 60 intel_engine_park_heartbeat(engine); 61 } 62 63 static void engine_heartbeat_enable(struct intel_engine_cs *engine, 64 unsigned long saved) 65 { 66 intel_engine_pm_put(engine); 67 68 engine->props.heartbeat_interval_ms = saved; 69 } 70 71 static int wait_for_submit(struct intel_engine_cs *engine, 72 struct i915_request *rq, 73 unsigned long timeout) 74 { 75 timeout += jiffies; 76 do { 77 cond_resched(); 78 intel_engine_flush_submission(engine); 79 80 if (READ_ONCE(engine->execlists.pending[0])) 81 continue; 82 83 if (i915_request_is_active(rq)) 84 return 0; 85 86 if (i915_request_started(rq)) /* that was quick! 
*/ 87 return 0; 88 } while (time_before(jiffies, timeout)); 89 90 return -ETIME; 91 } 92 93 static int live_sanitycheck(void *arg) 94 { 95 struct intel_gt *gt = arg; 96 struct intel_engine_cs *engine; 97 enum intel_engine_id id; 98 struct igt_spinner spin; 99 int err = 0; 100 101 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) 102 return 0; 103 104 if (igt_spinner_init(&spin, gt)) 105 return -ENOMEM; 106 107 for_each_engine(engine, gt, id) { 108 struct intel_context *ce; 109 struct i915_request *rq; 110 111 ce = intel_context_create(engine); 112 if (IS_ERR(ce)) { 113 err = PTR_ERR(ce); 114 break; 115 } 116 117 rq = igt_spinner_create_request(&spin, ce, MI_NOOP); 118 if (IS_ERR(rq)) { 119 err = PTR_ERR(rq); 120 goto out_ctx; 121 } 122 123 i915_request_add(rq); 124 if (!igt_wait_for_spinner(&spin, rq)) { 125 GEM_TRACE("spinner failed to start\n"); 126 GEM_TRACE_DUMP(); 127 intel_gt_set_wedged(gt); 128 err = -EIO; 129 goto out_ctx; 130 } 131 132 igt_spinner_end(&spin); 133 if (igt_flush_test(gt->i915)) { 134 err = -EIO; 135 goto out_ctx; 136 } 137 138 out_ctx: 139 intel_context_put(ce); 140 if (err) 141 break; 142 } 143 144 igt_spinner_fini(&spin); 145 return err; 146 } 147 148 static int live_unlite_restore(struct intel_gt *gt, int prio) 149 { 150 struct intel_engine_cs *engine; 151 enum intel_engine_id id; 152 struct igt_spinner spin; 153 int err = -ENOMEM; 154 155 /* 156 * Check that we can correctly context switch between 2 instances 157 * on the same engine from the same parent context. 158 */ 159 160 if (igt_spinner_init(&spin, gt)) 161 return err; 162 163 err = 0; 164 for_each_engine(engine, gt, id) { 165 struct intel_context *ce[2] = {}; 166 struct i915_request *rq[2]; 167 struct igt_live_test t; 168 unsigned long saved; 169 int n; 170 171 if (prio && !intel_engine_has_preemption(engine)) 172 continue; 173 174 if (!intel_engine_can_store_dword(engine)) 175 continue; 176 177 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 178 err = -EIO; 179 break; 180 } 181 engine_heartbeat_disable(engine, &saved); 182 183 for (n = 0; n < ARRAY_SIZE(ce); n++) { 184 struct intel_context *tmp; 185 186 tmp = intel_context_create(engine); 187 if (IS_ERR(tmp)) { 188 err = PTR_ERR(tmp); 189 goto err_ce; 190 } 191 192 err = intel_context_pin(tmp); 193 if (err) { 194 intel_context_put(tmp); 195 goto err_ce; 196 } 197 198 /* 199 * Setup the pair of contexts such that if we 200 * lite-restore using the RING_TAIL from ce[1] it 201 * will execute garbage from ce[0]->ring. 202 */ 203 memset(tmp->ring->vaddr, 204 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 205 tmp->ring->vma->size); 206 207 ce[n] = tmp; 208 } 209 GEM_BUG_ON(!ce[1]->ring->size); 210 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 211 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); 212 213 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 214 if (IS_ERR(rq[0])) { 215 err = PTR_ERR(rq[0]); 216 goto err_ce; 217 } 218 219 i915_request_get(rq[0]); 220 i915_request_add(rq[0]); 221 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 222 223 if (!igt_wait_for_spinner(&spin, rq[0])) { 224 i915_request_put(rq[0]); 225 goto err_ce; 226 } 227 228 rq[1] = i915_request_create(ce[1]); 229 if (IS_ERR(rq[1])) { 230 err = PTR_ERR(rq[1]); 231 i915_request_put(rq[0]); 232 goto err_ce; 233 } 234 235 if (!prio) { 236 /* 237 * Ensure we do the switch to ce[1] on completion. 
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		engine_heartbeat_enable(engine, saved);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static int live_unlite_switch(void *arg)
{
	return live_unlite_restore(arg, 0);
}

static int live_unlite_preempt(void *arg)
{
	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}

static int live_pin_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * We have to be careful not to trust intel_ring too much, for example
	 * ring->head is updated upon retire which is out of sync with pinning
	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
	 * or else we risk writing an older, stale value.
	 *
	 * To simulate this, let's apply a bit of deliberate sabotage.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		struct intel_ring *ring;
		struct igt_live_test t;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			break;
		}

		/* Keep the context awake while we play games */
		err = i915_active_acquire(&ce->active);
		if (err) {
			intel_context_unpin(ce);
			intel_context_put(ce);
			break;
		}
		ring = ce->ring;

		/* Poison the ring, and offset the next request from HEAD */
		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
		ring->emit = ring->size / 2;
		ring->tail = ring->emit;
		GEM_BUG_ON(ring->head);

		intel_context_unpin(ce);

		/* Submit a simple nop request */
		GEM_BUG_ON(intel_context_is_pinned(ce));
		rq = intel_context_create_request(ce);
		i915_active_release(&ce->active); /* e.g. async retire */
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		GEM_BUG_ON(!rq->head);
		i915_request_add(rq);

		/* Expect not to hang! */
		if (igt_live_test_end(&t)) {
			err = -EIO;
			break;
		}
	}

	return err;
}

static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long heartbeat;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		engine_heartbeat_disable(engine, &heartbeat);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		if (test_and_set_bit(I915_RESET_ENGINE + id,
				     &gt->reset.flags)) {
			intel_gt_set_wedged(gt);
			err = -EBUSY;
			goto out;
		}
		tasklet_disable(&engine->execlists.tasklet);

		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		intel_engine_reset(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		tasklet_enable(&engine->execlists.tasklet);
		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
				      &gt->reset.flags);

		/* Check that we do not resubmit the held request */
		if (!i915_request_wait(rq, 0, HZ / 5)) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		engine_heartbeat_enable(engine, heartbeat);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}

static const char *error_repr(int err)
{
	return err ? "bad" : "good";
}

static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD, GOOD } },
		{ { BAD, BAD } },
		{ { BAD, GOOD } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		const struct error_phase *p;
		unsigned long heartbeat;
		int err = 0;

		engine_heartbeat_disable(engine, &heartbeat);

		for (p = phases; p->error[0] != GOOD; p++) {
			struct i915_request *client[ARRAY_SIZE(phases->error)];
			u32 *cs;
			int i;

			memset(client, 0, sizeof(client));
			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct intel_context *ce;
				struct i915_request *rq;

				ce = intel_context_create(engine);
				if (IS_ERR(ce)) {
					err = PTR_ERR(ce);
					goto out;
				}

				rq = intel_context_create_request(ce);
				intel_context_put(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out;
				}

				if (rq->engine->emit_init_breadcrumb) {
					err = rq->engine->emit_init_breadcrumb(rq);
					if (err) {
						i915_request_add(rq);
						goto out;
					}
				}

				cs = intel_ring_begin(rq, 2);
				if (IS_ERR(cs)) {
					i915_request_add(rq);
					err = PTR_ERR(cs);
					goto out;
				}

				if (p->error[i]) {
					*cs++ = 0xdeadbeef;
					*cs++ = 0xdeadbeef;
				} else {
					*cs++ = MI_NOOP;
					*cs++ = MI_NOOP;
				}

				client[i] = i915_request_get(rq);
				i915_request_add(rq);
			}

			err = wait_for_submit(engine, client[0], HZ / 2);
			if (err) {
				pr_err("%s: first request did not start within time!\n",
				       engine->name);
				err = -ETIME;
				goto out;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
					pr_debug("%s: %s request incomplete!\n",
						 engine->name,
						 error_repr(p->error[i]));

				if (!i915_request_started(client[i])) {
					pr_debug("%s: %s request not started!\n",
						 engine->name,
						 error_repr(p->error[i]));
					err = -ETIME;
					goto out;
				}

				/* Kick the tasklet to process the error */
				intel_engine_flush_submission(engine);
				if (client[i]->fence.error != p->error[i]) {
					pr_err("%s: %s request completed with wrong error code: %d\n",
					       engine->name,
					       error_repr(p->error[i]),
					       client[i]->fence.error);
					err = -EINVAL;
					goto out;
				}
			}

out:
			for (i = 0; i < ARRAY_SIZE(client); i++)
				if (client[i])
					i915_request_put(client[i]);
			if (err) {
				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
				       engine->name, p - phases,
				       p->error[0], p->error[1]);
				break;
			}
		}

		engine_heartbeat_enable(engine, heartbeat);
		if (err) {
			intel_gt_set_wedged(gt);
			return err;
		}
	}

	return 0;
}

static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
	*cs++ = 0;

	if (idx > 0) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
		*cs++ = 0;
		*cs++ = 1;
	} else {
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ =
MI_NOOP; 664 } 665 666 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 667 668 intel_ring_advance(rq, cs); 669 return 0; 670 } 671 672 static struct i915_request * 673 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 674 { 675 struct intel_context *ce; 676 struct i915_request *rq; 677 int err; 678 679 ce = intel_context_create(engine); 680 if (IS_ERR(ce)) 681 return ERR_CAST(ce); 682 683 rq = intel_context_create_request(ce); 684 if (IS_ERR(rq)) 685 goto out_ce; 686 687 err = 0; 688 if (rq->engine->emit_init_breadcrumb) 689 err = rq->engine->emit_init_breadcrumb(rq); 690 if (err == 0) 691 err = emit_semaphore_chain(rq, vma, idx); 692 if (err == 0) 693 i915_request_get(rq); 694 i915_request_add(rq); 695 if (err) 696 rq = ERR_PTR(err); 697 698 out_ce: 699 intel_context_put(ce); 700 return rq; 701 } 702 703 static int 704 release_queue(struct intel_engine_cs *engine, 705 struct i915_vma *vma, 706 int idx, int prio) 707 { 708 struct i915_sched_attr attr = { 709 .priority = prio, 710 }; 711 struct i915_request *rq; 712 u32 *cs; 713 714 rq = intel_engine_create_kernel_request(engine); 715 if (IS_ERR(rq)) 716 return PTR_ERR(rq); 717 718 cs = intel_ring_begin(rq, 4); 719 if (IS_ERR(cs)) { 720 i915_request_add(rq); 721 return PTR_ERR(cs); 722 } 723 724 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 725 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 726 *cs++ = 0; 727 *cs++ = 1; 728 729 intel_ring_advance(rq, cs); 730 731 i915_request_get(rq); 732 i915_request_add(rq); 733 734 local_bh_disable(); 735 engine->schedule(rq, &attr); 736 local_bh_enable(); /* kick tasklet */ 737 738 i915_request_put(rq); 739 740 return 0; 741 } 742 743 static int 744 slice_semaphore_queue(struct intel_engine_cs *outer, 745 struct i915_vma *vma, 746 int count) 747 { 748 struct intel_engine_cs *engine; 749 struct i915_request *head; 750 enum intel_engine_id id; 751 int err, i, n = 0; 752 753 head = semaphore_queue(outer, vma, n++); 754 if (IS_ERR(head)) 755 return PTR_ERR(head); 756 757 for_each_engine(engine, outer->gt, id) { 758 for (i = 0; i < count; i++) { 759 struct i915_request *rq; 760 761 rq = semaphore_queue(engine, vma, n++); 762 if (IS_ERR(rq)) { 763 err = PTR_ERR(rq); 764 goto out; 765 } 766 767 i915_request_put(rq); 768 } 769 } 770 771 err = release_queue(outer, vma, n, INT_MAX); 772 if (err) 773 goto out; 774 775 if (i915_request_wait(head, 0, 776 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { 777 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 778 count, n); 779 GEM_TRACE_DUMP(); 780 intel_gt_set_wedged(outer->gt); 781 err = -EIO; 782 } 783 784 out: 785 i915_request_put(head); 786 return err; 787 } 788 789 static int live_timeslice_preempt(void *arg) 790 { 791 struct intel_gt *gt = arg; 792 struct drm_i915_gem_object *obj; 793 struct i915_vma *vma; 794 void *vaddr; 795 int err = 0; 796 int count; 797 798 /* 799 * If a request takes too long, we would like to give other users 800 * a fair go on the GPU. In particular, users may create batches 801 * that wait upon external input, where that input may even be 802 * supplied by another GPU job. To avoid blocking forever, we 803 * need to preempt the current task and replace it with another 804 * ready task. 
805 */ 806 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 807 return 0; 808 809 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 810 if (IS_ERR(obj)) 811 return PTR_ERR(obj); 812 813 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 814 if (IS_ERR(vma)) { 815 err = PTR_ERR(vma); 816 goto err_obj; 817 } 818 819 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 820 if (IS_ERR(vaddr)) { 821 err = PTR_ERR(vaddr); 822 goto err_obj; 823 } 824 825 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 826 if (err) 827 goto err_map; 828 829 err = i915_vma_sync(vma); 830 if (err) 831 goto err_pin; 832 833 for_each_prime_number_from(count, 1, 16) { 834 struct intel_engine_cs *engine; 835 enum intel_engine_id id; 836 837 for_each_engine(engine, gt, id) { 838 unsigned long saved; 839 840 if (!intel_engine_has_preemption(engine)) 841 continue; 842 843 memset(vaddr, 0, PAGE_SIZE); 844 845 engine_heartbeat_disable(engine, &saved); 846 err = slice_semaphore_queue(engine, vma, count); 847 engine_heartbeat_enable(engine, saved); 848 if (err) 849 goto err_pin; 850 851 if (igt_flush_test(gt->i915)) { 852 err = -EIO; 853 goto err_pin; 854 } 855 } 856 } 857 858 err_pin: 859 i915_vma_unpin(vma); 860 err_map: 861 i915_gem_object_unpin_map(obj); 862 err_obj: 863 i915_gem_object_put(obj); 864 return err; 865 } 866 867 static struct i915_request * 868 create_rewinder(struct intel_context *ce, 869 struct i915_request *wait, 870 void *slot, int idx) 871 { 872 const u32 offset = 873 i915_ggtt_offset(ce->engine->status_page.vma) + 874 offset_in_page(slot); 875 struct i915_request *rq; 876 u32 *cs; 877 int err; 878 879 rq = intel_context_create_request(ce); 880 if (IS_ERR(rq)) 881 return rq; 882 883 if (wait) { 884 err = i915_request_await_dma_fence(rq, &wait->fence); 885 if (err) 886 goto err; 887 } 888 889 cs = intel_ring_begin(rq, 10); 890 if (IS_ERR(cs)) { 891 err = PTR_ERR(cs); 892 goto err; 893 } 894 895 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 896 *cs++ = MI_NOOP; 897 898 *cs++ = MI_SEMAPHORE_WAIT | 899 MI_SEMAPHORE_GLOBAL_GTT | 900 MI_SEMAPHORE_POLL | 901 MI_SEMAPHORE_SAD_NEQ_SDD; 902 *cs++ = 0; 903 *cs++ = offset; 904 *cs++ = 0; 905 906 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 907 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 908 *cs++ = offset + idx * sizeof(u32); 909 *cs++ = 0; 910 911 intel_ring_advance(rq, cs); 912 913 rq->sched.attr.priority = I915_PRIORITY_MASK; 914 err = 0; 915 err: 916 i915_request_get(rq); 917 i915_request_add(rq); 918 if (err) { 919 i915_request_put(rq); 920 return ERR_PTR(err); 921 } 922 923 return rq; 924 } 925 926 static int live_timeslice_rewind(void *arg) 927 { 928 struct intel_gt *gt = arg; 929 struct intel_engine_cs *engine; 930 enum intel_engine_id id; 931 932 /* 933 * The usual presumption on timeslice expiration is that we replace 934 * the active context with another. However, given a chain of 935 * dependencies we may end up with replacing the context with itself, 936 * but only a few of those requests, forcing us to rewind the 937 * RING_TAIL of the original request. 
938 */ 939 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 940 return 0; 941 942 for_each_engine(engine, gt, id) { 943 enum { A1, A2, B1 }; 944 enum { X = 1, Y, Z }; 945 struct i915_request *rq[3] = {}; 946 struct intel_context *ce; 947 unsigned long heartbeat; 948 unsigned long timeslice; 949 int i, err = 0; 950 u32 *slot; 951 952 if (!intel_engine_has_timeslices(engine)) 953 continue; 954 955 /* 956 * A:rq1 -- semaphore wait, timestamp X 957 * A:rq2 -- write timestamp Y 958 * 959 * B:rq1 [await A:rq1] -- write timestamp Z 960 * 961 * Force timeslice, release semaphore. 962 * 963 * Expect execution/evaluation order XZY 964 */ 965 966 engine_heartbeat_disable(engine, &heartbeat); 967 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 968 969 slot = memset32(engine->status_page.addr + 1000, 0, 4); 970 971 ce = intel_context_create(engine); 972 if (IS_ERR(ce)) { 973 err = PTR_ERR(ce); 974 goto err; 975 } 976 977 rq[0] = create_rewinder(ce, NULL, slot, 1); 978 if (IS_ERR(rq[0])) { 979 intel_context_put(ce); 980 goto err; 981 } 982 983 rq[1] = create_rewinder(ce, NULL, slot, 2); 984 intel_context_put(ce); 985 if (IS_ERR(rq[1])) 986 goto err; 987 988 err = wait_for_submit(engine, rq[1], HZ / 2); 989 if (err) { 990 pr_err("%s: failed to submit first context\n", 991 engine->name); 992 goto err; 993 } 994 995 ce = intel_context_create(engine); 996 if (IS_ERR(ce)) { 997 err = PTR_ERR(ce); 998 goto err; 999 } 1000 1001 rq[2] = create_rewinder(ce, rq[0], slot, 3); 1002 intel_context_put(ce); 1003 if (IS_ERR(rq[2])) 1004 goto err; 1005 1006 err = wait_for_submit(engine, rq[2], HZ / 2); 1007 if (err) { 1008 pr_err("%s: failed to submit second context\n", 1009 engine->name); 1010 goto err; 1011 } 1012 GEM_BUG_ON(!timer_pending(&engine->execlists.timer)); 1013 1014 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1015 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1016 GEM_BUG_ON(!i915_request_is_active(rq[A2])); 1017 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1018 1019 /* Wait for the timeslice to kick in */ 1020 del_timer(&engine->execlists.timer); 1021 tasklet_hi_schedule(&engine->execlists.tasklet); 1022 intel_engine_flush_submission(engine); 1023 1024 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1025 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1026 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1027 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1028 1029 /* Release the hounds! 
*/ 1030 slot[0] = 1; 1031 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1032 1033 for (i = 1; i <= 3; i++) { 1034 unsigned long timeout = jiffies + HZ / 2; 1035 1036 while (!READ_ONCE(slot[i]) && 1037 time_before(jiffies, timeout)) 1038 ; 1039 1040 if (!time_before(jiffies, timeout)) { 1041 pr_err("%s: rq[%d] timed out\n", 1042 engine->name, i - 1); 1043 err = -ETIME; 1044 goto err; 1045 } 1046 1047 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1048 } 1049 1050 /* XZY: XZ < XY */ 1051 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1052 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1053 engine->name, 1054 slot[Z] - slot[X], 1055 slot[Y] - slot[X]); 1056 err = -EINVAL; 1057 } 1058 1059 err: 1060 memset32(&slot[0], -1, 4); 1061 wmb(); 1062 1063 engine->props.timeslice_duration_ms = timeslice; 1064 engine_heartbeat_enable(engine, heartbeat); 1065 for (i = 0; i < 3; i++) 1066 i915_request_put(rq[i]); 1067 if (igt_flush_test(gt->i915)) 1068 err = -EIO; 1069 if (err) 1070 return err; 1071 } 1072 1073 return 0; 1074 } 1075 1076 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1077 { 1078 struct i915_request *rq; 1079 1080 rq = intel_engine_create_kernel_request(engine); 1081 if (IS_ERR(rq)) 1082 return rq; 1083 1084 i915_request_get(rq); 1085 i915_request_add(rq); 1086 1087 return rq; 1088 } 1089 1090 static long timeslice_threshold(const struct intel_engine_cs *engine) 1091 { 1092 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; 1093 } 1094 1095 static int live_timeslice_queue(void *arg) 1096 { 1097 struct intel_gt *gt = arg; 1098 struct drm_i915_gem_object *obj; 1099 struct intel_engine_cs *engine; 1100 enum intel_engine_id id; 1101 struct i915_vma *vma; 1102 void *vaddr; 1103 int err = 0; 1104 1105 /* 1106 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1107 * timeslicing between them disabled, we *do* enable timeslicing 1108 * if the queue demands it. (Normally, we do not submit if 1109 * ELSP[1] is already occupied, so must rely on timeslicing to 1110 * eject ELSP[0] in favour of the queue.) 
1111 */ 1112 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1113 return 0; 1114 1115 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1116 if (IS_ERR(obj)) 1117 return PTR_ERR(obj); 1118 1119 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1120 if (IS_ERR(vma)) { 1121 err = PTR_ERR(vma); 1122 goto err_obj; 1123 } 1124 1125 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1126 if (IS_ERR(vaddr)) { 1127 err = PTR_ERR(vaddr); 1128 goto err_obj; 1129 } 1130 1131 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1132 if (err) 1133 goto err_map; 1134 1135 err = i915_vma_sync(vma); 1136 if (err) 1137 goto err_pin; 1138 1139 for_each_engine(engine, gt, id) { 1140 struct i915_sched_attr attr = { 1141 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1142 }; 1143 struct i915_request *rq, *nop; 1144 unsigned long saved; 1145 1146 if (!intel_engine_has_preemption(engine)) 1147 continue; 1148 1149 engine_heartbeat_disable(engine, &saved); 1150 memset(vaddr, 0, PAGE_SIZE); 1151 1152 /* ELSP[0]: semaphore wait */ 1153 rq = semaphore_queue(engine, vma, 0); 1154 if (IS_ERR(rq)) { 1155 err = PTR_ERR(rq); 1156 goto err_heartbeat; 1157 } 1158 engine->schedule(rq, &attr); 1159 err = wait_for_submit(engine, rq, HZ / 2); 1160 if (err) { 1161 pr_err("%s: Timed out trying to submit semaphores\n", 1162 engine->name); 1163 goto err_rq; 1164 } 1165 1166 /* ELSP[1]: nop request */ 1167 nop = nop_request(engine); 1168 if (IS_ERR(nop)) { 1169 err = PTR_ERR(nop); 1170 goto err_rq; 1171 } 1172 err = wait_for_submit(engine, nop, HZ / 2); 1173 i915_request_put(nop); 1174 if (err) { 1175 pr_err("%s: Timed out trying to submit nop\n", 1176 engine->name); 1177 goto err_rq; 1178 } 1179 1180 GEM_BUG_ON(i915_request_completed(rq)); 1181 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1182 1183 /* Queue: semaphore signal, matching priority as semaphore */ 1184 err = release_queue(engine, vma, 1, effective_prio(rq)); 1185 if (err) 1186 goto err_rq; 1187 1188 intel_engine_flush_submission(engine); 1189 if (!READ_ONCE(engine->execlists.timer.expires) && 1190 !i915_request_completed(rq)) { 1191 struct drm_printer p = 1192 drm_info_printer(gt->i915->drm.dev); 1193 1194 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", 1195 engine->name); 1196 intel_engine_dump(engine, &p, 1197 "%s\n", engine->name); 1198 GEM_TRACE_DUMP(); 1199 1200 memset(vaddr, 0xff, PAGE_SIZE); 1201 err = -EINVAL; 1202 } 1203 1204 /* Timeslice every jiffy, so within 2 we should signal */ 1205 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { 1206 struct drm_printer p = 1207 drm_info_printer(gt->i915->drm.dev); 1208 1209 pr_err("%s: Failed to timeslice into queue\n", 1210 engine->name); 1211 intel_engine_dump(engine, &p, 1212 "%s\n", engine->name); 1213 1214 memset(vaddr, 0xff, PAGE_SIZE); 1215 err = -EIO; 1216 } 1217 err_rq: 1218 i915_request_put(rq); 1219 err_heartbeat: 1220 engine_heartbeat_enable(engine, saved); 1221 if (err) 1222 break; 1223 } 1224 1225 err_pin: 1226 i915_vma_unpin(vma); 1227 err_map: 1228 i915_gem_object_unpin_map(obj); 1229 err_obj: 1230 i915_gem_object_put(obj); 1231 return err; 1232 } 1233 1234 static int live_busywait_preempt(void *arg) 1235 { 1236 struct intel_gt *gt = arg; 1237 struct i915_gem_context *ctx_hi, *ctx_lo; 1238 struct intel_engine_cs *engine; 1239 struct drm_i915_gem_object *obj; 1240 struct i915_vma *vma; 1241 enum intel_engine_id id; 1242 int err = -ENOMEM; 1243 u32 *map; 1244 1245 /* 1246 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1247 * preempt the busywaits used 
to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here?
*/ 1332 1333 *cs++ = MI_SEMAPHORE_WAIT | 1334 MI_SEMAPHORE_GLOBAL_GTT | 1335 MI_SEMAPHORE_POLL | 1336 MI_SEMAPHORE_SAD_EQ_SDD; 1337 *cs++ = 0; 1338 *cs++ = i915_ggtt_offset(vma); 1339 *cs++ = 0; 1340 1341 intel_ring_advance(lo, cs); 1342 1343 i915_request_get(lo); 1344 i915_request_add(lo); 1345 1346 if (wait_for(READ_ONCE(*map), 10)) { 1347 i915_request_put(lo); 1348 err = -ETIMEDOUT; 1349 goto err_vma; 1350 } 1351 1352 /* Low priority request should be busywaiting now */ 1353 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1354 i915_request_put(lo); 1355 pr_err("%s: Busywaiting request did not!\n", 1356 engine->name); 1357 err = -EIO; 1358 goto err_vma; 1359 } 1360 1361 hi = igt_request_alloc(ctx_hi, engine); 1362 if (IS_ERR(hi)) { 1363 err = PTR_ERR(hi); 1364 i915_request_put(lo); 1365 goto err_vma; 1366 } 1367 1368 cs = intel_ring_begin(hi, 4); 1369 if (IS_ERR(cs)) { 1370 err = PTR_ERR(cs); 1371 i915_request_add(hi); 1372 i915_request_put(lo); 1373 goto err_vma; 1374 } 1375 1376 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1377 *cs++ = i915_ggtt_offset(vma); 1378 *cs++ = 0; 1379 *cs++ = 0; 1380 1381 intel_ring_advance(hi, cs); 1382 i915_request_add(hi); 1383 1384 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1385 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1386 1387 pr_err("%s: Failed to preempt semaphore busywait!\n", 1388 engine->name); 1389 1390 intel_engine_dump(engine, &p, "%s\n", engine->name); 1391 GEM_TRACE_DUMP(); 1392 1393 i915_request_put(lo); 1394 intel_gt_set_wedged(gt); 1395 err = -EIO; 1396 goto err_vma; 1397 } 1398 GEM_BUG_ON(READ_ONCE(*map)); 1399 i915_request_put(lo); 1400 1401 if (igt_live_test_end(&t)) { 1402 err = -EIO; 1403 goto err_vma; 1404 } 1405 } 1406 1407 err = 0; 1408 err_vma: 1409 i915_vma_unpin(vma); 1410 err_map: 1411 i915_gem_object_unpin_map(obj); 1412 err_obj: 1413 i915_gem_object_put(obj); 1414 err_ctx_lo: 1415 kernel_context_close(ctx_lo); 1416 err_ctx_hi: 1417 kernel_context_close(ctx_hi); 1418 return err; 1419 } 1420 1421 static struct i915_request * 1422 spinner_create_request(struct igt_spinner *spin, 1423 struct i915_gem_context *ctx, 1424 struct intel_engine_cs *engine, 1425 u32 arb) 1426 { 1427 struct intel_context *ce; 1428 struct i915_request *rq; 1429 1430 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1431 if (IS_ERR(ce)) 1432 return ERR_CAST(ce); 1433 1434 rq = igt_spinner_create_request(spin, ce, arb); 1435 intel_context_put(ce); 1436 return rq; 1437 } 1438 1439 static int live_preempt(void *arg) 1440 { 1441 struct intel_gt *gt = arg; 1442 struct i915_gem_context *ctx_hi, *ctx_lo; 1443 struct igt_spinner spin_hi, spin_lo; 1444 struct intel_engine_cs *engine; 1445 enum intel_engine_id id; 1446 int err = -ENOMEM; 1447 1448 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1449 return 0; 1450 1451 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1452 pr_err("Logical preemption supported, but not exposed\n"); 1453 1454 if (igt_spinner_init(&spin_hi, gt)) 1455 return -ENOMEM; 1456 1457 if (igt_spinner_init(&spin_lo, gt)) 1458 goto err_spin_hi; 1459 1460 ctx_hi = kernel_context(gt->i915); 1461 if (!ctx_hi) 1462 goto err_spin_lo; 1463 ctx_hi->sched.priority = 1464 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1465 1466 ctx_lo = kernel_context(gt->i915); 1467 if (!ctx_lo) 1468 goto err_ctx_hi; 1469 ctx_lo->sched.priority = 1470 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1471 1472 for_each_engine(engine, gt, id) { 1473 struct igt_live_test t; 1474 struct i915_request *rq; 1475 1476 if 
(!intel_engine_has_preemption(engine)) 1477 continue; 1478 1479 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1480 err = -EIO; 1481 goto err_ctx_lo; 1482 } 1483 1484 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1485 MI_ARB_CHECK); 1486 if (IS_ERR(rq)) { 1487 err = PTR_ERR(rq); 1488 goto err_ctx_lo; 1489 } 1490 1491 i915_request_add(rq); 1492 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1493 GEM_TRACE("lo spinner failed to start\n"); 1494 GEM_TRACE_DUMP(); 1495 intel_gt_set_wedged(gt); 1496 err = -EIO; 1497 goto err_ctx_lo; 1498 } 1499 1500 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1501 MI_ARB_CHECK); 1502 if (IS_ERR(rq)) { 1503 igt_spinner_end(&spin_lo); 1504 err = PTR_ERR(rq); 1505 goto err_ctx_lo; 1506 } 1507 1508 i915_request_add(rq); 1509 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1510 GEM_TRACE("hi spinner failed to start\n"); 1511 GEM_TRACE_DUMP(); 1512 intel_gt_set_wedged(gt); 1513 err = -EIO; 1514 goto err_ctx_lo; 1515 } 1516 1517 igt_spinner_end(&spin_hi); 1518 igt_spinner_end(&spin_lo); 1519 1520 if (igt_live_test_end(&t)) { 1521 err = -EIO; 1522 goto err_ctx_lo; 1523 } 1524 } 1525 1526 err = 0; 1527 err_ctx_lo: 1528 kernel_context_close(ctx_lo); 1529 err_ctx_hi: 1530 kernel_context_close(ctx_hi); 1531 err_spin_lo: 1532 igt_spinner_fini(&spin_lo); 1533 err_spin_hi: 1534 igt_spinner_fini(&spin_hi); 1535 return err; 1536 } 1537 1538 static int live_late_preempt(void *arg) 1539 { 1540 struct intel_gt *gt = arg; 1541 struct i915_gem_context *ctx_hi, *ctx_lo; 1542 struct igt_spinner spin_hi, spin_lo; 1543 struct intel_engine_cs *engine; 1544 struct i915_sched_attr attr = {}; 1545 enum intel_engine_id id; 1546 int err = -ENOMEM; 1547 1548 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1549 return 0; 1550 1551 if (igt_spinner_init(&spin_hi, gt)) 1552 return -ENOMEM; 1553 1554 if (igt_spinner_init(&spin_lo, gt)) 1555 goto err_spin_hi; 1556 1557 ctx_hi = kernel_context(gt->i915); 1558 if (!ctx_hi) 1559 goto err_spin_lo; 1560 1561 ctx_lo = kernel_context(gt->i915); 1562 if (!ctx_lo) 1563 goto err_ctx_hi; 1564 1565 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1566 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1567 1568 for_each_engine(engine, gt, id) { 1569 struct igt_live_test t; 1570 struct i915_request *rq; 1571 1572 if (!intel_engine_has_preemption(engine)) 1573 continue; 1574 1575 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1576 err = -EIO; 1577 goto err_ctx_lo; 1578 } 1579 1580 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1581 MI_ARB_CHECK); 1582 if (IS_ERR(rq)) { 1583 err = PTR_ERR(rq); 1584 goto err_ctx_lo; 1585 } 1586 1587 i915_request_add(rq); 1588 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1589 pr_err("First context failed to start\n"); 1590 goto err_wedged; 1591 } 1592 1593 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1594 MI_NOOP); 1595 if (IS_ERR(rq)) { 1596 igt_spinner_end(&spin_lo); 1597 err = PTR_ERR(rq); 1598 goto err_ctx_lo; 1599 } 1600 1601 i915_request_add(rq); 1602 if (igt_wait_for_spinner(&spin_hi, rq)) { 1603 pr_err("Second context overtook first?\n"); 1604 goto err_wedged; 1605 } 1606 1607 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1608 engine->schedule(rq, &attr); 1609 1610 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1611 pr_err("High priority context failed to preempt the low priority context\n"); 1612 GEM_TRACE_DUMP(); 1613 goto err_wedged; 1614 } 1615 1616 igt_spinner_end(&spin_hi); 1617 igt_spinner_end(&spin_lo); 1618 1619 if (igt_live_test_end(&t)) { 1620 err = -EIO; 1621 goto err_ctx_lo; 1622 } 1623 } 1624 1625 err = 0; 1626 err_ctx_lo: 1627 kernel_context_close(ctx_lo); 1628 err_ctx_hi: 1629 kernel_context_close(ctx_hi); 1630 err_spin_lo: 1631 igt_spinner_fini(&spin_lo); 1632 err_spin_hi: 1633 igt_spinner_fini(&spin_hi); 1634 return err; 1635 1636 err_wedged: 1637 igt_spinner_end(&spin_hi); 1638 igt_spinner_end(&spin_lo); 1639 intel_gt_set_wedged(gt); 1640 err = -EIO; 1641 goto err_ctx_lo; 1642 } 1643 1644 struct preempt_client { 1645 struct igt_spinner spin; 1646 struct i915_gem_context *ctx; 1647 }; 1648 1649 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1650 { 1651 c->ctx = kernel_context(gt->i915); 1652 if (!c->ctx) 1653 return -ENOMEM; 1654 1655 if (igt_spinner_init(&c->spin, gt)) 1656 goto err_ctx; 1657 1658 return 0; 1659 1660 err_ctx: 1661 kernel_context_close(c->ctx); 1662 return -ENOMEM; 1663 } 1664 1665 static void preempt_client_fini(struct preempt_client *c) 1666 { 1667 igt_spinner_fini(&c->spin); 1668 kernel_context_close(c->ctx); 1669 } 1670 1671 static int live_nopreempt(void *arg) 1672 { 1673 struct intel_gt *gt = arg; 1674 struct intel_engine_cs *engine; 1675 struct preempt_client a, b; 1676 enum intel_engine_id id; 1677 int err = -ENOMEM; 1678 1679 /* 1680 * Verify that we can disable preemption for an individual request 1681 * that may be being observed and not want to be interrupted. 1682 */ 1683 1684 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1685 return 0; 1686 1687 if (preempt_client_init(gt, &a)) 1688 return -ENOMEM; 1689 if (preempt_client_init(gt, &b)) 1690 goto err_client_a; 1691 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1692 1693 for_each_engine(engine, gt, id) { 1694 struct i915_request *rq_a, *rq_b; 1695 1696 if (!intel_engine_has_preemption(engine)) 1697 continue; 1698 1699 engine->execlists.preempt_hang.count = 0; 1700 1701 rq_a = spinner_create_request(&a.spin, 1702 a.ctx, engine, 1703 MI_ARB_CHECK); 1704 if (IS_ERR(rq_a)) { 1705 err = PTR_ERR(rq_a); 1706 goto err_client_b; 1707 } 1708 1709 /* Low priority client, but unpreemptable! 
*/ 1710 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 1711 1712 i915_request_add(rq_a); 1713 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1714 pr_err("First client failed to start\n"); 1715 goto err_wedged; 1716 } 1717 1718 rq_b = spinner_create_request(&b.spin, 1719 b.ctx, engine, 1720 MI_ARB_CHECK); 1721 if (IS_ERR(rq_b)) { 1722 err = PTR_ERR(rq_b); 1723 goto err_client_b; 1724 } 1725 1726 i915_request_add(rq_b); 1727 1728 /* B is much more important than A! (But A is unpreemptable.) */ 1729 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 1730 1731 /* Wait long enough for preemption and timeslicing */ 1732 if (igt_wait_for_spinner(&b.spin, rq_b)) { 1733 pr_err("Second client started too early!\n"); 1734 goto err_wedged; 1735 } 1736 1737 igt_spinner_end(&a.spin); 1738 1739 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1740 pr_err("Second client failed to start\n"); 1741 goto err_wedged; 1742 } 1743 1744 igt_spinner_end(&b.spin); 1745 1746 if (engine->execlists.preempt_hang.count) { 1747 pr_err("Preemption recorded x%d; should have been suppressed!\n", 1748 engine->execlists.preempt_hang.count); 1749 err = -EINVAL; 1750 goto err_wedged; 1751 } 1752 1753 if (igt_flush_test(gt->i915)) 1754 goto err_wedged; 1755 } 1756 1757 err = 0; 1758 err_client_b: 1759 preempt_client_fini(&b); 1760 err_client_a: 1761 preempt_client_fini(&a); 1762 return err; 1763 1764 err_wedged: 1765 igt_spinner_end(&b.spin); 1766 igt_spinner_end(&a.spin); 1767 intel_gt_set_wedged(gt); 1768 err = -EIO; 1769 goto err_client_b; 1770 } 1771 1772 struct live_preempt_cancel { 1773 struct intel_engine_cs *engine; 1774 struct preempt_client a, b; 1775 }; 1776 1777 static int __cancel_active0(struct live_preempt_cancel *arg) 1778 { 1779 struct i915_request *rq; 1780 struct igt_live_test t; 1781 int err; 1782 1783 /* Preempt cancel of ELSP0 */ 1784 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1785 if (igt_live_test_begin(&t, arg->engine->i915, 1786 __func__, arg->engine->name)) 1787 return -EIO; 1788 1789 rq = spinner_create_request(&arg->a.spin, 1790 arg->a.ctx, arg->engine, 1791 MI_ARB_CHECK); 1792 if (IS_ERR(rq)) 1793 return PTR_ERR(rq); 1794 1795 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1796 i915_request_get(rq); 1797 i915_request_add(rq); 1798 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1799 err = -EIO; 1800 goto out; 1801 } 1802 1803 intel_context_set_banned(rq->context); 1804 err = intel_engine_pulse(arg->engine); 1805 if (err) 1806 goto out; 1807 1808 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1809 err = -EIO; 1810 goto out; 1811 } 1812 1813 if (rq->fence.error != -EIO) { 1814 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1815 err = -EINVAL; 1816 goto out; 1817 } 1818 1819 out: 1820 i915_request_put(rq); 1821 if (igt_live_test_end(&t)) 1822 err = -EIO; 1823 return err; 1824 } 1825 1826 static int __cancel_active1(struct live_preempt_cancel *arg) 1827 { 1828 struct i915_request *rq[2] = {}; 1829 struct igt_live_test t; 1830 int err; 1831 1832 /* Preempt cancel of ELSP1 */ 1833 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1834 if (igt_live_test_begin(&t, arg->engine->i915, 1835 __func__, arg->engine->name)) 1836 return -EIO; 1837 1838 rq[0] = spinner_create_request(&arg->a.spin, 1839 arg->a.ctx, arg->engine, 1840 MI_NOOP); /* no preemption */ 1841 if (IS_ERR(rq[0])) 1842 return PTR_ERR(rq[0]); 1843 1844 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1845 i915_request_get(rq[0]); 1846 i915_request_add(rq[0]); 1847 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1848 err = 
-EIO; 1849 goto out; 1850 } 1851 1852 rq[1] = spinner_create_request(&arg->b.spin, 1853 arg->b.ctx, arg->engine, 1854 MI_ARB_CHECK); 1855 if (IS_ERR(rq[1])) { 1856 err = PTR_ERR(rq[1]); 1857 goto out; 1858 } 1859 1860 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1861 i915_request_get(rq[1]); 1862 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1863 i915_request_add(rq[1]); 1864 if (err) 1865 goto out; 1866 1867 intel_context_set_banned(rq[1]->context); 1868 err = intel_engine_pulse(arg->engine); 1869 if (err) 1870 goto out; 1871 1872 igt_spinner_end(&arg->a.spin); 1873 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { 1874 err = -EIO; 1875 goto out; 1876 } 1877 1878 if (rq[0]->fence.error != 0) { 1879 pr_err("Normal inflight0 request did not complete\n"); 1880 err = -EINVAL; 1881 goto out; 1882 } 1883 1884 if (rq[1]->fence.error != -EIO) { 1885 pr_err("Cancelled inflight1 request did not report -EIO\n"); 1886 err = -EINVAL; 1887 goto out; 1888 } 1889 1890 out: 1891 i915_request_put(rq[1]); 1892 i915_request_put(rq[0]); 1893 if (igt_live_test_end(&t)) 1894 err = -EIO; 1895 return err; 1896 } 1897 1898 static int __cancel_queued(struct live_preempt_cancel *arg) 1899 { 1900 struct i915_request *rq[3] = {}; 1901 struct igt_live_test t; 1902 int err; 1903 1904 /* Full ELSP and one in the wings */ 1905 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1906 if (igt_live_test_begin(&t, arg->engine->i915, 1907 __func__, arg->engine->name)) 1908 return -EIO; 1909 1910 rq[0] = spinner_create_request(&arg->a.spin, 1911 arg->a.ctx, arg->engine, 1912 MI_ARB_CHECK); 1913 if (IS_ERR(rq[0])) 1914 return PTR_ERR(rq[0]); 1915 1916 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1917 i915_request_get(rq[0]); 1918 i915_request_add(rq[0]); 1919 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1920 err = -EIO; 1921 goto out; 1922 } 1923 1924 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 1925 if (IS_ERR(rq[1])) { 1926 err = PTR_ERR(rq[1]); 1927 goto out; 1928 } 1929 1930 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1931 i915_request_get(rq[1]); 1932 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1933 i915_request_add(rq[1]); 1934 if (err) 1935 goto out; 1936 1937 rq[2] = spinner_create_request(&arg->b.spin, 1938 arg->a.ctx, arg->engine, 1939 MI_ARB_CHECK); 1940 if (IS_ERR(rq[2])) { 1941 err = PTR_ERR(rq[2]); 1942 goto out; 1943 } 1944 1945 i915_request_get(rq[2]); 1946 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 1947 i915_request_add(rq[2]); 1948 if (err) 1949 goto out; 1950 1951 intel_context_set_banned(rq[2]->context); 1952 err = intel_engine_pulse(arg->engine); 1953 if (err) 1954 goto out; 1955 1956 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { 1957 err = -EIO; 1958 goto out; 1959 } 1960 1961 if (rq[0]->fence.error != -EIO) { 1962 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1963 err = -EINVAL; 1964 goto out; 1965 } 1966 1967 if (rq[1]->fence.error != 0) { 1968 pr_err("Normal inflight1 request did not complete\n"); 1969 err = -EINVAL; 1970 goto out; 1971 } 1972 1973 if (rq[2]->fence.error != -EIO) { 1974 pr_err("Cancelled queued request did not report -EIO\n"); 1975 err = -EINVAL; 1976 goto out; 1977 } 1978 1979 out: 1980 i915_request_put(rq[2]); 1981 i915_request_put(rq[1]); 1982 i915_request_put(rq[0]); 1983 if (igt_live_test_end(&t)) 1984 err = -EIO; 1985 return err; 1986 } 1987 1988 static int __cancel_hostile(struct live_preempt_cancel *arg) 1989 { 1990 struct i915_request *rq; 1991 int err; 1992 1993 /* Preempt cancel 
non-preemptible spinner in ELSP0 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
	rq = spinner_create_request(&arg->a.spin,
				    arg->a.ctx, arg->engine,
				    MI_NOOP); /* preemption disabled */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	clear_bit(CONTEXT_BANNED, &rq->context->flags);
	i915_request_get(rq);
	i915_request_add(rq);
	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
		err = -EIO;
		goto out;
	}

	intel_context_set_banned(rq->context);
	err = intel_engine_pulse(arg->engine); /* force reset */
	if (err)
		goto out;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -EIO;
		goto out;
	}

	if (rq->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	i915_request_put(rq);
	if (igt_flush_test(arg->engine->i915))
		err = -EIO;
	return err;
}

static int live_preempt_cancel(void *arg)
{
	struct intel_gt *gt = arg;
	struct live_preempt_cancel data;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * To cancel an inflight context, we need to first remove it from the
	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &data.a))
		return -ENOMEM;
	if (preempt_client_init(gt, &data.b))
		goto err_client_a;

	for_each_engine(data.engine, gt, id) {
		if (!intel_engine_has_preemption(data.engine))
			continue;

		err = __cancel_active0(&data);
		if (err)
			goto err_wedged;

		err = __cancel_active1(&data);
		if (err)
			goto err_wedged;

		err = __cancel_queued(&data);
		if (err)
			goto err_wedged;

		err = __cancel_hostile(&data);
		if (err)
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&data.b);
err_client_a:
	preempt_client_fini(&data.a);
	return err;

err_wedged:
	GEM_TRACE_DUMP();
	igt_spinner_end(&data.b.spin);
	igt_spinner_end(&data.a.spin);
	intel_gt_set_wedged(gt);
	goto err_client_b;
}

static int live_suppress_self_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
	};
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		intel_engine_pm_get(engine);
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			intel_engine_pm_put(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			intel_engine_pm_put(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				intel_engine_pm_put(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			GEM_BUG_ON(i915_request_completed(rq_a));
			engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				intel_engine_pm_put(engine);
				goto err_wedged;
			}

			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
			intel_engine_pm_put(engine);
			err = -EINVAL;
			goto err_client_b;
		}

		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}

static int __i915_sw_fence_call
dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

static struct i915_request *dummy_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
	if (!rq)
		return NULL;

	rq->engine = engine;

	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->fence.cb_list);
	rq->fence.lock = &rq->lock;
	rq->fence.ops = &i915_fence_ops;

	i915_sched_node_init(&rq->sched);

	/* mark this request as permanently incomplete */
	rq->fence.seqno = 1;
	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
	GEM_BUG_ON(i915_request_completed(rq));

	i915_sw_fence_init(&rq->submit, dummy_notify);
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_lock_init(&rq->lock);
2243 rq->fence.lock = &rq->lock; 2244 INIT_LIST_HEAD(&rq->fence.cb_list); 2245 2246 return rq; 2247 } 2248 2249 static void dummy_request_free(struct i915_request *dummy) 2250 { 2251 /* We have to fake the CS interrupt to kick the next request */ 2252 i915_sw_fence_commit(&dummy->submit); 2253 2254 i915_request_mark_complete(dummy); 2255 dma_fence_signal(&dummy->fence); 2256 2257 i915_sched_node_fini(&dummy->sched); 2258 i915_sw_fence_fini(&dummy->submit); 2259 2260 dma_fence_free(&dummy->fence); 2261 } 2262 2263 static int live_suppress_wait_preempt(void *arg) 2264 { 2265 struct intel_gt *gt = arg; 2266 struct preempt_client client[4]; 2267 struct i915_request *rq[ARRAY_SIZE(client)] = {}; 2268 struct intel_engine_cs *engine; 2269 enum intel_engine_id id; 2270 int err = -ENOMEM; 2271 int i; 2272 2273 /* 2274 * Waiters are given a little priority nudge, but not enough 2275 * to actually cause any preemption. Double check that we do 2276 * not needlessly generate preempt-to-idle cycles. 2277 */ 2278 2279 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2280 return 0; 2281 2282 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ 2283 return -ENOMEM; 2284 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ 2285 goto err_client_0; 2286 if (preempt_client_init(gt, &client[2])) /* head of queue */ 2287 goto err_client_1; 2288 if (preempt_client_init(gt, &client[3])) /* bystander */ 2289 goto err_client_2; 2290 2291 for_each_engine(engine, gt, id) { 2292 int depth; 2293 2294 if (!intel_engine_has_preemption(engine)) 2295 continue; 2296 2297 if (!engine->emit_init_breadcrumb) 2298 continue; 2299 2300 for (depth = 0; depth < ARRAY_SIZE(client); depth++) { 2301 struct i915_request *dummy; 2302 2303 engine->execlists.preempt_hang.count = 0; 2304 2305 dummy = dummy_request(engine); 2306 if (!dummy) 2307 goto err_client_3; 2308 2309 for (i = 0; i < ARRAY_SIZE(client); i++) { 2310 struct i915_request *this; 2311 2312 this = spinner_create_request(&client[i].spin, 2313 client[i].ctx, engine, 2314 MI_NOOP); 2315 if (IS_ERR(this)) { 2316 err = PTR_ERR(this); 2317 goto err_wedged; 2318 } 2319 2320 /* Disable NEWCLIENT promotion */ 2321 __i915_active_fence_set(&i915_request_timeline(this)->last_request, 2322 &dummy->fence); 2323 2324 rq[i] = i915_request_get(this); 2325 i915_request_add(this); 2326 } 2327 2328 dummy_request_free(dummy); 2329 2330 GEM_BUG_ON(i915_request_completed(rq[0])); 2331 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { 2332 pr_err("%s: First client failed to start\n", 2333 engine->name); 2334 goto err_wedged; 2335 } 2336 GEM_BUG_ON(!i915_request_started(rq[0])); 2337 2338 if (i915_request_wait(rq[depth], 2339 I915_WAIT_PRIORITY, 2340 1) != -ETIME) { 2341 pr_err("%s: Waiter depth:%d completed!\n", 2342 engine->name, depth); 2343 goto err_wedged; 2344 } 2345 2346 for (i = 0; i < ARRAY_SIZE(client); i++) { 2347 igt_spinner_end(&client[i].spin); 2348 i915_request_put(rq[i]); 2349 rq[i] = NULL; 2350 } 2351 2352 if (igt_flush_test(gt->i915)) 2353 goto err_wedged; 2354 2355 if (engine->execlists.preempt_hang.count) { 2356 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", 2357 engine->name, 2358 engine->execlists.preempt_hang.count, 2359 depth); 2360 err = -EINVAL; 2361 goto err_client_3; 2362 } 2363 } 2364 } 2365 2366 err = 0; 2367 err_client_3: 2368 preempt_client_fini(&client[3]); 2369 err_client_2: 2370 preempt_client_fini(&client[2]); 2371 err_client_1: 2372 preempt_client_fini(&client[1]); 2373 err_client_0: 2374 preempt_client_fini(&client[0]); 2375 
return err; 2376 2377 err_wedged: 2378 for (i = 0; i < ARRAY_SIZE(client); i++) { 2379 igt_spinner_end(&client[i].spin); 2380 i915_request_put(rq[i]); 2381 } 2382 intel_gt_set_wedged(gt); 2383 err = -EIO; 2384 goto err_client_3; 2385 } 2386 2387 static int live_chain_preempt(void *arg) 2388 { 2389 struct intel_gt *gt = arg; 2390 struct intel_engine_cs *engine; 2391 struct preempt_client hi, lo; 2392 enum intel_engine_id id; 2393 int err = -ENOMEM; 2394 2395 /* 2396 * Build a chain AB...BA between two contexts (A, B) and request 2397 * preemption of the last request. It should then complete before 2398 * the previously submitted spinner in B. 2399 */ 2400 2401 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2402 return 0; 2403 2404 if (preempt_client_init(gt, &hi)) 2405 return -ENOMEM; 2406 2407 if (preempt_client_init(gt, &lo)) 2408 goto err_client_hi; 2409 2410 for_each_engine(engine, gt, id) { 2411 struct i915_sched_attr attr = { 2412 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2413 }; 2414 struct igt_live_test t; 2415 struct i915_request *rq; 2416 int ring_size, count, i; 2417 2418 if (!intel_engine_has_preemption(engine)) 2419 continue; 2420 2421 rq = spinner_create_request(&lo.spin, 2422 lo.ctx, engine, 2423 MI_ARB_CHECK); 2424 if (IS_ERR(rq)) 2425 goto err_wedged; 2426 2427 i915_request_get(rq); 2428 i915_request_add(rq); 2429 2430 ring_size = rq->wa_tail - rq->head; 2431 if (ring_size < 0) 2432 ring_size += rq->ring->size; 2433 ring_size = rq->ring->size / ring_size; 2434 pr_debug("%s(%s): Using maximum of %d requests\n", 2435 __func__, engine->name, ring_size); 2436 2437 igt_spinner_end(&lo.spin); 2438 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2439 pr_err("Timed out waiting to flush %s\n", engine->name); 2440 i915_request_put(rq); 2441 goto err_wedged; 2442 } 2443 i915_request_put(rq); 2444 2445 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2446 err = -EIO; 2447 goto err_wedged; 2448 } 2449 2450 for_each_prime_number_from(count, 1, ring_size) { 2451 rq = spinner_create_request(&hi.spin, 2452 hi.ctx, engine, 2453 MI_ARB_CHECK); 2454 if (IS_ERR(rq)) 2455 goto err_wedged; 2456 i915_request_add(rq); 2457 if (!igt_wait_for_spinner(&hi.spin, rq)) 2458 goto err_wedged; 2459 2460 rq = spinner_create_request(&lo.spin, 2461 lo.ctx, engine, 2462 MI_ARB_CHECK); 2463 if (IS_ERR(rq)) 2464 goto err_wedged; 2465 i915_request_add(rq); 2466 2467 for (i = 0; i < count; i++) { 2468 rq = igt_request_alloc(lo.ctx, engine); 2469 if (IS_ERR(rq)) 2470 goto err_wedged; 2471 i915_request_add(rq); 2472 } 2473 2474 rq = igt_request_alloc(hi.ctx, engine); 2475 if (IS_ERR(rq)) 2476 goto err_wedged; 2477 2478 i915_request_get(rq); 2479 i915_request_add(rq); 2480 engine->schedule(rq, &attr); 2481 2482 igt_spinner_end(&hi.spin); 2483 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2484 struct drm_printer p = 2485 drm_info_printer(gt->i915->drm.dev); 2486 2487 pr_err("Failed to preempt over chain of %d\n", 2488 count); 2489 intel_engine_dump(engine, &p, 2490 "%s\n", engine->name); 2491 i915_request_put(rq); 2492 goto err_wedged; 2493 } 2494 igt_spinner_end(&lo.spin); 2495 i915_request_put(rq); 2496 2497 rq = igt_request_alloc(lo.ctx, engine); 2498 if (IS_ERR(rq)) 2499 goto err_wedged; 2500 2501 i915_request_get(rq); 2502 i915_request_add(rq); 2503 2504 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2505 struct drm_printer p = 2506 drm_info_printer(gt->i915->drm.dev); 2507 2508 pr_err("Failed to flush low priority chain of %d requests\n", 2509 count); 2510 intel_engine_dump(engine, &p, 2511 "%s\n", 
engine->name); 2512 2513 i915_request_put(rq); 2514 goto err_wedged; 2515 } 2516 i915_request_put(rq); 2517 } 2518 2519 if (igt_live_test_end(&t)) { 2520 err = -EIO; 2521 goto err_wedged; 2522 } 2523 } 2524 2525 err = 0; 2526 err_client_lo: 2527 preempt_client_fini(&lo); 2528 err_client_hi: 2529 preempt_client_fini(&hi); 2530 return err; 2531 2532 err_wedged: 2533 igt_spinner_end(&hi.spin); 2534 igt_spinner_end(&lo.spin); 2535 intel_gt_set_wedged(gt); 2536 err = -EIO; 2537 goto err_client_lo; 2538 } 2539 2540 static int create_gang(struct intel_engine_cs *engine, 2541 struct i915_request **prev) 2542 { 2543 struct drm_i915_gem_object *obj; 2544 struct intel_context *ce; 2545 struct i915_request *rq; 2546 struct i915_vma *vma; 2547 u32 *cs; 2548 int err; 2549 2550 ce = intel_context_create(engine); 2551 if (IS_ERR(ce)) 2552 return PTR_ERR(ce); 2553 2554 obj = i915_gem_object_create_internal(engine->i915, 4096); 2555 if (IS_ERR(obj)) { 2556 err = PTR_ERR(obj); 2557 goto err_ce; 2558 } 2559 2560 vma = i915_vma_instance(obj, ce->vm, NULL); 2561 if (IS_ERR(vma)) { 2562 err = PTR_ERR(vma); 2563 goto err_obj; 2564 } 2565 2566 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2567 if (err) 2568 goto err_obj; 2569 2570 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2571 if (IS_ERR(cs)) 2572 goto err_obj; 2573 2574 /* Semaphore target: spin until zero */ 2575 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2576 2577 *cs++ = MI_SEMAPHORE_WAIT | 2578 MI_SEMAPHORE_POLL | 2579 MI_SEMAPHORE_SAD_EQ_SDD; 2580 *cs++ = 0; 2581 *cs++ = lower_32_bits(vma->node.start); 2582 *cs++ = upper_32_bits(vma->node.start); 2583 2584 if (*prev) { 2585 u64 offset = (*prev)->batch->node.start; 2586 2587 /* Terminate the spinner in the next lower priority batch. */ 2588 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2589 *cs++ = lower_32_bits(offset); 2590 *cs++ = upper_32_bits(offset); 2591 *cs++ = 0; 2592 } 2593 2594 *cs++ = MI_BATCH_BUFFER_END; 2595 i915_gem_object_flush_map(obj); 2596 i915_gem_object_unpin_map(obj); 2597 2598 rq = intel_context_create_request(ce); 2599 if (IS_ERR(rq)) 2600 goto err_obj; 2601 2602 rq->batch = vma; 2603 i915_request_get(rq); 2604 2605 i915_vma_lock(vma); 2606 err = i915_request_await_object(rq, vma->obj, false); 2607 if (!err) 2608 err = i915_vma_move_to_active(vma, rq, 0); 2609 if (!err) 2610 err = rq->engine->emit_bb_start(rq, 2611 vma->node.start, 2612 PAGE_SIZE, 0); 2613 i915_vma_unlock(vma); 2614 i915_request_add(rq); 2615 if (err) 2616 goto err_rq; 2617 2618 i915_gem_object_put(obj); 2619 intel_context_put(ce); 2620 2621 rq->client_link.next = &(*prev)->client_link; 2622 *prev = rq; 2623 return 0; 2624 2625 err_rq: 2626 i915_request_put(rq); 2627 err_obj: 2628 i915_gem_object_put(obj); 2629 err_ce: 2630 intel_context_put(ce); 2631 return err; 2632 } 2633 2634 static int live_preempt_gang(void *arg) 2635 { 2636 struct intel_gt *gt = arg; 2637 struct intel_engine_cs *engine; 2638 enum intel_engine_id id; 2639 2640 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2641 return 0; 2642 2643 /* 2644 * Build as long a chain of preempters as we can, with each 2645 * request higher priority than the last. Once we are ready, we release 2646 * the last batch which then precolates down the chain, each releasing 2647 * the next oldest in turn. The intent is to simply push as hard as we 2648 * can with the number of preemptions, trying to exceed narrow HW 2649 * limits. At a minimum, we insist that we can sort all the user 2650 * high priority levels into execution order. 
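* Mechanically, each batch spins on a semaphore embedded at the start of
* its own buffer and, once released, writes zero into the previous (lower
* priority) batch before ending; clearing the final semaphore from the CPU
* therefore unwinds the whole gang from newest to oldest.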
2651 */ 2652 2653 for_each_engine(engine, gt, id) { 2654 struct i915_request *rq = NULL; 2655 struct igt_live_test t; 2656 IGT_TIMEOUT(end_time); 2657 int prio = 0; 2658 int err = 0; 2659 u32 *cs; 2660 2661 if (!intel_engine_has_preemption(engine)) 2662 continue; 2663 2664 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2665 return -EIO; 2666 2667 do { 2668 struct i915_sched_attr attr = { 2669 .priority = I915_USER_PRIORITY(prio++), 2670 }; 2671 2672 err = create_gang(engine, &rq); 2673 if (err) 2674 break; 2675 2676 /* Submit each spinner at increasing priority */ 2677 engine->schedule(rq, &attr); 2678 2679 if (prio <= I915_PRIORITY_MAX) 2680 continue; 2681 2682 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) 2683 break; 2684 2685 if (__igt_timeout(end_time, NULL)) 2686 break; 2687 } while (1); 2688 pr_debug("%s: Preempt chain of %d requests\n", 2689 engine->name, prio); 2690 2691 /* 2692 * Such that the last spinner is the highest priority and 2693 * should execute first. When that spinner completes, 2694 * it will terminate the next lowest spinner until there 2695 * are no more spinners and the gang is complete. 2696 */ 2697 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2698 if (!IS_ERR(cs)) { 2699 *cs = 0; 2700 i915_gem_object_unpin_map(rq->batch->obj); 2701 } else { 2702 err = PTR_ERR(cs); 2703 intel_gt_set_wedged(gt); 2704 } 2705 2706 while (rq) { /* wait for each rq from highest to lowest prio */ 2707 struct i915_request *n = 2708 list_next_entry(rq, client_link); 2709 2710 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2711 struct drm_printer p = 2712 drm_info_printer(engine->i915->drm.dev); 2713 2714 pr_err("Failed to flush chain of %d requests, at %d\n", 2715 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2716 intel_engine_dump(engine, &p, 2717 "%s\n", engine->name); 2718 2719 err = -ETIME; 2720 } 2721 2722 i915_request_put(rq); 2723 rq = n; 2724 } 2725 2726 if (igt_live_test_end(&t)) 2727 err = -EIO; 2728 if (err) 2729 return err; 2730 } 2731 2732 return 0; 2733 } 2734 2735 static int live_preempt_timeout(void *arg) 2736 { 2737 struct intel_gt *gt = arg; 2738 struct i915_gem_context *ctx_hi, *ctx_lo; 2739 struct igt_spinner spin_lo; 2740 struct intel_engine_cs *engine; 2741 enum intel_engine_id id; 2742 int err = -ENOMEM; 2743 2744 /* 2745 * Check that we force preemption to occur by cancelling the previous 2746 * context if it refuses to yield the GPU. 
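* The low priority spinner runs without an arbitration point, so it cannot
* yield voluntarily; with the preempt timeout shrunk to a single jiffy the
* expectation is that the engine is reset to evict it (hence the
* intel_has_reset_engine() requirement) and the high priority request then
* completes promptly.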
2747 */ 2748 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2749 return 0; 2750 2751 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2752 return 0; 2753 2754 if (!intel_has_reset_engine(gt)) 2755 return 0; 2756 2757 if (igt_spinner_init(&spin_lo, gt)) 2758 return -ENOMEM; 2759 2760 ctx_hi = kernel_context(gt->i915); 2761 if (!ctx_hi) 2762 goto err_spin_lo; 2763 ctx_hi->sched.priority = 2764 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 2765 2766 ctx_lo = kernel_context(gt->i915); 2767 if (!ctx_lo) 2768 goto err_ctx_hi; 2769 ctx_lo->sched.priority = 2770 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 2771 2772 for_each_engine(engine, gt, id) { 2773 unsigned long saved_timeout; 2774 struct i915_request *rq; 2775 2776 if (!intel_engine_has_preemption(engine)) 2777 continue; 2778 2779 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 2780 MI_NOOP); /* preemption disabled */ 2781 if (IS_ERR(rq)) { 2782 err = PTR_ERR(rq); 2783 goto err_ctx_lo; 2784 } 2785 2786 i915_request_add(rq); 2787 if (!igt_wait_for_spinner(&spin_lo, rq)) { 2788 intel_gt_set_wedged(gt); 2789 err = -EIO; 2790 goto err_ctx_lo; 2791 } 2792 2793 rq = igt_request_alloc(ctx_hi, engine); 2794 if (IS_ERR(rq)) { 2795 igt_spinner_end(&spin_lo); 2796 err = PTR_ERR(rq); 2797 goto err_ctx_lo; 2798 } 2799 2800 /* Flush the previous CS ack before changing timeouts */ 2801 while (READ_ONCE(engine->execlists.pending[0])) 2802 cpu_relax(); 2803 2804 saved_timeout = engine->props.preempt_timeout_ms; 2805 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 2806 2807 i915_request_get(rq); 2808 i915_request_add(rq); 2809 2810 intel_engine_flush_submission(engine); 2811 engine->props.preempt_timeout_ms = saved_timeout; 2812 2813 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 2814 intel_gt_set_wedged(gt); 2815 i915_request_put(rq); 2816 err = -ETIME; 2817 goto err_ctx_lo; 2818 } 2819 2820 igt_spinner_end(&spin_lo); 2821 i915_request_put(rq); 2822 } 2823 2824 err = 0; 2825 err_ctx_lo: 2826 kernel_context_close(ctx_lo); 2827 err_ctx_hi: 2828 kernel_context_close(ctx_hi); 2829 err_spin_lo: 2830 igt_spinner_fini(&spin_lo); 2831 return err; 2832 } 2833 2834 static int random_range(struct rnd_state *rnd, int min, int max) 2835 { 2836 return i915_prandom_u32_max_state(max - min, rnd) + min; 2837 } 2838 2839 static int random_priority(struct rnd_state *rnd) 2840 { 2841 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 2842 } 2843 2844 struct preempt_smoke { 2845 struct intel_gt *gt; 2846 struct i915_gem_context **contexts; 2847 struct intel_engine_cs *engine; 2848 struct drm_i915_gem_object *batch; 2849 unsigned int ncontext; 2850 struct rnd_state prng; 2851 unsigned long count; 2852 }; 2853 2854 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 2855 { 2856 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 2857 &smoke->prng)]; 2858 } 2859 2860 static int smoke_submit(struct preempt_smoke *smoke, 2861 struct i915_gem_context *ctx, int prio, 2862 struct drm_i915_gem_object *batch) 2863 { 2864 struct i915_request *rq; 2865 struct i915_vma *vma = NULL; 2866 int err = 0; 2867 2868 if (batch) { 2869 struct i915_address_space *vm; 2870 2871 vm = i915_gem_context_get_vm_rcu(ctx); 2872 vma = i915_vma_instance(batch, vm, NULL); 2873 i915_vm_put(vm); 2874 if (IS_ERR(vma)) 2875 return PTR_ERR(vma); 2876 2877 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2878 if (err) 2879 return err; 2880 } 2881 2882 ctx->sched.priority = prio; 2883 2884 rq = igt_request_alloc(ctx, smoke->engine); 2885 if 
(IS_ERR(rq)) { 2886 err = PTR_ERR(rq); 2887 goto unpin; 2888 } 2889 2890 if (vma) { 2891 i915_vma_lock(vma); 2892 err = i915_request_await_object(rq, vma->obj, false); 2893 if (!err) 2894 err = i915_vma_move_to_active(vma, rq, 0); 2895 if (!err) 2896 err = rq->engine->emit_bb_start(rq, 2897 vma->node.start, 2898 PAGE_SIZE, 0); 2899 i915_vma_unlock(vma); 2900 } 2901 2902 i915_request_add(rq); 2903 2904 unpin: 2905 if (vma) 2906 i915_vma_unpin(vma); 2907 2908 return err; 2909 } 2910 2911 static int smoke_crescendo_thread(void *arg) 2912 { 2913 struct preempt_smoke *smoke = arg; 2914 IGT_TIMEOUT(end_time); 2915 unsigned long count; 2916 2917 count = 0; 2918 do { 2919 struct i915_gem_context *ctx = smoke_context(smoke); 2920 int err; 2921 2922 err = smoke_submit(smoke, 2923 ctx, count % I915_PRIORITY_MAX, 2924 smoke->batch); 2925 if (err) 2926 return err; 2927 2928 count++; 2929 } while (!__igt_timeout(end_time, NULL)); 2930 2931 smoke->count = count; 2932 return 0; 2933 } 2934 2935 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 2936 #define BATCH BIT(0) 2937 { 2938 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 2939 struct preempt_smoke arg[I915_NUM_ENGINES]; 2940 struct intel_engine_cs *engine; 2941 enum intel_engine_id id; 2942 unsigned long count; 2943 int err = 0; 2944 2945 for_each_engine(engine, smoke->gt, id) { 2946 arg[id] = *smoke; 2947 arg[id].engine = engine; 2948 if (!(flags & BATCH)) 2949 arg[id].batch = NULL; 2950 arg[id].count = 0; 2951 2952 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 2953 "igt/smoke:%d", id); 2954 if (IS_ERR(tsk[id])) { 2955 err = PTR_ERR(tsk[id]); 2956 break; 2957 } 2958 get_task_struct(tsk[id]); 2959 } 2960 2961 yield(); /* start all threads before we kthread_stop() */ 2962 2963 count = 0; 2964 for_each_engine(engine, smoke->gt, id) { 2965 int status; 2966 2967 if (IS_ERR_OR_NULL(tsk[id])) 2968 continue; 2969 2970 status = kthread_stop(tsk[id]); 2971 if (status && !err) 2972 err = status; 2973 2974 count += arg[id].count; 2975 2976 put_task_struct(tsk[id]); 2977 } 2978 2979 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 2980 count, flags, 2981 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 2982 return 0; 2983 } 2984 2985 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 2986 { 2987 enum intel_engine_id id; 2988 IGT_TIMEOUT(end_time); 2989 unsigned long count; 2990 2991 count = 0; 2992 do { 2993 for_each_engine(smoke->engine, smoke->gt, id) { 2994 struct i915_gem_context *ctx = smoke_context(smoke); 2995 int err; 2996 2997 err = smoke_submit(smoke, 2998 ctx, random_priority(&smoke->prng), 2999 flags & BATCH ? 
smoke->batch : NULL); 3000 if (err) 3001 return err; 3002 3003 count++; 3004 } 3005 } while (!__igt_timeout(end_time, NULL)); 3006 3007 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3008 count, flags, 3009 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3010 return 0; 3011 } 3012 3013 static int live_preempt_smoke(void *arg) 3014 { 3015 struct preempt_smoke smoke = { 3016 .gt = arg, 3017 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3018 .ncontext = 1024, 3019 }; 3020 const unsigned int phase[] = { 0, BATCH }; 3021 struct igt_live_test t; 3022 int err = -ENOMEM; 3023 u32 *cs; 3024 int n; 3025 3026 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3027 return 0; 3028 3029 smoke.contexts = kmalloc_array(smoke.ncontext, 3030 sizeof(*smoke.contexts), 3031 GFP_KERNEL); 3032 if (!smoke.contexts) 3033 return -ENOMEM; 3034 3035 smoke.batch = 3036 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3037 if (IS_ERR(smoke.batch)) { 3038 err = PTR_ERR(smoke.batch); 3039 goto err_free; 3040 } 3041 3042 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3043 if (IS_ERR(cs)) { 3044 err = PTR_ERR(cs); 3045 goto err_batch; 3046 } 3047 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3048 cs[n] = MI_ARB_CHECK; 3049 cs[n] = MI_BATCH_BUFFER_END; 3050 i915_gem_object_flush_map(smoke.batch); 3051 i915_gem_object_unpin_map(smoke.batch); 3052 3053 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3054 err = -EIO; 3055 goto err_batch; 3056 } 3057 3058 for (n = 0; n < smoke.ncontext; n++) { 3059 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3060 if (!smoke.contexts[n]) 3061 goto err_ctx; 3062 } 3063 3064 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3065 err = smoke_crescendo(&smoke, phase[n]); 3066 if (err) 3067 goto err_ctx; 3068 3069 err = smoke_random(&smoke, phase[n]); 3070 if (err) 3071 goto err_ctx; 3072 } 3073 3074 err_ctx: 3075 if (igt_live_test_end(&t)) 3076 err = -EIO; 3077 3078 for (n = 0; n < smoke.ncontext; n++) { 3079 if (!smoke.contexts[n]) 3080 break; 3081 kernel_context_close(smoke.contexts[n]); 3082 } 3083 3084 err_batch: 3085 i915_gem_object_put(smoke.batch); 3086 err_free: 3087 kfree(smoke.contexts); 3088 3089 return err; 3090 } 3091 3092 static int nop_virtual_engine(struct intel_gt *gt, 3093 struct intel_engine_cs **siblings, 3094 unsigned int nsibling, 3095 unsigned int nctx, 3096 unsigned int flags) 3097 #define CHAIN BIT(0) 3098 { 3099 IGT_TIMEOUT(end_time); 3100 struct i915_request *request[16] = {}; 3101 struct intel_context *ve[16]; 3102 unsigned long n, prime, nc; 3103 struct igt_live_test t; 3104 ktime_t times[2] = {}; 3105 int err; 3106 3107 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3108 3109 for (n = 0; n < nctx; n++) { 3110 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3111 if (IS_ERR(ve[n])) { 3112 err = PTR_ERR(ve[n]); 3113 nctx = n; 3114 goto out; 3115 } 3116 3117 err = intel_context_pin(ve[n]); 3118 if (err) { 3119 intel_context_put(ve[n]); 3120 nctx = n; 3121 goto out; 3122 } 3123 } 3124 3125 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3126 if (err) 3127 goto out; 3128 3129 for_each_prime_number_from(prime, 1, 8192) { 3130 times[1] = ktime_get_raw(); 3131 3132 if (flags & CHAIN) { 3133 for (nc = 0; nc < nctx; nc++) { 3134 for (n = 0; n < prime; n++) { 3135 struct i915_request *rq; 3136 3137 rq = i915_request_create(ve[nc]); 3138 if (IS_ERR(rq)) { 3139 err = PTR_ERR(rq); 3140 goto out; 3141 } 3142 3143 if (request[nc]) 3144 
i915_request_put(request[nc]); 3145 request[nc] = i915_request_get(rq); 3146 i915_request_add(rq); 3147 } 3148 } 3149 } else { 3150 for (n = 0; n < prime; n++) { 3151 for (nc = 0; nc < nctx; nc++) { 3152 struct i915_request *rq; 3153 3154 rq = i915_request_create(ve[nc]); 3155 if (IS_ERR(rq)) { 3156 err = PTR_ERR(rq); 3157 goto out; 3158 } 3159 3160 if (request[nc]) 3161 i915_request_put(request[nc]); 3162 request[nc] = i915_request_get(rq); 3163 i915_request_add(rq); 3164 } 3165 } 3166 } 3167 3168 for (nc = 0; nc < nctx; nc++) { 3169 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3170 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3171 __func__, ve[0]->engine->name, 3172 request[nc]->fence.context, 3173 request[nc]->fence.seqno); 3174 3175 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3176 __func__, ve[0]->engine->name, 3177 request[nc]->fence.context, 3178 request[nc]->fence.seqno); 3179 GEM_TRACE_DUMP(); 3180 intel_gt_set_wedged(gt); 3181 break; 3182 } 3183 } 3184 3185 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3186 if (prime == 1) 3187 times[0] = times[1]; 3188 3189 for (nc = 0; nc < nctx; nc++) { 3190 i915_request_put(request[nc]); 3191 request[nc] = NULL; 3192 } 3193 3194 if (__igt_timeout(end_time, NULL)) 3195 break; 3196 } 3197 3198 err = igt_live_test_end(&t); 3199 if (err) 3200 goto out; 3201 3202 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3203 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3204 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3205 3206 out: 3207 if (igt_flush_test(gt->i915)) 3208 err = -EIO; 3209 3210 for (nc = 0; nc < nctx; nc++) { 3211 i915_request_put(request[nc]); 3212 intel_context_unpin(ve[nc]); 3213 intel_context_put(ve[nc]); 3214 } 3215 return err; 3216 } 3217 3218 static int live_virtual_engine(void *arg) 3219 { 3220 struct intel_gt *gt = arg; 3221 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3222 struct intel_engine_cs *engine; 3223 enum intel_engine_id id; 3224 unsigned int class, inst; 3225 int err; 3226 3227 if (intel_uc_uses_guc_submission(>->uc)) 3228 return 0; 3229 3230 for_each_engine(engine, gt, id) { 3231 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3232 if (err) { 3233 pr_err("Failed to wrap engine %s: err=%d\n", 3234 engine->name, err); 3235 return err; 3236 } 3237 } 3238 3239 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3240 int nsibling, n; 3241 3242 nsibling = 0; 3243 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3244 if (!gt->engine_class[class][inst]) 3245 continue; 3246 3247 siblings[nsibling++] = gt->engine_class[class][inst]; 3248 } 3249 if (nsibling < 2) 3250 continue; 3251 3252 for (n = 1; n <= nsibling + 1; n++) { 3253 err = nop_virtual_engine(gt, siblings, nsibling, 3254 n, 0); 3255 if (err) 3256 return err; 3257 } 3258 3259 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3260 if (err) 3261 return err; 3262 } 3263 3264 return 0; 3265 } 3266 3267 static int mask_virtual_engine(struct intel_gt *gt, 3268 struct intel_engine_cs **siblings, 3269 unsigned int nsibling) 3270 { 3271 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3272 struct intel_context *ve; 3273 struct igt_live_test t; 3274 unsigned int n; 3275 int err; 3276 3277 /* 3278 * Check that by setting the execution mask on a request, we can 3279 * restrict it to our desired engine within the virtual engine. 
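* One request is submitted per sibling, each with its execution_mask
* restricted to a single sibling (in reverse order), and on completion we
* verify that rq->engine matches the sibling we asked for.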
3280 */ 3281 3282 ve = intel_execlists_create_virtual(siblings, nsibling); 3283 if (IS_ERR(ve)) { 3284 err = PTR_ERR(ve); 3285 goto out_close; 3286 } 3287 3288 err = intel_context_pin(ve); 3289 if (err) 3290 goto out_put; 3291 3292 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3293 if (err) 3294 goto out_unpin; 3295 3296 for (n = 0; n < nsibling; n++) { 3297 request[n] = i915_request_create(ve); 3298 if (IS_ERR(request[n])) { 3299 err = PTR_ERR(request[n]); 3300 nsibling = n; 3301 goto out; 3302 } 3303 3304 /* Reverse order as it's more likely to be unnatural */ 3305 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3306 3307 i915_request_get(request[n]); 3308 i915_request_add(request[n]); 3309 } 3310 3311 for (n = 0; n < nsibling; n++) { 3312 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3313 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3314 __func__, ve->engine->name, 3315 request[n]->fence.context, 3316 request[n]->fence.seqno); 3317 3318 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3319 __func__, ve->engine->name, 3320 request[n]->fence.context, 3321 request[n]->fence.seqno); 3322 GEM_TRACE_DUMP(); 3323 intel_gt_set_wedged(gt); 3324 err = -EIO; 3325 goto out; 3326 } 3327 3328 if (request[n]->engine != siblings[nsibling - n - 1]) { 3329 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3330 request[n]->engine->name, 3331 siblings[nsibling - n - 1]->name); 3332 err = -EINVAL; 3333 goto out; 3334 } 3335 } 3336 3337 err = igt_live_test_end(&t); 3338 out: 3339 if (igt_flush_test(gt->i915)) 3340 err = -EIO; 3341 3342 for (n = 0; n < nsibling; n++) 3343 i915_request_put(request[n]); 3344 3345 out_unpin: 3346 intel_context_unpin(ve); 3347 out_put: 3348 intel_context_put(ve); 3349 out_close: 3350 return err; 3351 } 3352 3353 static int live_virtual_mask(void *arg) 3354 { 3355 struct intel_gt *gt = arg; 3356 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3357 unsigned int class, inst; 3358 int err; 3359 3360 if (intel_uc_uses_guc_submission(>->uc)) 3361 return 0; 3362 3363 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3364 unsigned int nsibling; 3365 3366 nsibling = 0; 3367 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3368 if (!gt->engine_class[class][inst]) 3369 break; 3370 3371 siblings[nsibling++] = gt->engine_class[class][inst]; 3372 } 3373 if (nsibling < 2) 3374 continue; 3375 3376 err = mask_virtual_engine(gt, siblings, nsibling); 3377 if (err) 3378 return err; 3379 } 3380 3381 return 0; 3382 } 3383 3384 static int preserved_virtual_engine(struct intel_gt *gt, 3385 struct intel_engine_cs **siblings, 3386 unsigned int nsibling) 3387 { 3388 struct i915_request *last = NULL; 3389 struct intel_context *ve; 3390 struct i915_vma *scratch; 3391 struct igt_live_test t; 3392 unsigned int n; 3393 int err = 0; 3394 u32 *cs; 3395 3396 scratch = create_scratch(siblings[0]->gt); 3397 if (IS_ERR(scratch)) 3398 return PTR_ERR(scratch); 3399 3400 err = i915_vma_sync(scratch); 3401 if (err) 3402 goto out_scratch; 3403 3404 ve = intel_execlists_create_virtual(siblings, nsibling); 3405 if (IS_ERR(ve)) { 3406 err = PTR_ERR(ve); 3407 goto out_scratch; 3408 } 3409 3410 err = intel_context_pin(ve); 3411 if (err) 3412 goto out_put; 3413 3414 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3415 if (err) 3416 goto out_unpin; 3417 3418 for (n = 0; n < NUM_GPR_DW; n++) { 3419 struct intel_engine_cs *engine = siblings[n % nsibling]; 3420 struct i915_request *rq; 3421 3422 rq = i915_request_create(ve); 3423 if 
(IS_ERR(rq)) { 3424 err = PTR_ERR(rq); 3425 goto out_end; 3426 } 3427 3428 i915_request_put(last); 3429 last = i915_request_get(rq); 3430 3431 cs = intel_ring_begin(rq, 8); 3432 if (IS_ERR(cs)) { 3433 i915_request_add(rq); 3434 err = PTR_ERR(cs); 3435 goto out_end; 3436 } 3437 3438 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3439 *cs++ = CS_GPR(engine, n); 3440 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 3441 *cs++ = 0; 3442 3443 *cs++ = MI_LOAD_REGISTER_IMM(1); 3444 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 3445 *cs++ = n + 1; 3446 3447 *cs++ = MI_NOOP; 3448 intel_ring_advance(rq, cs); 3449 3450 /* Restrict this request to run on a particular engine */ 3451 rq->execution_mask = engine->mask; 3452 i915_request_add(rq); 3453 } 3454 3455 if (i915_request_wait(last, 0, HZ / 5) < 0) { 3456 err = -ETIME; 3457 goto out_end; 3458 } 3459 3460 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3461 if (IS_ERR(cs)) { 3462 err = PTR_ERR(cs); 3463 goto out_end; 3464 } 3465 3466 for (n = 0; n < NUM_GPR_DW; n++) { 3467 if (cs[n] != n) { 3468 pr_err("Incorrect value[%d] found for GPR[%d]\n", 3469 cs[n], n); 3470 err = -EINVAL; 3471 break; 3472 } 3473 } 3474 3475 i915_gem_object_unpin_map(scratch->obj); 3476 3477 out_end: 3478 if (igt_live_test_end(&t)) 3479 err = -EIO; 3480 i915_request_put(last); 3481 out_unpin: 3482 intel_context_unpin(ve); 3483 out_put: 3484 intel_context_put(ve); 3485 out_scratch: 3486 i915_vma_unpin_and_release(&scratch, 0); 3487 return err; 3488 } 3489 3490 static int live_virtual_preserved(void *arg) 3491 { 3492 struct intel_gt *gt = arg; 3493 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3494 unsigned int class, inst; 3495 3496 /* 3497 * Check that the context image retains non-privileged (user) registers 3498 * from one engine to the next. For this we check that the CS_GPR 3499 * are preserved. 3500 */ 3501 3502 if (intel_uc_uses_guc_submission(>->uc)) 3503 return 0; 3504 3505 /* As we use CS_GPR we cannot run before they existed on all engines. */ 3506 if (INTEL_GEN(gt->i915) < 9) 3507 return 0; 3508 3509 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3510 int nsibling, err; 3511 3512 nsibling = 0; 3513 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3514 if (!gt->engine_class[class][inst]) 3515 continue; 3516 3517 siblings[nsibling++] = gt->engine_class[class][inst]; 3518 } 3519 if (nsibling < 2) 3520 continue; 3521 3522 err = preserved_virtual_engine(gt, siblings, nsibling); 3523 if (err) 3524 return err; 3525 } 3526 3527 return 0; 3528 } 3529 3530 static int bond_virtual_engine(struct intel_gt *gt, 3531 unsigned int class, 3532 struct intel_engine_cs **siblings, 3533 unsigned int nsibling, 3534 unsigned int flags) 3535 #define BOND_SCHEDULE BIT(0) 3536 { 3537 struct intel_engine_cs *master; 3538 struct i915_request *rq[16]; 3539 enum intel_engine_id id; 3540 struct igt_spinner spin; 3541 unsigned long n; 3542 int err; 3543 3544 /* 3545 * A set of bonded requests is intended to be run concurrently 3546 * across a number of engines. We use one request per-engine 3547 * and a magic fence to schedule each of the bonded requests 3548 * at the same time. A consequence of our current scheduler is that 3549 * we only move requests to the HW ready queue when the request 3550 * becomes ready, that is when all of its prerequisite fences have 3551 * been signaled. As one of those fences is the master submit fence, 3552 * there is a delay on all secondary fences as the HW may be 3553 * currently busy. 
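* (For example, the master itself may be queued behind earlier work on
* its physical engine, so the secondaries cannot be submitted until it
* actually reaches the hardware.)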
Equally, as all the requests are independent, 3554 * they may have other fences that delay individual request 3555 * submission to HW. Ergo, we do not guarantee that all requests are 3556 * immediately submitted to HW at the same time, just that if the 3557 * rules are abided by, they are ready at the same time as the 3558 * first is submitted. Userspace can embed semaphores in its batch 3559 * to ensure parallel execution of its phases as it requires. 3560 * Though naturally it gets suggested that the scheduler should 3561 * take care of parallel execution, even across preemption events on 3562 * different HW. (The proper answer is of course "lalalala".) 3563 * 3564 * With the submit-fence, we have identified three possible phases 3565 * of synchronisation depending on the master fence: queued (not 3566 * ready), executing, and signaled. The first two are quite simple 3567 * and checked below. However, the signaled master fence handling is 3568 * contentious. Currently we do not distinguish between a signaled 3569 * fence and an expired fence, as once signaled it does not convey 3570 * any information about the previous execution. It may even be freed 3571 * and hence checking later it may not exist at all. Ergo we currently 3572 * do not apply the bonding constraint for an already signaled fence, 3573 * as our expectation is that it should not constrain the secondaries 3574 * and is outside of the scope of the bonded request API (i.e. all 3575 * userspace requests are meant to be running in parallel). As 3576 * it imposes no constraint, and is effectively a no-op, we do not 3577 * check below as normal execution flows are checked extensively above. 3578 * 3579 * XXX Is the degenerate handling of signaled submit fences the 3580 * expected behaviour for userspace?
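* Below we therefore only exercise the queued (BOND_SCHEDULE) and
* executing phases of the master fence.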
3581 */ 3582 3583 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 3584 3585 if (igt_spinner_init(&spin, gt)) 3586 return -ENOMEM; 3587 3588 err = 0; 3589 rq[0] = ERR_PTR(-ENOMEM); 3590 for_each_engine(master, gt, id) { 3591 struct i915_sw_fence fence = {}; 3592 struct intel_context *ce; 3593 3594 if (master->class == class) 3595 continue; 3596 3597 ce = intel_context_create(master); 3598 if (IS_ERR(ce)) { 3599 err = PTR_ERR(ce); 3600 goto out; 3601 } 3602 3603 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 3604 3605 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 3606 intel_context_put(ce); 3607 if (IS_ERR(rq[0])) { 3608 err = PTR_ERR(rq[0]); 3609 goto out; 3610 } 3611 i915_request_get(rq[0]); 3612 3613 if (flags & BOND_SCHEDULE) { 3614 onstack_fence_init(&fence); 3615 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 3616 &fence, 3617 GFP_KERNEL); 3618 } 3619 3620 i915_request_add(rq[0]); 3621 if (err < 0) 3622 goto out; 3623 3624 if (!(flags & BOND_SCHEDULE) && 3625 !igt_wait_for_spinner(&spin, rq[0])) { 3626 err = -EIO; 3627 goto out; 3628 } 3629 3630 for (n = 0; n < nsibling; n++) { 3631 struct intel_context *ve; 3632 3633 ve = intel_execlists_create_virtual(siblings, nsibling); 3634 if (IS_ERR(ve)) { 3635 err = PTR_ERR(ve); 3636 onstack_fence_fini(&fence); 3637 goto out; 3638 } 3639 3640 err = intel_virtual_engine_attach_bond(ve->engine, 3641 master, 3642 siblings[n]); 3643 if (err) { 3644 intel_context_put(ve); 3645 onstack_fence_fini(&fence); 3646 goto out; 3647 } 3648 3649 err = intel_context_pin(ve); 3650 intel_context_put(ve); 3651 if (err) { 3652 onstack_fence_fini(&fence); 3653 goto out; 3654 } 3655 3656 rq[n + 1] = i915_request_create(ve); 3657 intel_context_unpin(ve); 3658 if (IS_ERR(rq[n + 1])) { 3659 err = PTR_ERR(rq[n + 1]); 3660 onstack_fence_fini(&fence); 3661 goto out; 3662 } 3663 i915_request_get(rq[n + 1]); 3664 3665 err = i915_request_await_execution(rq[n + 1], 3666 &rq[0]->fence, 3667 ve->engine->bond_execute); 3668 i915_request_add(rq[n + 1]); 3669 if (err < 0) { 3670 onstack_fence_fini(&fence); 3671 goto out; 3672 } 3673 } 3674 onstack_fence_fini(&fence); 3675 intel_engine_flush_submission(master); 3676 igt_spinner_end(&spin); 3677 3678 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 3679 pr_err("Master request did not execute (on %s)!\n", 3680 rq[0]->engine->name); 3681 err = -EIO; 3682 goto out; 3683 } 3684 3685 for (n = 0; n < nsibling; n++) { 3686 if (i915_request_wait(rq[n + 1], 0, 3687 MAX_SCHEDULE_TIMEOUT) < 0) { 3688 err = -EIO; 3689 goto out; 3690 } 3691 3692 if (rq[n + 1]->engine != siblings[n]) { 3693 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 3694 siblings[n]->name, 3695 rq[n + 1]->engine->name, 3696 rq[0]->engine->name); 3697 err = -EINVAL; 3698 goto out; 3699 } 3700 } 3701 3702 for (n = 0; !IS_ERR(rq[n]); n++) 3703 i915_request_put(rq[n]); 3704 rq[0] = ERR_PTR(-ENOMEM); 3705 } 3706 3707 out: 3708 for (n = 0; !IS_ERR(rq[n]); n++) 3709 i915_request_put(rq[n]); 3710 if (igt_flush_test(gt->i915)) 3711 err = -EIO; 3712 3713 igt_spinner_fini(&spin); 3714 return err; 3715 } 3716 3717 static int live_virtual_bond(void *arg) 3718 { 3719 static const struct phase { 3720 const char *name; 3721 unsigned int flags; 3722 } phases[] = { 3723 { "", 0 }, 3724 { "schedule", BOND_SCHEDULE }, 3725 { }, 3726 }; 3727 struct intel_gt *gt = arg; 3728 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3729 unsigned int class, inst; 3730 int err; 3731 3732 if (intel_uc_uses_guc_submission(>->uc)) 
3733 return 0; 3734 3735 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3736 const struct phase *p; 3737 int nsibling; 3738 3739 nsibling = 0; 3740 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3741 if (!gt->engine_class[class][inst]) 3742 break; 3743 3744 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); 3745 siblings[nsibling++] = gt->engine_class[class][inst]; 3746 } 3747 if (nsibling < 2) 3748 continue; 3749 3750 for (p = phases; p->name; p++) { 3751 err = bond_virtual_engine(gt, 3752 class, siblings, nsibling, 3753 p->flags); 3754 if (err) { 3755 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 3756 __func__, p->name, class, nsibling, err); 3757 return err; 3758 } 3759 } 3760 } 3761 3762 return 0; 3763 } 3764 3765 static int reset_virtual_engine(struct intel_gt *gt, 3766 struct intel_engine_cs **siblings, 3767 unsigned int nsibling) 3768 { 3769 struct intel_engine_cs *engine; 3770 struct intel_context *ve; 3771 unsigned long *heartbeat; 3772 struct igt_spinner spin; 3773 struct i915_request *rq; 3774 unsigned int n; 3775 int err = 0; 3776 3777 /* 3778 * In order to support offline error capture for fast preempt reset, 3779 * we need to decouple the guilty request and ensure that it and its 3780 * descendents are not executed while the capture is in progress. 3781 */ 3782 3783 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); 3784 if (!heartbeat) 3785 return -ENOMEM; 3786 3787 if (igt_spinner_init(&spin, gt)) { 3788 err = -ENOMEM; 3789 goto out_free; 3790 } 3791 3792 ve = intel_execlists_create_virtual(siblings, nsibling); 3793 if (IS_ERR(ve)) { 3794 err = PTR_ERR(ve); 3795 goto out_spin; 3796 } 3797 3798 for (n = 0; n < nsibling; n++) 3799 engine_heartbeat_disable(siblings[n], &heartbeat[n]); 3800 3801 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 3802 if (IS_ERR(rq)) { 3803 err = PTR_ERR(rq); 3804 goto out_heartbeat; 3805 } 3806 i915_request_add(rq); 3807 3808 if (!igt_wait_for_spinner(&spin, rq)) { 3809 intel_gt_set_wedged(gt); 3810 err = -ETIME; 3811 goto out_heartbeat; 3812 } 3813 3814 engine = rq->engine; 3815 GEM_BUG_ON(engine == ve->engine); 3816 3817 /* Take ownership of the reset and tasklet */ 3818 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 3819 >->reset.flags)) { 3820 intel_gt_set_wedged(gt); 3821 err = -EBUSY; 3822 goto out_heartbeat; 3823 } 3824 tasklet_disable(&engine->execlists.tasklet); 3825 3826 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 3827 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 3828 3829 /* Fake a preemption event; failed of course */ 3830 spin_lock_irq(&engine->active.lock); 3831 __unwind_incomplete_requests(engine); 3832 spin_unlock_irq(&engine->active.lock); 3833 GEM_BUG_ON(rq->engine != ve->engine); 3834 3835 /* Reset the engine while keeping our active request on hold */ 3836 execlists_hold(engine, rq); 3837 GEM_BUG_ON(!i915_request_on_hold(rq)); 3838 3839 intel_engine_reset(engine, NULL); 3840 GEM_BUG_ON(rq->fence.error != -EIO); 3841 3842 /* Release our grasp on the engine, letting CS flow again */ 3843 tasklet_enable(&engine->execlists.tasklet); 3844 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 3845 3846 /* Check that we do not resubmit the held request */ 3847 i915_request_get(rq); 3848 if (!i915_request_wait(rq, 0, HZ / 5)) { 3849 pr_err("%s: on hold request completed!\n", 3850 engine->name); 3851 intel_gt_set_wedged(gt); 3852 err = -EIO; 3853 goto out_rq; 3854 } 3855 GEM_BUG_ON(!i915_request_on_hold(rq)); 3856 3857 /* But is 
resubmitted on release */ 3858 execlists_unhold(engine, rq); 3859 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 3860 pr_err("%s: held request did not complete!\n", 3861 engine->name); 3862 intel_gt_set_wedged(gt); 3863 err = -ETIME; 3864 } 3865 3866 out_rq: 3867 i915_request_put(rq); 3868 out_heartbeat: 3869 for (n = 0; n < nsibling; n++) 3870 engine_heartbeat_enable(siblings[n], heartbeat[n]); 3871 3872 intel_context_put(ve); 3873 out_spin: 3874 igt_spinner_fini(&spin); 3875 out_free: 3876 kfree(heartbeat); 3877 return err; 3878 } 3879 3880 static int live_virtual_reset(void *arg) 3881 { 3882 struct intel_gt *gt = arg; 3883 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3884 unsigned int class, inst; 3885 3886 /* 3887 * Check that we handle a reset event within a virtual engine. 3888 * Only the physical engine is reset, but we have to check the flow 3889 * of the virtual requests around the reset, and make sure it is not 3890 * forgotten. 3891 */ 3892 3893 if (intel_uc_uses_guc_submission(>->uc)) 3894 return 0; 3895 3896 if (!intel_has_reset_engine(gt)) 3897 return 0; 3898 3899 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3900 int nsibling, err; 3901 3902 nsibling = 0; 3903 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3904 if (!gt->engine_class[class][inst]) 3905 continue; 3906 3907 siblings[nsibling++] = gt->engine_class[class][inst]; 3908 } 3909 if (nsibling < 2) 3910 continue; 3911 3912 err = reset_virtual_engine(gt, siblings, nsibling); 3913 if (err) 3914 return err; 3915 } 3916 3917 return 0; 3918 } 3919 3920 int intel_execlists_live_selftests(struct drm_i915_private *i915) 3921 { 3922 static const struct i915_subtest tests[] = { 3923 SUBTEST(live_sanitycheck), 3924 SUBTEST(live_unlite_switch), 3925 SUBTEST(live_unlite_preempt), 3926 SUBTEST(live_pin_rewind), 3927 SUBTEST(live_hold_reset), 3928 SUBTEST(live_error_interrupt), 3929 SUBTEST(live_timeslice_preempt), 3930 SUBTEST(live_timeslice_rewind), 3931 SUBTEST(live_timeslice_queue), 3932 SUBTEST(live_busywait_preempt), 3933 SUBTEST(live_preempt), 3934 SUBTEST(live_late_preempt), 3935 SUBTEST(live_nopreempt), 3936 SUBTEST(live_preempt_cancel), 3937 SUBTEST(live_suppress_self_preempt), 3938 SUBTEST(live_suppress_wait_preempt), 3939 SUBTEST(live_chain_preempt), 3940 SUBTEST(live_preempt_gang), 3941 SUBTEST(live_preempt_timeout), 3942 SUBTEST(live_preempt_smoke), 3943 SUBTEST(live_virtual_engine), 3944 SUBTEST(live_virtual_mask), 3945 SUBTEST(live_virtual_preserved), 3946 SUBTEST(live_virtual_bond), 3947 SUBTEST(live_virtual_reset), 3948 }; 3949 3950 if (!HAS_EXECLISTS(i915)) 3951 return 0; 3952 3953 if (intel_gt_is_wedged(&i915->gt)) 3954 return 0; 3955 3956 return intel_gt_live_subtests(tests, &i915->gt); 3957 } 3958 3959 static void hexdump(const void *buf, size_t len) 3960 { 3961 const size_t rowsize = 8 * sizeof(u32); 3962 const void *prev = NULL; 3963 bool skip = false; 3964 size_t pos; 3965 3966 for (pos = 0; pos < len; pos += rowsize) { 3967 char line[128]; 3968 3969 if (prev && !memcmp(prev, buf + pos, rowsize)) { 3970 if (!skip) { 3971 pr_info("*\n"); 3972 skip = true; 3973 } 3974 continue; 3975 } 3976 3977 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, 3978 rowsize, sizeof(u32), 3979 line, sizeof(line), 3980 false) >= sizeof(line)); 3981 pr_info("[%04zx] %s\n", pos, line); 3982 3983 prev = buf + pos; 3984 skip = false; 3985 } 3986 } 3987 3988 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 3989 { 3990 const u32 offset = 3991 
i915_ggtt_offset(ce->engine->status_page.vma) + 3992 offset_in_page(slot); 3993 struct i915_request *rq; 3994 u32 *cs; 3995 3996 rq = intel_context_create_request(ce); 3997 if (IS_ERR(rq)) 3998 return PTR_ERR(rq); 3999 4000 cs = intel_ring_begin(rq, 4); 4001 if (IS_ERR(cs)) { 4002 i915_request_add(rq); 4003 return PTR_ERR(cs); 4004 } 4005 4006 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4007 *cs++ = offset; 4008 *cs++ = 0; 4009 *cs++ = 1; 4010 4011 intel_ring_advance(rq, cs); 4012 4013 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4014 i915_request_add(rq); 4015 return 0; 4016 } 4017 4018 static int live_lrc_layout(void *arg) 4019 { 4020 struct intel_gt *gt = arg; 4021 struct intel_engine_cs *engine; 4022 enum intel_engine_id id; 4023 u32 *lrc; 4024 int err; 4025 4026 /* 4027 * Check the registers offsets we use to create the initial reg state 4028 * match the layout saved by HW. 4029 */ 4030 4031 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4032 if (!lrc) 4033 return -ENOMEM; 4034 4035 err = 0; 4036 for_each_engine(engine, gt, id) { 4037 u32 *hw; 4038 int dw; 4039 4040 if (!engine->default_state) 4041 continue; 4042 4043 hw = i915_gem_object_pin_map(engine->default_state, 4044 I915_MAP_WB); 4045 if (IS_ERR(hw)) { 4046 err = PTR_ERR(hw); 4047 break; 4048 } 4049 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 4050 4051 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4052 engine->kernel_context, 4053 engine, 4054 engine->kernel_context->ring, 4055 true); 4056 4057 dw = 0; 4058 do { 4059 u32 lri = hw[dw]; 4060 4061 if (lri == 0) { 4062 dw++; 4063 continue; 4064 } 4065 4066 if (lrc[dw] == 0) { 4067 pr_debug("%s: skipped instruction %x at dword %d\n", 4068 engine->name, lri, dw); 4069 dw++; 4070 continue; 4071 } 4072 4073 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4074 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4075 engine->name, dw, lri); 4076 err = -EINVAL; 4077 break; 4078 } 4079 4080 if (lrc[dw] != lri) { 4081 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4082 engine->name, dw, lri, lrc[dw]); 4083 err = -EINVAL; 4084 break; 4085 } 4086 4087 lri &= 0x7f; 4088 lri++; 4089 dw++; 4090 4091 while (lri) { 4092 if (hw[dw] != lrc[dw]) { 4093 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4094 engine->name, dw, hw[dw], lrc[dw]); 4095 err = -EINVAL; 4096 break; 4097 } 4098 4099 /* 4100 * Skip over the actual register value as we 4101 * expect that to differ. 4102 */ 4103 dw += 2; 4104 lri -= 2; 4105 } 4106 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4107 4108 if (err) { 4109 pr_info("%s: HW register image:\n", engine->name); 4110 hexdump(hw, PAGE_SIZE); 4111 4112 pr_info("%s: SW register image:\n", engine->name); 4113 hexdump(lrc, PAGE_SIZE); 4114 } 4115 4116 i915_gem_object_unpin_map(engine->default_state); 4117 if (err) 4118 break; 4119 } 4120 4121 kfree(lrc); 4122 return err; 4123 } 4124 4125 static int find_offset(const u32 *lri, u32 offset) 4126 { 4127 int i; 4128 4129 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4130 if (lri[i] == offset) 4131 return i; 4132 4133 return -1; 4134 } 4135 4136 static int live_lrc_fixed(void *arg) 4137 { 4138 struct intel_gt *gt = arg; 4139 struct intel_engine_cs *engine; 4140 enum intel_engine_id id; 4141 int err = 0; 4142 4143 /* 4144 * Check the assumed register offsets match the actual locations in 4145 * the context image. 
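* For each register of interest we search the engine's default context
* image for its mmio offset and compare the dword index at which it is
* found against the offset we use when building the register state.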
4146 */ 4147 4148 for_each_engine(engine, gt, id) { 4149 const struct { 4150 u32 reg; 4151 u32 offset; 4152 const char *name; 4153 } tbl[] = { 4154 { 4155 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 4156 CTX_RING_START - 1, 4157 "RING_START" 4158 }, 4159 { 4160 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 4161 CTX_RING_CTL - 1, 4162 "RING_CTL" 4163 }, 4164 { 4165 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 4166 CTX_RING_HEAD - 1, 4167 "RING_HEAD" 4168 }, 4169 { 4170 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 4171 CTX_RING_TAIL - 1, 4172 "RING_TAIL" 4173 }, 4174 { 4175 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 4176 lrc_ring_mi_mode(engine), 4177 "RING_MI_MODE" 4178 }, 4179 { 4180 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 4181 CTX_BB_STATE - 1, 4182 "BB_STATE" 4183 }, 4184 { 4185 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 4186 CTX_TIMESTAMP - 1, 4187 "RING_CTX_TIMESTAMP" 4188 }, 4189 { }, 4190 }, *t; 4191 u32 *hw; 4192 4193 if (!engine->default_state) 4194 continue; 4195 4196 hw = i915_gem_object_pin_map(engine->default_state, 4197 I915_MAP_WB); 4198 if (IS_ERR(hw)) { 4199 err = PTR_ERR(hw); 4200 break; 4201 } 4202 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 4203 4204 for (t = tbl; t->name; t++) { 4205 int dw = find_offset(hw, t->reg); 4206 4207 if (dw != t->offset) { 4208 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 4209 engine->name, 4210 t->name, 4211 t->reg, 4212 dw, 4213 t->offset); 4214 err = -EINVAL; 4215 } 4216 } 4217 4218 i915_gem_object_unpin_map(engine->default_state); 4219 } 4220 4221 return err; 4222 } 4223 4224 static int __live_lrc_state(struct intel_engine_cs *engine, 4225 struct i915_vma *scratch) 4226 { 4227 struct intel_context *ce; 4228 struct i915_request *rq; 4229 enum { 4230 RING_START_IDX = 0, 4231 RING_TAIL_IDX, 4232 MAX_IDX 4233 }; 4234 u32 expected[MAX_IDX]; 4235 u32 *cs; 4236 int err; 4237 int n; 4238 4239 ce = intel_context_create(engine); 4240 if (IS_ERR(ce)) 4241 return PTR_ERR(ce); 4242 4243 err = intel_context_pin(ce); 4244 if (err) 4245 goto err_put; 4246 4247 rq = i915_request_create(ce); 4248 if (IS_ERR(rq)) { 4249 err = PTR_ERR(rq); 4250 goto err_unpin; 4251 } 4252 4253 cs = intel_ring_begin(rq, 4 * MAX_IDX); 4254 if (IS_ERR(cs)) { 4255 err = PTR_ERR(cs); 4256 i915_request_add(rq); 4257 goto err_unpin; 4258 } 4259 4260 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4261 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 4262 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 4263 *cs++ = 0; 4264 4265 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 4266 4267 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4268 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 4269 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 4270 *cs++ = 0; 4271 4272 i915_vma_lock(scratch); 4273 err = i915_request_await_object(rq, scratch->obj, true); 4274 if (!err) 4275 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4276 i915_vma_unlock(scratch); 4277 4278 i915_request_get(rq); 4279 i915_request_add(rq); 4280 if (err) 4281 goto err_rq; 4282 4283 intel_engine_flush_submission(engine); 4284 expected[RING_TAIL_IDX] = ce->ring->tail; 4285 4286 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4287 err = -ETIME; 4288 goto err_rq; 4289 } 4290 4291 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4292 if (IS_ERR(cs)) { 4293 err = PTR_ERR(cs); 4294 goto err_rq; 4295 } 4296 4297 for (n = 0; n < MAX_IDX; n++) 
{ 4298 if (cs[n] != expected[n]) { 4299 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 4300 engine->name, n, cs[n], expected[n]); 4301 err = -EINVAL; 4302 break; 4303 } 4304 } 4305 4306 i915_gem_object_unpin_map(scratch->obj); 4307 4308 err_rq: 4309 i915_request_put(rq); 4310 err_unpin: 4311 intel_context_unpin(ce); 4312 err_put: 4313 intel_context_put(ce); 4314 return err; 4315 } 4316 4317 static int live_lrc_state(void *arg) 4318 { 4319 struct intel_gt *gt = arg; 4320 struct intel_engine_cs *engine; 4321 struct i915_vma *scratch; 4322 enum intel_engine_id id; 4323 int err = 0; 4324 4325 /* 4326 * Check the live register state matches what we expect for this 4327 * intel_context. 4328 */ 4329 4330 scratch = create_scratch(gt); 4331 if (IS_ERR(scratch)) 4332 return PTR_ERR(scratch); 4333 4334 for_each_engine(engine, gt, id) { 4335 err = __live_lrc_state(engine, scratch); 4336 if (err) 4337 break; 4338 } 4339 4340 if (igt_flush_test(gt->i915)) 4341 err = -EIO; 4342 4343 i915_vma_unpin_and_release(&scratch, 0); 4344 return err; 4345 } 4346 4347 static int gpr_make_dirty(struct intel_context *ce) 4348 { 4349 struct i915_request *rq; 4350 u32 *cs; 4351 int n; 4352 4353 rq = intel_context_create_request(ce); 4354 if (IS_ERR(rq)) 4355 return PTR_ERR(rq); 4356 4357 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 4358 if (IS_ERR(cs)) { 4359 i915_request_add(rq); 4360 return PTR_ERR(cs); 4361 } 4362 4363 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 4364 for (n = 0; n < NUM_GPR_DW; n++) { 4365 *cs++ = CS_GPR(ce->engine, n); 4366 *cs++ = STACK_MAGIC; 4367 } 4368 *cs++ = MI_NOOP; 4369 4370 intel_ring_advance(rq, cs); 4371 4372 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4373 i915_request_add(rq); 4374 4375 return 0; 4376 } 4377 4378 static struct i915_request * 4379 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 4380 { 4381 const u32 offset = 4382 i915_ggtt_offset(ce->engine->status_page.vma) + 4383 offset_in_page(slot); 4384 struct i915_request *rq; 4385 u32 *cs; 4386 int err; 4387 int n; 4388 4389 rq = intel_context_create_request(ce); 4390 if (IS_ERR(rq)) 4391 return rq; 4392 4393 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 4394 if (IS_ERR(cs)) { 4395 i915_request_add(rq); 4396 return ERR_CAST(cs); 4397 } 4398 4399 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4400 *cs++ = MI_NOOP; 4401 4402 *cs++ = MI_SEMAPHORE_WAIT | 4403 MI_SEMAPHORE_GLOBAL_GTT | 4404 MI_SEMAPHORE_POLL | 4405 MI_SEMAPHORE_SAD_NEQ_SDD; 4406 *cs++ = 0; 4407 *cs++ = offset; 4408 *cs++ = 0; 4409 4410 for (n = 0; n < NUM_GPR_DW; n++) { 4411 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4412 *cs++ = CS_GPR(ce->engine, n); 4413 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4414 *cs++ = 0; 4415 } 4416 4417 i915_vma_lock(scratch); 4418 err = i915_request_await_object(rq, scratch->obj, true); 4419 if (!err) 4420 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4421 i915_vma_unlock(scratch); 4422 4423 i915_request_get(rq); 4424 i915_request_add(rq); 4425 if (err) { 4426 i915_request_put(rq); 4427 rq = ERR_PTR(err); 4428 } 4429 4430 return rq; 4431 } 4432 4433 static int __live_lrc_gpr(struct intel_engine_cs *engine, 4434 struct i915_vma *scratch, 4435 bool preempt) 4436 { 4437 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 4438 struct intel_context *ce; 4439 struct i915_request *rq; 4440 u32 *cs; 4441 int err; 4442 int n; 4443 4444 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 4445 return 0; /* GPR only on rcs0 for gen8 */ 4446 
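/* Dirty the GPRs via the kernel context first so that any state leaking
 * into the new context created below shows up as a non-zero readback.
 */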
4447 err = gpr_make_dirty(engine->kernel_context); 4448 if (err) 4449 return err; 4450 4451 ce = intel_context_create(engine); 4452 if (IS_ERR(ce)) 4453 return PTR_ERR(ce); 4454 4455 rq = __gpr_read(ce, scratch, slot); 4456 if (IS_ERR(rq)) { 4457 err = PTR_ERR(rq); 4458 goto err_put; 4459 } 4460 4461 err = wait_for_submit(engine, rq, HZ / 2); 4462 if (err) 4463 goto err_rq; 4464 4465 if (preempt) { 4466 err = gpr_make_dirty(engine->kernel_context); 4467 if (err) 4468 goto err_rq; 4469 4470 err = emit_semaphore_signal(engine->kernel_context, slot); 4471 if (err) 4472 goto err_rq; 4473 } else { 4474 slot[0] = 1; 4475 wmb(); 4476 } 4477 4478 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4479 err = -ETIME; 4480 goto err_rq; 4481 } 4482 4483 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4484 if (IS_ERR(cs)) { 4485 err = PTR_ERR(cs); 4486 goto err_rq; 4487 } 4488 4489 for (n = 0; n < NUM_GPR_DW; n++) { 4490 if (cs[n]) { 4491 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 4492 engine->name, 4493 n / 2, n & 1 ? "udw" : "ldw", 4494 cs[n]); 4495 err = -EINVAL; 4496 break; 4497 } 4498 } 4499 4500 i915_gem_object_unpin_map(scratch->obj); 4501 4502 err_rq: 4503 memset32(&slot[0], -1, 4); 4504 wmb(); 4505 i915_request_put(rq); 4506 err_put: 4507 intel_context_put(ce); 4508 return err; 4509 } 4510 4511 static int live_lrc_gpr(void *arg) 4512 { 4513 struct intel_gt *gt = arg; 4514 struct intel_engine_cs *engine; 4515 struct i915_vma *scratch; 4516 enum intel_engine_id id; 4517 int err = 0; 4518 4519 /* 4520 * Check that GPR registers are cleared in new contexts as we need 4521 * to avoid leaking any information from previous contexts. 4522 */ 4523 4524 scratch = create_scratch(gt); 4525 if (IS_ERR(scratch)) 4526 return PTR_ERR(scratch); 4527 4528 for_each_engine(engine, gt, id) { 4529 unsigned long heartbeat; 4530 4531 engine_heartbeat_disable(engine, &heartbeat); 4532 4533 err = __live_lrc_gpr(engine, scratch, false); 4534 if (err) 4535 goto err; 4536 4537 err = __live_lrc_gpr(engine, scratch, true); 4538 if (err) 4539 goto err; 4540 4541 err: 4542 engine_heartbeat_enable(engine, heartbeat); 4543 if (igt_flush_test(gt->i915)) 4544 err = -EIO; 4545 if (err) 4546 break; 4547 } 4548 4549 i915_vma_unpin_and_release(&scratch, 0); 4550 return err; 4551 } 4552 4553 static struct i915_request * 4554 create_timestamp(struct intel_context *ce, void *slot, int idx) 4555 { 4556 const u32 offset = 4557 i915_ggtt_offset(ce->engine->status_page.vma) + 4558 offset_in_page(slot); 4559 struct i915_request *rq; 4560 u32 *cs; 4561 int err; 4562 4563 rq = intel_context_create_request(ce); 4564 if (IS_ERR(rq)) 4565 return rq; 4566 4567 cs = intel_ring_begin(rq, 10); 4568 if (IS_ERR(cs)) { 4569 err = PTR_ERR(cs); 4570 goto err; 4571 } 4572 4573 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4574 *cs++ = MI_NOOP; 4575 4576 *cs++ = MI_SEMAPHORE_WAIT | 4577 MI_SEMAPHORE_GLOBAL_GTT | 4578 MI_SEMAPHORE_POLL | 4579 MI_SEMAPHORE_SAD_NEQ_SDD; 4580 *cs++ = 0; 4581 *cs++ = offset; 4582 *cs++ = 0; 4583 4584 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4585 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 4586 *cs++ = offset + idx * sizeof(u32); 4587 *cs++ = 0; 4588 4589 intel_ring_advance(rq, cs); 4590 4591 rq->sched.attr.priority = I915_PRIORITY_MASK; 4592 err = 0; 4593 err: 4594 i915_request_get(rq); 4595 i915_request_add(rq); 4596 if (err) { 4597 i915_request_put(rq); 4598 return ERR_PTR(err); 4599 } 4600 4601 return rq; 4602 } 4603 4604 struct lrc_timestamp { 4605 struct intel_engine_cs *engine; 
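/* ce[0] carries the poisoned CTX_TIMESTAMP; ce[1] is only used to preempt it */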
4606 struct intel_context *ce[2]; 4607 u32 poison; 4608 }; 4609 4610 static bool timestamp_advanced(u32 start, u32 end) 4611 { 4612 return (s32)(end - start) > 0; 4613 } 4614 4615 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 4616 { 4617 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 4618 struct i915_request *rq; 4619 u32 timestamp; 4620 int err = 0; 4621 4622 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 4623 rq = create_timestamp(arg->ce[0], slot, 1); 4624 if (IS_ERR(rq)) 4625 return PTR_ERR(rq); 4626 4627 err = wait_for_submit(rq->engine, rq, HZ / 2); 4628 if (err) 4629 goto err; 4630 4631 if (preempt) { 4632 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 4633 err = emit_semaphore_signal(arg->ce[1], slot); 4634 if (err) 4635 goto err; 4636 } else { 4637 slot[0] = 1; 4638 wmb(); 4639 } 4640 4641 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 4642 err = -ETIME; 4643 goto err; 4644 } 4645 4646 /* and wait for switch to kernel */ 4647 if (igt_flush_test(arg->engine->i915)) { 4648 err = -EIO; 4649 goto err; 4650 } 4651 4652 rmb(); 4653 4654 if (!timestamp_advanced(arg->poison, slot[1])) { 4655 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 4656 arg->engine->name, preempt ? "preempt" : "simple", 4657 arg->poison, slot[1]); 4658 err = -EINVAL; 4659 } 4660 4661 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 4662 if (!timestamp_advanced(slot[1], timestamp)) { 4663 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 4664 arg->engine->name, preempt ? "preempt" : "simple", 4665 slot[1], timestamp); 4666 err = -EINVAL; 4667 } 4668 4669 err: 4670 memset32(slot, -1, 4); 4671 i915_request_put(rq); 4672 return err; 4673 } 4674 4675 static int live_lrc_timestamp(void *arg) 4676 { 4677 struct intel_gt *gt = arg; 4678 enum intel_engine_id id; 4679 struct lrc_timestamp data; 4680 const u32 poison[] = { 4681 0, 4682 S32_MAX, 4683 (u32)S32_MAX + 1, 4684 U32_MAX, 4685 }; 4686 4687 /* 4688 * We want to verify that the timestamp is saved and restore across 4689 * context switches and is monotonic. 4690 * 4691 * So we do this with a little bit of LRC poisoning to check various 4692 * boundary conditions, and see what happens if we preempt the context 4693 * with a second request (carrying more poison into the timestamp). 
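* In both the plain and preempted cases we require that the timestamp read
* back after the context restore has advanced past the poison value, and
* that the value saved back into the context image has in turn advanced
* past that readback.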
4694 */ 4695 4696 for_each_engine(data.engine, gt, id) { 4697 unsigned long heartbeat; 4698 int i, err = 0; 4699 4700 engine_heartbeat_disable(data.engine, &heartbeat); 4701 4702 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 4703 struct intel_context *tmp; 4704 4705 tmp = intel_context_create(data.engine); 4706 if (IS_ERR(tmp)) { 4707 err = PTR_ERR(tmp); 4708 goto err; 4709 } 4710 4711 err = intel_context_pin(tmp); 4712 if (err) { 4713 intel_context_put(tmp); 4714 goto err; 4715 } 4716 4717 data.ce[i] = tmp; 4718 } 4719 4720 for (i = 0; i < ARRAY_SIZE(poison); i++) { 4721 data.poison = poison[i]; 4722 4723 err = __lrc_timestamp(&data, false); 4724 if (err) 4725 break; 4726 4727 err = __lrc_timestamp(&data, true); 4728 if (err) 4729 break; 4730 } 4731 4732 err: 4733 engine_heartbeat_enable(data.engine, heartbeat); 4734 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 4735 if (!data.ce[i]) 4736 break; 4737 4738 intel_context_unpin(data.ce[i]); 4739 intel_context_put(data.ce[i]); 4740 } 4741 4742 if (igt_flush_test(gt->i915)) 4743 err = -EIO; 4744 if (err) 4745 return err; 4746 } 4747 4748 return 0; 4749 } 4750 4751 static int __live_pphwsp_runtime(struct intel_engine_cs *engine) 4752 { 4753 struct intel_context *ce; 4754 struct i915_request *rq; 4755 IGT_TIMEOUT(end_time); 4756 int err; 4757 4758 ce = intel_context_create(engine); 4759 if (IS_ERR(ce)) 4760 return PTR_ERR(ce); 4761 4762 ce->runtime.num_underflow = 0; 4763 ce->runtime.max_underflow = 0; 4764 4765 do { 4766 unsigned int loop = 1024; 4767 4768 while (loop) { 4769 rq = intel_context_create_request(ce); 4770 if (IS_ERR(rq)) { 4771 err = PTR_ERR(rq); 4772 goto err_rq; 4773 } 4774 4775 if (--loop == 0) 4776 i915_request_get(rq); 4777 4778 i915_request_add(rq); 4779 } 4780 4781 if (__igt_timeout(end_time, NULL)) 4782 break; 4783 4784 i915_request_put(rq); 4785 } while (1); 4786 4787 err = i915_request_wait(rq, 0, HZ / 5); 4788 if (err < 0) { 4789 pr_err("%s: request not completed!\n", engine->name); 4790 goto err_wait; 4791 } 4792 4793 igt_flush_test(engine->i915); 4794 4795 pr_info("%s: pphwsp runtime %lluns, average %lluns\n", 4796 engine->name, 4797 intel_context_get_total_runtime_ns(ce), 4798 intel_context_get_avg_runtime_ns(ce)); 4799 4800 err = 0; 4801 if (ce->runtime.num_underflow) { 4802 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", 4803 engine->name, 4804 ce->runtime.num_underflow, 4805 ce->runtime.max_underflow); 4806 GEM_TRACE_DUMP(); 4807 err = -EOVERFLOW; 4808 } 4809 4810 err_wait: 4811 i915_request_put(rq); 4812 err_rq: 4813 intel_context_put(ce); 4814 return err; 4815 } 4816 4817 static int live_pphwsp_runtime(void *arg) 4818 { 4819 struct intel_gt *gt = arg; 4820 struct intel_engine_cs *engine; 4821 enum intel_engine_id id; 4822 int err = 0; 4823 4824 /* 4825 * Check that cumulative context runtime as stored in the pphwsp[16] 4826 * is monotonic. 4827 */ 4828 4829 for_each_engine(engine, gt, id) { 4830 err = __live_pphwsp_runtime(engine); 4831 if (err) 4832 break; 4833 } 4834 4835 if (igt_flush_test(gt->i915)) 4836 err = -EIO; 4837 4838 return err; 4839 } 4840 4841 int intel_lrc_live_selftests(struct drm_i915_private *i915) 4842 { 4843 static const struct i915_subtest tests[] = { 4844 SUBTEST(live_lrc_layout), 4845 SUBTEST(live_lrc_fixed), 4846 SUBTEST(live_lrc_state), 4847 SUBTEST(live_lrc_gpr), 4848 SUBTEST(live_lrc_timestamp), 4849 SUBTEST(live_pphwsp_runtime), 4850 }; 4851 4852 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 4853 return 0; 4854 4855 return intel_gt_live_subtests(tests, &i915->gt); 4856 } 4857