1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2018 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_engine_heartbeat.h" 11 #include "gt/intel_reset.h" 12 13 #include "i915_selftest.h" 14 #include "selftests/i915_random.h" 15 #include "selftests/igt_flush_test.h" 16 #include "selftests/igt_live_test.h" 17 #include "selftests/igt_spinner.h" 18 #include "selftests/lib_sw_fence.h" 19 20 #include "gem/selftests/igt_gem_utils.h" 21 #include "gem/selftests/mock_context.h" 22 23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) 24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ 25 26 static struct i915_vma *create_scratch(struct intel_gt *gt) 27 { 28 struct drm_i915_gem_object *obj; 29 struct i915_vma *vma; 30 int err; 31 32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 33 if (IS_ERR(obj)) 34 return ERR_CAST(obj); 35 36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); 37 38 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 39 if (IS_ERR(vma)) { 40 i915_gem_object_put(obj); 41 return vma; 42 } 43 44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 45 if (err) { 46 i915_gem_object_put(obj); 47 return ERR_PTR(err); 48 } 49 50 return vma; 51 } 52 53 static void engine_heartbeat_disable(struct intel_engine_cs *engine, 54 unsigned long *saved) 55 { 56 *saved = engine->props.heartbeat_interval_ms; 57 engine->props.heartbeat_interval_ms = 0; 58 59 intel_engine_pm_get(engine); 60 intel_engine_park_heartbeat(engine); 61 } 62 63 static void engine_heartbeat_enable(struct intel_engine_cs *engine, 64 unsigned long saved) 65 { 66 intel_engine_pm_put(engine); 67 68 engine->props.heartbeat_interval_ms = saved; 69 } 70 71 static int live_sanitycheck(void *arg) 72 { 73 struct intel_gt *gt = arg; 74 struct intel_engine_cs *engine; 75 enum intel_engine_id id; 76 struct igt_spinner spin; 77 int err = 0; 78 79 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) 80 return 0; 81 82 if (igt_spinner_init(&spin, gt)) 83 return -ENOMEM; 84 85 for_each_engine(engine, gt, id) { 86 struct intel_context *ce; 87 struct i915_request *rq; 88 89 ce = intel_context_create(engine); 90 if (IS_ERR(ce)) { 91 err = PTR_ERR(ce); 92 break; 93 } 94 95 rq = igt_spinner_create_request(&spin, ce, MI_NOOP); 96 if (IS_ERR(rq)) { 97 err = PTR_ERR(rq); 98 goto out_ctx; 99 } 100 101 i915_request_add(rq); 102 if (!igt_wait_for_spinner(&spin, rq)) { 103 GEM_TRACE("spinner failed to start\n"); 104 GEM_TRACE_DUMP(); 105 intel_gt_set_wedged(gt); 106 err = -EIO; 107 goto out_ctx; 108 } 109 110 igt_spinner_end(&spin); 111 if (igt_flush_test(gt->i915)) { 112 err = -EIO; 113 goto out_ctx; 114 } 115 116 out_ctx: 117 intel_context_put(ce); 118 if (err) 119 break; 120 } 121 122 igt_spinner_fini(&spin); 123 return err; 124 } 125 126 static int live_unlite_restore(struct intel_gt *gt, int prio) 127 { 128 struct intel_engine_cs *engine; 129 enum intel_engine_id id; 130 struct igt_spinner spin; 131 int err = -ENOMEM; 132 133 /* 134 * Check that we can correctly context switch between 2 instances 135 * on the same engine from the same parent context. 136 */ 137 138 if (igt_spinner_init(&spin, gt)) 139 return err; 140 141 err = 0; 142 for_each_engine(engine, gt, id) { 143 struct intel_context *ce[2] = {}; 144 struct i915_request *rq[2]; 145 struct igt_live_test t; 146 unsigned long saved; 147 int n; 148 149 if (prio && !intel_engine_has_preemption(engine)) 150 continue; 151 152 if (!intel_engine_can_store_dword(engine)) 153 continue; 154 155 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 156 err = -EIO; 157 break; 158 } 159 engine_heartbeat_disable(engine, &saved); 160 161 for (n = 0; n < ARRAY_SIZE(ce); n++) { 162 struct intel_context *tmp; 163 164 tmp = intel_context_create(engine); 165 if (IS_ERR(tmp)) { 166 err = PTR_ERR(tmp); 167 goto err_ce; 168 } 169 170 err = intel_context_pin(tmp); 171 if (err) { 172 intel_context_put(tmp); 173 goto err_ce; 174 } 175 176 /* 177 * Setup the pair of contexts such that if we 178 * lite-restore using the RING_TAIL from ce[1] it 179 * will execute garbage from ce[0]->ring. 180 */ 181 memset(tmp->ring->vaddr, 182 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 183 tmp->ring->vma->size); 184 185 ce[n] = tmp; 186 } 187 GEM_BUG_ON(!ce[1]->ring->size); 188 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 189 __execlists_update_reg_state(ce[1], engine); 190 191 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 192 if (IS_ERR(rq[0])) { 193 err = PTR_ERR(rq[0]); 194 goto err_ce; 195 } 196 197 i915_request_get(rq[0]); 198 i915_request_add(rq[0]); 199 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 200 201 if (!igt_wait_for_spinner(&spin, rq[0])) { 202 i915_request_put(rq[0]); 203 goto err_ce; 204 } 205 206 rq[1] = i915_request_create(ce[1]); 207 if (IS_ERR(rq[1])) { 208 err = PTR_ERR(rq[1]); 209 i915_request_put(rq[0]); 210 goto err_ce; 211 } 212 213 if (!prio) { 214 /* 215 * Ensure we do the switch to ce[1] on completion. 216 * 217 * rq[0] is already submitted, so this should reduce 218 * to a no-op (a wait on a request on the same engine 219 * uses the submit fence, not the completion fence), 220 * but it will install a dependency on rq[1] for rq[0] 221 * that will prevent the pair being reordered by 222 * timeslicing. 223 */ 224 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 225 } 226 227 i915_request_get(rq[1]); 228 i915_request_add(rq[1]); 229 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 230 i915_request_put(rq[0]); 231 232 if (prio) { 233 struct i915_sched_attr attr = { 234 .priority = prio, 235 }; 236 237 /* Alternatively preempt the spinner with ce[1] */ 238 engine->schedule(rq[1], &attr); 239 } 240 241 /* And switch back to ce[0] for good measure */ 242 rq[0] = i915_request_create(ce[0]); 243 if (IS_ERR(rq[0])) { 244 err = PTR_ERR(rq[0]); 245 i915_request_put(rq[1]); 246 goto err_ce; 247 } 248 249 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 250 i915_request_get(rq[0]); 251 i915_request_add(rq[0]); 252 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 253 i915_request_put(rq[1]); 254 i915_request_put(rq[0]); 255 256 err_ce: 257 tasklet_kill(&engine->execlists.tasklet); /* flush submission */ 258 igt_spinner_end(&spin); 259 for (n = 0; n < ARRAY_SIZE(ce); n++) { 260 if (IS_ERR_OR_NULL(ce[n])) 261 break; 262 263 intel_context_unpin(ce[n]); 264 intel_context_put(ce[n]); 265 } 266 267 engine_heartbeat_enable(engine, saved); 268 if (igt_live_test_end(&t)) 269 err = -EIO; 270 if (err) 271 break; 272 } 273 274 igt_spinner_fini(&spin); 275 return err; 276 } 277 278 static int live_unlite_switch(void *arg) 279 { 280 return live_unlite_restore(arg, 0); 281 } 282 283 static int live_unlite_preempt(void *arg) 284 { 285 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 286 } 287 288 static int live_hold_reset(void *arg) 289 { 290 struct intel_gt *gt = arg; 291 struct intel_engine_cs *engine; 292 enum intel_engine_id id; 293 struct igt_spinner spin; 294 int err = 0; 295 296 /* 297 * In order to support offline error capture for fast preempt reset, 298 * we need to decouple the guilty request and ensure that it and its 299 * descendents are not executed while the capture is in progress. 300 */ 301 302 if (!intel_has_reset_engine(gt)) 303 return 0; 304 305 if (igt_spinner_init(&spin, gt)) 306 return -ENOMEM; 307 308 for_each_engine(engine, gt, id) { 309 struct intel_context *ce; 310 unsigned long heartbeat; 311 struct i915_request *rq; 312 313 ce = intel_context_create(engine); 314 if (IS_ERR(ce)) { 315 err = PTR_ERR(ce); 316 break; 317 } 318 319 engine_heartbeat_disable(engine, &heartbeat); 320 321 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 322 if (IS_ERR(rq)) { 323 err = PTR_ERR(rq); 324 goto out; 325 } 326 i915_request_add(rq); 327 328 if (!igt_wait_for_spinner(&spin, rq)) { 329 intel_gt_set_wedged(gt); 330 err = -ETIME; 331 goto out; 332 } 333 334 /* We have our request executing, now remove it and reset */ 335 336 if (test_and_set_bit(I915_RESET_ENGINE + id, 337 >->reset.flags)) { 338 intel_gt_set_wedged(gt); 339 err = -EBUSY; 340 goto out; 341 } 342 tasklet_disable(&engine->execlists.tasklet); 343 344 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 345 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 346 347 i915_request_get(rq); 348 execlists_hold(engine, rq); 349 GEM_BUG_ON(!i915_request_on_hold(rq)); 350 351 intel_engine_reset(engine, NULL); 352 GEM_BUG_ON(rq->fence.error != -EIO); 353 354 tasklet_enable(&engine->execlists.tasklet); 355 clear_and_wake_up_bit(I915_RESET_ENGINE + id, 356 >->reset.flags); 357 358 /* Check that we do not resubmit the held request */ 359 if (!i915_request_wait(rq, 0, HZ / 5)) { 360 pr_err("%s: on hold request completed!\n", 361 engine->name); 362 i915_request_put(rq); 363 err = -EIO; 364 goto out; 365 } 366 GEM_BUG_ON(!i915_request_on_hold(rq)); 367 368 /* But is resubmitted on release */ 369 execlists_unhold(engine, rq); 370 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 371 pr_err("%s: held request did not complete!\n", 372 engine->name); 373 intel_gt_set_wedged(gt); 374 err = -ETIME; 375 } 376 i915_request_put(rq); 377 378 out: 379 engine_heartbeat_enable(engine, heartbeat); 380 intel_context_put(ce); 381 if (err) 382 break; 383 } 384 385 igt_spinner_fini(&spin); 386 return err; 387 } 388 389 static int 390 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 391 { 392 u32 *cs; 393 394 cs = intel_ring_begin(rq, 10); 395 if (IS_ERR(cs)) 396 return PTR_ERR(cs); 397 398 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 399 400 *cs++ = MI_SEMAPHORE_WAIT | 401 MI_SEMAPHORE_GLOBAL_GTT | 402 MI_SEMAPHORE_POLL | 403 MI_SEMAPHORE_SAD_NEQ_SDD; 404 *cs++ = 0; 405 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 406 *cs++ = 0; 407 408 if (idx > 0) { 409 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 410 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 411 *cs++ = 0; 412 *cs++ = 1; 413 } else { 414 *cs++ = MI_NOOP; 415 *cs++ = MI_NOOP; 416 *cs++ = MI_NOOP; 417 *cs++ = MI_NOOP; 418 } 419 420 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 421 422 intel_ring_advance(rq, cs); 423 return 0; 424 } 425 426 static struct i915_request * 427 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 428 { 429 struct intel_context *ce; 430 struct i915_request *rq; 431 int err; 432 433 ce = intel_context_create(engine); 434 if (IS_ERR(ce)) 435 return ERR_CAST(ce); 436 437 rq = intel_context_create_request(ce); 438 if (IS_ERR(rq)) 439 goto out_ce; 440 441 err = 0; 442 if (rq->engine->emit_init_breadcrumb) 443 err = rq->engine->emit_init_breadcrumb(rq); 444 if (err == 0) 445 err = emit_semaphore_chain(rq, vma, idx); 446 if (err == 0) 447 i915_request_get(rq); 448 i915_request_add(rq); 449 if (err) 450 rq = ERR_PTR(err); 451 452 out_ce: 453 intel_context_put(ce); 454 return rq; 455 } 456 457 static int 458 release_queue(struct intel_engine_cs *engine, 459 struct i915_vma *vma, 460 int idx, int prio) 461 { 462 struct i915_sched_attr attr = { 463 .priority = prio, 464 }; 465 struct i915_request *rq; 466 u32 *cs; 467 468 rq = intel_engine_create_kernel_request(engine); 469 if (IS_ERR(rq)) 470 return PTR_ERR(rq); 471 472 cs = intel_ring_begin(rq, 4); 473 if (IS_ERR(cs)) { 474 i915_request_add(rq); 475 return PTR_ERR(cs); 476 } 477 478 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 479 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 480 *cs++ = 0; 481 *cs++ = 1; 482 483 intel_ring_advance(rq, cs); 484 485 i915_request_get(rq); 486 i915_request_add(rq); 487 488 local_bh_disable(); 489 engine->schedule(rq, &attr); 490 local_bh_enable(); /* kick tasklet */ 491 492 i915_request_put(rq); 493 494 return 0; 495 } 496 497 static int 498 slice_semaphore_queue(struct intel_engine_cs *outer, 499 struct i915_vma *vma, 500 int count) 501 { 502 struct intel_engine_cs *engine; 503 struct i915_request *head; 504 enum intel_engine_id id; 505 int err, i, n = 0; 506 507 head = semaphore_queue(outer, vma, n++); 508 if (IS_ERR(head)) 509 return PTR_ERR(head); 510 511 for_each_engine(engine, outer->gt, id) { 512 for (i = 0; i < count; i++) { 513 struct i915_request *rq; 514 515 rq = semaphore_queue(engine, vma, n++); 516 if (IS_ERR(rq)) { 517 err = PTR_ERR(rq); 518 goto out; 519 } 520 521 i915_request_put(rq); 522 } 523 } 524 525 err = release_queue(outer, vma, n, INT_MAX); 526 if (err) 527 goto out; 528 529 if (i915_request_wait(head, 0, 530 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { 531 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 532 count, n); 533 GEM_TRACE_DUMP(); 534 intel_gt_set_wedged(outer->gt); 535 err = -EIO; 536 } 537 538 out: 539 i915_request_put(head); 540 return err; 541 } 542 543 static int live_timeslice_preempt(void *arg) 544 { 545 struct intel_gt *gt = arg; 546 struct drm_i915_gem_object *obj; 547 struct i915_vma *vma; 548 void *vaddr; 549 int err = 0; 550 int count; 551 552 /* 553 * If a request takes too long, we would like to give other users 554 * a fair go on the GPU. In particular, users may create batches 555 * that wait upon external input, where that input may even be 556 * supplied by another GPU job. To avoid blocking forever, we 557 * need to preempt the current task and replace it with another 558 * ready task. 559 */ 560 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 561 return 0; 562 563 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 564 if (IS_ERR(obj)) 565 return PTR_ERR(obj); 566 567 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 568 if (IS_ERR(vma)) { 569 err = PTR_ERR(vma); 570 goto err_obj; 571 } 572 573 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 574 if (IS_ERR(vaddr)) { 575 err = PTR_ERR(vaddr); 576 goto err_obj; 577 } 578 579 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 580 if (err) 581 goto err_map; 582 583 for_each_prime_number_from(count, 1, 16) { 584 struct intel_engine_cs *engine; 585 enum intel_engine_id id; 586 587 for_each_engine(engine, gt, id) { 588 unsigned long saved; 589 590 if (!intel_engine_has_preemption(engine)) 591 continue; 592 593 memset(vaddr, 0, PAGE_SIZE); 594 595 engine_heartbeat_disable(engine, &saved); 596 err = slice_semaphore_queue(engine, vma, count); 597 engine_heartbeat_enable(engine, saved); 598 if (err) 599 goto err_pin; 600 601 if (igt_flush_test(gt->i915)) { 602 err = -EIO; 603 goto err_pin; 604 } 605 } 606 } 607 608 err_pin: 609 i915_vma_unpin(vma); 610 err_map: 611 i915_gem_object_unpin_map(obj); 612 err_obj: 613 i915_gem_object_put(obj); 614 return err; 615 } 616 617 static struct i915_request *nop_request(struct intel_engine_cs *engine) 618 { 619 struct i915_request *rq; 620 621 rq = intel_engine_create_kernel_request(engine); 622 if (IS_ERR(rq)) 623 return rq; 624 625 i915_request_get(rq); 626 i915_request_add(rq); 627 628 return rq; 629 } 630 631 static int wait_for_submit(struct intel_engine_cs *engine, 632 struct i915_request *rq, 633 unsigned long timeout) 634 { 635 timeout += jiffies; 636 do { 637 cond_resched(); 638 intel_engine_flush_submission(engine); 639 if (i915_request_is_active(rq)) 640 return 0; 641 } while (time_before(jiffies, timeout)); 642 643 return -ETIME; 644 } 645 646 static long timeslice_threshold(const struct intel_engine_cs *engine) 647 { 648 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; 649 } 650 651 static int live_timeslice_queue(void *arg) 652 { 653 struct intel_gt *gt = arg; 654 struct drm_i915_gem_object *obj; 655 struct intel_engine_cs *engine; 656 enum intel_engine_id id; 657 struct i915_vma *vma; 658 void *vaddr; 659 int err = 0; 660 661 /* 662 * Make sure that even if ELSP[0] and ELSP[1] are filled with 663 * timeslicing between them disabled, we *do* enable timeslicing 664 * if the queue demands it. (Normally, we do not submit if 665 * ELSP[1] is already occupied, so must rely on timeslicing to 666 * eject ELSP[0] in favour of the queue.) 667 */ 668 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 669 return 0; 670 671 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 672 if (IS_ERR(obj)) 673 return PTR_ERR(obj); 674 675 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 676 if (IS_ERR(vma)) { 677 err = PTR_ERR(vma); 678 goto err_obj; 679 } 680 681 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 682 if (IS_ERR(vaddr)) { 683 err = PTR_ERR(vaddr); 684 goto err_obj; 685 } 686 687 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 688 if (err) 689 goto err_map; 690 691 for_each_engine(engine, gt, id) { 692 struct i915_sched_attr attr = { 693 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 694 }; 695 struct i915_request *rq, *nop; 696 unsigned long saved; 697 698 if (!intel_engine_has_preemption(engine)) 699 continue; 700 701 engine_heartbeat_disable(engine, &saved); 702 memset(vaddr, 0, PAGE_SIZE); 703 704 /* ELSP[0]: semaphore wait */ 705 rq = semaphore_queue(engine, vma, 0); 706 if (IS_ERR(rq)) { 707 err = PTR_ERR(rq); 708 goto err_heartbeat; 709 } 710 engine->schedule(rq, &attr); 711 err = wait_for_submit(engine, rq, HZ / 2); 712 if (err) { 713 pr_err("%s: Timed out trying to submit semaphores\n", 714 engine->name); 715 goto err_rq; 716 } 717 718 /* ELSP[1]: nop request */ 719 nop = nop_request(engine); 720 if (IS_ERR(nop)) { 721 err = PTR_ERR(nop); 722 goto err_rq; 723 } 724 err = wait_for_submit(engine, nop, HZ / 2); 725 i915_request_put(nop); 726 if (err) { 727 pr_err("%s: Timed out trying to submit nop\n", 728 engine->name); 729 goto err_rq; 730 } 731 732 GEM_BUG_ON(i915_request_completed(rq)); 733 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 734 735 /* Queue: semaphore signal, matching priority as semaphore */ 736 err = release_queue(engine, vma, 1, effective_prio(rq)); 737 if (err) 738 goto err_rq; 739 740 intel_engine_flush_submission(engine); 741 if (!READ_ONCE(engine->execlists.timer.expires) && 742 !i915_request_completed(rq)) { 743 struct drm_printer p = 744 drm_info_printer(gt->i915->drm.dev); 745 746 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", 747 engine->name); 748 intel_engine_dump(engine, &p, 749 "%s\n", engine->name); 750 GEM_TRACE_DUMP(); 751 752 memset(vaddr, 0xff, PAGE_SIZE); 753 err = -EINVAL; 754 } 755 756 /* Timeslice every jiffy, so within 2 we should signal */ 757 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { 758 struct drm_printer p = 759 drm_info_printer(gt->i915->drm.dev); 760 761 pr_err("%s: Failed to timeslice into queue\n", 762 engine->name); 763 intel_engine_dump(engine, &p, 764 "%s\n", engine->name); 765 766 memset(vaddr, 0xff, PAGE_SIZE); 767 err = -EIO; 768 } 769 err_rq: 770 i915_request_put(rq); 771 err_heartbeat: 772 engine_heartbeat_enable(engine, saved); 773 if (err) 774 break; 775 } 776 777 i915_vma_unpin(vma); 778 err_map: 779 i915_gem_object_unpin_map(obj); 780 err_obj: 781 i915_gem_object_put(obj); 782 return err; 783 } 784 785 static int live_busywait_preempt(void *arg) 786 { 787 struct intel_gt *gt = arg; 788 struct i915_gem_context *ctx_hi, *ctx_lo; 789 struct intel_engine_cs *engine; 790 struct drm_i915_gem_object *obj; 791 struct i915_vma *vma; 792 enum intel_engine_id id; 793 int err = -ENOMEM; 794 u32 *map; 795 796 /* 797 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 798 * preempt the busywaits used to synchronise between rings. 799 */ 800 801 ctx_hi = kernel_context(gt->i915); 802 if (!ctx_hi) 803 return -ENOMEM; 804 ctx_hi->sched.priority = 805 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 806 807 ctx_lo = kernel_context(gt->i915); 808 if (!ctx_lo) 809 goto err_ctx_hi; 810 ctx_lo->sched.priority = 811 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 812 813 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 814 if (IS_ERR(obj)) { 815 err = PTR_ERR(obj); 816 goto err_ctx_lo; 817 } 818 819 map = i915_gem_object_pin_map(obj, I915_MAP_WC); 820 if (IS_ERR(map)) { 821 err = PTR_ERR(map); 822 goto err_obj; 823 } 824 825 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 826 if (IS_ERR(vma)) { 827 err = PTR_ERR(vma); 828 goto err_map; 829 } 830 831 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 832 if (err) 833 goto err_map; 834 835 for_each_engine(engine, gt, id) { 836 struct i915_request *lo, *hi; 837 struct igt_live_test t; 838 u32 *cs; 839 840 if (!intel_engine_has_preemption(engine)) 841 continue; 842 843 if (!intel_engine_can_store_dword(engine)) 844 continue; 845 846 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 847 err = -EIO; 848 goto err_vma; 849 } 850 851 /* 852 * We create two requests. The low priority request 853 * busywaits on a semaphore (inside the ringbuffer where 854 * is should be preemptible) and the high priority requests 855 * uses a MI_STORE_DWORD_IMM to update the semaphore value 856 * allowing the first request to complete. If preemption 857 * fails, we hang instead. 858 */ 859 860 lo = igt_request_alloc(ctx_lo, engine); 861 if (IS_ERR(lo)) { 862 err = PTR_ERR(lo); 863 goto err_vma; 864 } 865 866 cs = intel_ring_begin(lo, 8); 867 if (IS_ERR(cs)) { 868 err = PTR_ERR(cs); 869 i915_request_add(lo); 870 goto err_vma; 871 } 872 873 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 874 *cs++ = i915_ggtt_offset(vma); 875 *cs++ = 0; 876 *cs++ = 1; 877 878 /* XXX Do we need a flush + invalidate here? */ 879 880 *cs++ = MI_SEMAPHORE_WAIT | 881 MI_SEMAPHORE_GLOBAL_GTT | 882 MI_SEMAPHORE_POLL | 883 MI_SEMAPHORE_SAD_EQ_SDD; 884 *cs++ = 0; 885 *cs++ = i915_ggtt_offset(vma); 886 *cs++ = 0; 887 888 intel_ring_advance(lo, cs); 889 890 i915_request_get(lo); 891 i915_request_add(lo); 892 893 if (wait_for(READ_ONCE(*map), 10)) { 894 i915_request_put(lo); 895 err = -ETIMEDOUT; 896 goto err_vma; 897 } 898 899 /* Low priority request should be busywaiting now */ 900 if (i915_request_wait(lo, 0, 1) != -ETIME) { 901 i915_request_put(lo); 902 pr_err("%s: Busywaiting request did not!\n", 903 engine->name); 904 err = -EIO; 905 goto err_vma; 906 } 907 908 hi = igt_request_alloc(ctx_hi, engine); 909 if (IS_ERR(hi)) { 910 err = PTR_ERR(hi); 911 i915_request_put(lo); 912 goto err_vma; 913 } 914 915 cs = intel_ring_begin(hi, 4); 916 if (IS_ERR(cs)) { 917 err = PTR_ERR(cs); 918 i915_request_add(hi); 919 i915_request_put(lo); 920 goto err_vma; 921 } 922 923 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 924 *cs++ = i915_ggtt_offset(vma); 925 *cs++ = 0; 926 *cs++ = 0; 927 928 intel_ring_advance(hi, cs); 929 i915_request_add(hi); 930 931 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 932 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 933 934 pr_err("%s: Failed to preempt semaphore busywait!\n", 935 engine->name); 936 937 intel_engine_dump(engine, &p, "%s\n", engine->name); 938 GEM_TRACE_DUMP(); 939 940 i915_request_put(lo); 941 intel_gt_set_wedged(gt); 942 err = -EIO; 943 goto err_vma; 944 } 945 GEM_BUG_ON(READ_ONCE(*map)); 946 i915_request_put(lo); 947 948 if (igt_live_test_end(&t)) { 949 err = -EIO; 950 goto err_vma; 951 } 952 } 953 954 err = 0; 955 err_vma: 956 i915_vma_unpin(vma); 957 err_map: 958 i915_gem_object_unpin_map(obj); 959 err_obj: 960 i915_gem_object_put(obj); 961 err_ctx_lo: 962 kernel_context_close(ctx_lo); 963 err_ctx_hi: 964 kernel_context_close(ctx_hi); 965 return err; 966 } 967 968 static struct i915_request * 969 spinner_create_request(struct igt_spinner *spin, 970 struct i915_gem_context *ctx, 971 struct intel_engine_cs *engine, 972 u32 arb) 973 { 974 struct intel_context *ce; 975 struct i915_request *rq; 976 977 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 978 if (IS_ERR(ce)) 979 return ERR_CAST(ce); 980 981 rq = igt_spinner_create_request(spin, ce, arb); 982 intel_context_put(ce); 983 return rq; 984 } 985 986 static int live_preempt(void *arg) 987 { 988 struct intel_gt *gt = arg; 989 struct i915_gem_context *ctx_hi, *ctx_lo; 990 struct igt_spinner spin_hi, spin_lo; 991 struct intel_engine_cs *engine; 992 enum intel_engine_id id; 993 int err = -ENOMEM; 994 995 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 996 return 0; 997 998 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 999 pr_err("Logical preemption supported, but not exposed\n"); 1000 1001 if (igt_spinner_init(&spin_hi, gt)) 1002 return -ENOMEM; 1003 1004 if (igt_spinner_init(&spin_lo, gt)) 1005 goto err_spin_hi; 1006 1007 ctx_hi = kernel_context(gt->i915); 1008 if (!ctx_hi) 1009 goto err_spin_lo; 1010 ctx_hi->sched.priority = 1011 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1012 1013 ctx_lo = kernel_context(gt->i915); 1014 if (!ctx_lo) 1015 goto err_ctx_hi; 1016 ctx_lo->sched.priority = 1017 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1018 1019 for_each_engine(engine, gt, id) { 1020 struct igt_live_test t; 1021 struct i915_request *rq; 1022 1023 if (!intel_engine_has_preemption(engine)) 1024 continue; 1025 1026 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1027 err = -EIO; 1028 goto err_ctx_lo; 1029 } 1030 1031 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1032 MI_ARB_CHECK); 1033 if (IS_ERR(rq)) { 1034 err = PTR_ERR(rq); 1035 goto err_ctx_lo; 1036 } 1037 1038 i915_request_add(rq); 1039 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1040 GEM_TRACE("lo spinner failed to start\n"); 1041 GEM_TRACE_DUMP(); 1042 intel_gt_set_wedged(gt); 1043 err = -EIO; 1044 goto err_ctx_lo; 1045 } 1046 1047 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1048 MI_ARB_CHECK); 1049 if (IS_ERR(rq)) { 1050 igt_spinner_end(&spin_lo); 1051 err = PTR_ERR(rq); 1052 goto err_ctx_lo; 1053 } 1054 1055 i915_request_add(rq); 1056 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1057 GEM_TRACE("hi spinner failed to start\n"); 1058 GEM_TRACE_DUMP(); 1059 intel_gt_set_wedged(gt); 1060 err = -EIO; 1061 goto err_ctx_lo; 1062 } 1063 1064 igt_spinner_end(&spin_hi); 1065 igt_spinner_end(&spin_lo); 1066 1067 if (igt_live_test_end(&t)) { 1068 err = -EIO; 1069 goto err_ctx_lo; 1070 } 1071 } 1072 1073 err = 0; 1074 err_ctx_lo: 1075 kernel_context_close(ctx_lo); 1076 err_ctx_hi: 1077 kernel_context_close(ctx_hi); 1078 err_spin_lo: 1079 igt_spinner_fini(&spin_lo); 1080 err_spin_hi: 1081 igt_spinner_fini(&spin_hi); 1082 return err; 1083 } 1084 1085 static int live_late_preempt(void *arg) 1086 { 1087 struct intel_gt *gt = arg; 1088 struct i915_gem_context *ctx_hi, *ctx_lo; 1089 struct igt_spinner spin_hi, spin_lo; 1090 struct intel_engine_cs *engine; 1091 struct i915_sched_attr attr = {}; 1092 enum intel_engine_id id; 1093 int err = -ENOMEM; 1094 1095 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1096 return 0; 1097 1098 if (igt_spinner_init(&spin_hi, gt)) 1099 return -ENOMEM; 1100 1101 if (igt_spinner_init(&spin_lo, gt)) 1102 goto err_spin_hi; 1103 1104 ctx_hi = kernel_context(gt->i915); 1105 if (!ctx_hi) 1106 goto err_spin_lo; 1107 1108 ctx_lo = kernel_context(gt->i915); 1109 if (!ctx_lo) 1110 goto err_ctx_hi; 1111 1112 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ 1113 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1114 1115 for_each_engine(engine, gt, id) { 1116 struct igt_live_test t; 1117 struct i915_request *rq; 1118 1119 if (!intel_engine_has_preemption(engine)) 1120 continue; 1121 1122 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1123 err = -EIO; 1124 goto err_ctx_lo; 1125 } 1126 1127 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1128 MI_ARB_CHECK); 1129 if (IS_ERR(rq)) { 1130 err = PTR_ERR(rq); 1131 goto err_ctx_lo; 1132 } 1133 1134 i915_request_add(rq); 1135 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1136 pr_err("First context failed to start\n"); 1137 goto err_wedged; 1138 } 1139 1140 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1141 MI_NOOP); 1142 if (IS_ERR(rq)) { 1143 igt_spinner_end(&spin_lo); 1144 err = PTR_ERR(rq); 1145 goto err_ctx_lo; 1146 } 1147 1148 i915_request_add(rq); 1149 if (igt_wait_for_spinner(&spin_hi, rq)) { 1150 pr_err("Second context overtook first?\n"); 1151 goto err_wedged; 1152 } 1153 1154 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1155 engine->schedule(rq, &attr); 1156 1157 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1158 pr_err("High priority context failed to preempt the low priority context\n"); 1159 GEM_TRACE_DUMP(); 1160 goto err_wedged; 1161 } 1162 1163 igt_spinner_end(&spin_hi); 1164 igt_spinner_end(&spin_lo); 1165 1166 if (igt_live_test_end(&t)) { 1167 err = -EIO; 1168 goto err_ctx_lo; 1169 } 1170 } 1171 1172 err = 0; 1173 err_ctx_lo: 1174 kernel_context_close(ctx_lo); 1175 err_ctx_hi: 1176 kernel_context_close(ctx_hi); 1177 err_spin_lo: 1178 igt_spinner_fini(&spin_lo); 1179 err_spin_hi: 1180 igt_spinner_fini(&spin_hi); 1181 return err; 1182 1183 err_wedged: 1184 igt_spinner_end(&spin_hi); 1185 igt_spinner_end(&spin_lo); 1186 intel_gt_set_wedged(gt); 1187 err = -EIO; 1188 goto err_ctx_lo; 1189 } 1190 1191 struct preempt_client { 1192 struct igt_spinner spin; 1193 struct i915_gem_context *ctx; 1194 }; 1195 1196 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1197 { 1198 c->ctx = kernel_context(gt->i915); 1199 if (!c->ctx) 1200 return -ENOMEM; 1201 1202 if (igt_spinner_init(&c->spin, gt)) 1203 goto err_ctx; 1204 1205 return 0; 1206 1207 err_ctx: 1208 kernel_context_close(c->ctx); 1209 return -ENOMEM; 1210 } 1211 1212 static void preempt_client_fini(struct preempt_client *c) 1213 { 1214 igt_spinner_fini(&c->spin); 1215 kernel_context_close(c->ctx); 1216 } 1217 1218 static int live_nopreempt(void *arg) 1219 { 1220 struct intel_gt *gt = arg; 1221 struct intel_engine_cs *engine; 1222 struct preempt_client a, b; 1223 enum intel_engine_id id; 1224 int err = -ENOMEM; 1225 1226 /* 1227 * Verify that we can disable preemption for an individual request 1228 * that may be being observed and not want to be interrupted. 1229 */ 1230 1231 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1232 return 0; 1233 1234 if (preempt_client_init(gt, &a)) 1235 return -ENOMEM; 1236 if (preempt_client_init(gt, &b)) 1237 goto err_client_a; 1238 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1239 1240 for_each_engine(engine, gt, id) { 1241 struct i915_request *rq_a, *rq_b; 1242 1243 if (!intel_engine_has_preemption(engine)) 1244 continue; 1245 1246 engine->execlists.preempt_hang.count = 0; 1247 1248 rq_a = spinner_create_request(&a.spin, 1249 a.ctx, engine, 1250 MI_ARB_CHECK); 1251 if (IS_ERR(rq_a)) { 1252 err = PTR_ERR(rq_a); 1253 goto err_client_b; 1254 } 1255 1256 /* Low priority client, but unpreemptable! */ 1257 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 1258 1259 i915_request_add(rq_a); 1260 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1261 pr_err("First client failed to start\n"); 1262 goto err_wedged; 1263 } 1264 1265 rq_b = spinner_create_request(&b.spin, 1266 b.ctx, engine, 1267 MI_ARB_CHECK); 1268 if (IS_ERR(rq_b)) { 1269 err = PTR_ERR(rq_b); 1270 goto err_client_b; 1271 } 1272 1273 i915_request_add(rq_b); 1274 1275 /* B is much more important than A! (But A is unpreemptable.) */ 1276 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 1277 1278 /* Wait long enough for preemption and timeslicing */ 1279 if (igt_wait_for_spinner(&b.spin, rq_b)) { 1280 pr_err("Second client started too early!\n"); 1281 goto err_wedged; 1282 } 1283 1284 igt_spinner_end(&a.spin); 1285 1286 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1287 pr_err("Second client failed to start\n"); 1288 goto err_wedged; 1289 } 1290 1291 igt_spinner_end(&b.spin); 1292 1293 if (engine->execlists.preempt_hang.count) { 1294 pr_err("Preemption recorded x%d; should have been suppressed!\n", 1295 engine->execlists.preempt_hang.count); 1296 err = -EINVAL; 1297 goto err_wedged; 1298 } 1299 1300 if (igt_flush_test(gt->i915)) 1301 goto err_wedged; 1302 } 1303 1304 err = 0; 1305 err_client_b: 1306 preempt_client_fini(&b); 1307 err_client_a: 1308 preempt_client_fini(&a); 1309 return err; 1310 1311 err_wedged: 1312 igt_spinner_end(&b.spin); 1313 igt_spinner_end(&a.spin); 1314 intel_gt_set_wedged(gt); 1315 err = -EIO; 1316 goto err_client_b; 1317 } 1318 1319 struct live_preempt_cancel { 1320 struct intel_engine_cs *engine; 1321 struct preempt_client a, b; 1322 }; 1323 1324 static int __cancel_active0(struct live_preempt_cancel *arg) 1325 { 1326 struct i915_request *rq; 1327 struct igt_live_test t; 1328 int err; 1329 1330 /* Preempt cancel of ELSP0 */ 1331 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1332 if (igt_live_test_begin(&t, arg->engine->i915, 1333 __func__, arg->engine->name)) 1334 return -EIO; 1335 1336 rq = spinner_create_request(&arg->a.spin, 1337 arg->a.ctx, arg->engine, 1338 MI_ARB_CHECK); 1339 if (IS_ERR(rq)) 1340 return PTR_ERR(rq); 1341 1342 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1343 i915_request_get(rq); 1344 i915_request_add(rq); 1345 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1346 err = -EIO; 1347 goto out; 1348 } 1349 1350 intel_context_set_banned(rq->context); 1351 err = intel_engine_pulse(arg->engine); 1352 if (err) 1353 goto out; 1354 1355 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1356 err = -EIO; 1357 goto out; 1358 } 1359 1360 if (rq->fence.error != -EIO) { 1361 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1362 err = -EINVAL; 1363 goto out; 1364 } 1365 1366 out: 1367 i915_request_put(rq); 1368 if (igt_live_test_end(&t)) 1369 err = -EIO; 1370 return err; 1371 } 1372 1373 static int __cancel_active1(struct live_preempt_cancel *arg) 1374 { 1375 struct i915_request *rq[2] = {}; 1376 struct igt_live_test t; 1377 int err; 1378 1379 /* Preempt cancel of ELSP1 */ 1380 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1381 if (igt_live_test_begin(&t, arg->engine->i915, 1382 __func__, arg->engine->name)) 1383 return -EIO; 1384 1385 rq[0] = spinner_create_request(&arg->a.spin, 1386 arg->a.ctx, arg->engine, 1387 MI_NOOP); /* no preemption */ 1388 if (IS_ERR(rq[0])) 1389 return PTR_ERR(rq[0]); 1390 1391 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1392 i915_request_get(rq[0]); 1393 i915_request_add(rq[0]); 1394 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1395 err = -EIO; 1396 goto out; 1397 } 1398 1399 rq[1] = spinner_create_request(&arg->b.spin, 1400 arg->b.ctx, arg->engine, 1401 MI_ARB_CHECK); 1402 if (IS_ERR(rq[1])) { 1403 err = PTR_ERR(rq[1]); 1404 goto out; 1405 } 1406 1407 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1408 i915_request_get(rq[1]); 1409 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1410 i915_request_add(rq[1]); 1411 if (err) 1412 goto out; 1413 1414 intel_context_set_banned(rq[1]->context); 1415 err = intel_engine_pulse(arg->engine); 1416 if (err) 1417 goto out; 1418 1419 igt_spinner_end(&arg->a.spin); 1420 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { 1421 err = -EIO; 1422 goto out; 1423 } 1424 1425 if (rq[0]->fence.error != 0) { 1426 pr_err("Normal inflight0 request did not complete\n"); 1427 err = -EINVAL; 1428 goto out; 1429 } 1430 1431 if (rq[1]->fence.error != -EIO) { 1432 pr_err("Cancelled inflight1 request did not report -EIO\n"); 1433 err = -EINVAL; 1434 goto out; 1435 } 1436 1437 out: 1438 i915_request_put(rq[1]); 1439 i915_request_put(rq[0]); 1440 if (igt_live_test_end(&t)) 1441 err = -EIO; 1442 return err; 1443 } 1444 1445 static int __cancel_queued(struct live_preempt_cancel *arg) 1446 { 1447 struct i915_request *rq[3] = {}; 1448 struct igt_live_test t; 1449 int err; 1450 1451 /* Full ELSP and one in the wings */ 1452 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1453 if (igt_live_test_begin(&t, arg->engine->i915, 1454 __func__, arg->engine->name)) 1455 return -EIO; 1456 1457 rq[0] = spinner_create_request(&arg->a.spin, 1458 arg->a.ctx, arg->engine, 1459 MI_ARB_CHECK); 1460 if (IS_ERR(rq[0])) 1461 return PTR_ERR(rq[0]); 1462 1463 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1464 i915_request_get(rq[0]); 1465 i915_request_add(rq[0]); 1466 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1467 err = -EIO; 1468 goto out; 1469 } 1470 1471 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 1472 if (IS_ERR(rq[1])) { 1473 err = PTR_ERR(rq[1]); 1474 goto out; 1475 } 1476 1477 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1478 i915_request_get(rq[1]); 1479 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1480 i915_request_add(rq[1]); 1481 if (err) 1482 goto out; 1483 1484 rq[2] = spinner_create_request(&arg->b.spin, 1485 arg->a.ctx, arg->engine, 1486 MI_ARB_CHECK); 1487 if (IS_ERR(rq[2])) { 1488 err = PTR_ERR(rq[2]); 1489 goto out; 1490 } 1491 1492 i915_request_get(rq[2]); 1493 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 1494 i915_request_add(rq[2]); 1495 if (err) 1496 goto out; 1497 1498 intel_context_set_banned(rq[2]->context); 1499 err = intel_engine_pulse(arg->engine); 1500 if (err) 1501 goto out; 1502 1503 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { 1504 err = -EIO; 1505 goto out; 1506 } 1507 1508 if (rq[0]->fence.error != -EIO) { 1509 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1510 err = -EINVAL; 1511 goto out; 1512 } 1513 1514 if (rq[1]->fence.error != 0) { 1515 pr_err("Normal inflight1 request did not complete\n"); 1516 err = -EINVAL; 1517 goto out; 1518 } 1519 1520 if (rq[2]->fence.error != -EIO) { 1521 pr_err("Cancelled queued request did not report -EIO\n"); 1522 err = -EINVAL; 1523 goto out; 1524 } 1525 1526 out: 1527 i915_request_put(rq[2]); 1528 i915_request_put(rq[1]); 1529 i915_request_put(rq[0]); 1530 if (igt_live_test_end(&t)) 1531 err = -EIO; 1532 return err; 1533 } 1534 1535 static int __cancel_hostile(struct live_preempt_cancel *arg) 1536 { 1537 struct i915_request *rq; 1538 int err; 1539 1540 /* Preempt cancel non-preemptible spinner in ELSP0 */ 1541 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 1542 return 0; 1543 1544 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1545 rq = spinner_create_request(&arg->a.spin, 1546 arg->a.ctx, arg->engine, 1547 MI_NOOP); /* preemption disabled */ 1548 if (IS_ERR(rq)) 1549 return PTR_ERR(rq); 1550 1551 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1552 i915_request_get(rq); 1553 i915_request_add(rq); 1554 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1555 err = -EIO; 1556 goto out; 1557 } 1558 1559 intel_context_set_banned(rq->context); 1560 err = intel_engine_pulse(arg->engine); /* force reset */ 1561 if (err) 1562 goto out; 1563 1564 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1565 err = -EIO; 1566 goto out; 1567 } 1568 1569 if (rq->fence.error != -EIO) { 1570 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1571 err = -EINVAL; 1572 goto out; 1573 } 1574 1575 out: 1576 i915_request_put(rq); 1577 if (igt_flush_test(arg->engine->i915)) 1578 err = -EIO; 1579 return err; 1580 } 1581 1582 static int live_preempt_cancel(void *arg) 1583 { 1584 struct intel_gt *gt = arg; 1585 struct live_preempt_cancel data; 1586 enum intel_engine_id id; 1587 int err = -ENOMEM; 1588 1589 /* 1590 * To cancel an inflight context, we need to first remove it from the 1591 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 1592 */ 1593 1594 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1595 return 0; 1596 1597 if (preempt_client_init(gt, &data.a)) 1598 return -ENOMEM; 1599 if (preempt_client_init(gt, &data.b)) 1600 goto err_client_a; 1601 1602 for_each_engine(data.engine, gt, id) { 1603 if (!intel_engine_has_preemption(data.engine)) 1604 continue; 1605 1606 err = __cancel_active0(&data); 1607 if (err) 1608 goto err_wedged; 1609 1610 err = __cancel_active1(&data); 1611 if (err) 1612 goto err_wedged; 1613 1614 err = __cancel_queued(&data); 1615 if (err) 1616 goto err_wedged; 1617 1618 err = __cancel_hostile(&data); 1619 if (err) 1620 goto err_wedged; 1621 } 1622 1623 err = 0; 1624 err_client_b: 1625 preempt_client_fini(&data.b); 1626 err_client_a: 1627 preempt_client_fini(&data.a); 1628 return err; 1629 1630 err_wedged: 1631 GEM_TRACE_DUMP(); 1632 igt_spinner_end(&data.b.spin); 1633 igt_spinner_end(&data.a.spin); 1634 intel_gt_set_wedged(gt); 1635 goto err_client_b; 1636 } 1637 1638 static int live_suppress_self_preempt(void *arg) 1639 { 1640 struct intel_gt *gt = arg; 1641 struct intel_engine_cs *engine; 1642 struct i915_sched_attr attr = { 1643 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 1644 }; 1645 struct preempt_client a, b; 1646 enum intel_engine_id id; 1647 int err = -ENOMEM; 1648 1649 /* 1650 * Verify that if a preemption request does not cause a change in 1651 * the current execution order, the preempt-to-idle injection is 1652 * skipped and that we do not accidentally apply it after the CS 1653 * completion event. 1654 */ 1655 1656 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1657 return 0; 1658 1659 if (USES_GUC_SUBMISSION(gt->i915)) 1660 return 0; /* presume black blox */ 1661 1662 if (intel_vgpu_active(gt->i915)) 1663 return 0; /* GVT forces single port & request submission */ 1664 1665 if (preempt_client_init(gt, &a)) 1666 return -ENOMEM; 1667 if (preempt_client_init(gt, &b)) 1668 goto err_client_a; 1669 1670 for_each_engine(engine, gt, id) { 1671 struct i915_request *rq_a, *rq_b; 1672 int depth; 1673 1674 if (!intel_engine_has_preemption(engine)) 1675 continue; 1676 1677 if (igt_flush_test(gt->i915)) 1678 goto err_wedged; 1679 1680 intel_engine_pm_get(engine); 1681 engine->execlists.preempt_hang.count = 0; 1682 1683 rq_a = spinner_create_request(&a.spin, 1684 a.ctx, engine, 1685 MI_NOOP); 1686 if (IS_ERR(rq_a)) { 1687 err = PTR_ERR(rq_a); 1688 intel_engine_pm_put(engine); 1689 goto err_client_b; 1690 } 1691 1692 i915_request_add(rq_a); 1693 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1694 pr_err("First client failed to start\n"); 1695 intel_engine_pm_put(engine); 1696 goto err_wedged; 1697 } 1698 1699 /* Keep postponing the timer to avoid premature slicing */ 1700 mod_timer(&engine->execlists.timer, jiffies + HZ); 1701 for (depth = 0; depth < 8; depth++) { 1702 rq_b = spinner_create_request(&b.spin, 1703 b.ctx, engine, 1704 MI_NOOP); 1705 if (IS_ERR(rq_b)) { 1706 err = PTR_ERR(rq_b); 1707 intel_engine_pm_put(engine); 1708 goto err_client_b; 1709 } 1710 i915_request_add(rq_b); 1711 1712 GEM_BUG_ON(i915_request_completed(rq_a)); 1713 engine->schedule(rq_a, &attr); 1714 igt_spinner_end(&a.spin); 1715 1716 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1717 pr_err("Second client failed to start\n"); 1718 intel_engine_pm_put(engine); 1719 goto err_wedged; 1720 } 1721 1722 swap(a, b); 1723 rq_a = rq_b; 1724 } 1725 igt_spinner_end(&a.spin); 1726 1727 if (engine->execlists.preempt_hang.count) { 1728 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 1729 engine->name, 1730 engine->execlists.preempt_hang.count, 1731 depth); 1732 intel_engine_pm_put(engine); 1733 err = -EINVAL; 1734 goto err_client_b; 1735 } 1736 1737 intel_engine_pm_put(engine); 1738 if (igt_flush_test(gt->i915)) 1739 goto err_wedged; 1740 } 1741 1742 err = 0; 1743 err_client_b: 1744 preempt_client_fini(&b); 1745 err_client_a: 1746 preempt_client_fini(&a); 1747 return err; 1748 1749 err_wedged: 1750 igt_spinner_end(&b.spin); 1751 igt_spinner_end(&a.spin); 1752 intel_gt_set_wedged(gt); 1753 err = -EIO; 1754 goto err_client_b; 1755 } 1756 1757 static int __i915_sw_fence_call 1758 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) 1759 { 1760 return NOTIFY_DONE; 1761 } 1762 1763 static struct i915_request *dummy_request(struct intel_engine_cs *engine) 1764 { 1765 struct i915_request *rq; 1766 1767 rq = kzalloc(sizeof(*rq), GFP_KERNEL); 1768 if (!rq) 1769 return NULL; 1770 1771 rq->engine = engine; 1772 1773 spin_lock_init(&rq->lock); 1774 INIT_LIST_HEAD(&rq->fence.cb_list); 1775 rq->fence.lock = &rq->lock; 1776 rq->fence.ops = &i915_fence_ops; 1777 1778 i915_sched_node_init(&rq->sched); 1779 1780 /* mark this request as permanently incomplete */ 1781 rq->fence.seqno = 1; 1782 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ 1783 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; 1784 GEM_BUG_ON(i915_request_completed(rq)); 1785 1786 i915_sw_fence_init(&rq->submit, dummy_notify); 1787 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 1788 1789 spin_lock_init(&rq->lock); 1790 rq->fence.lock = &rq->lock; 1791 INIT_LIST_HEAD(&rq->fence.cb_list); 1792 1793 return rq; 1794 } 1795 1796 static void dummy_request_free(struct i915_request *dummy) 1797 { 1798 /* We have to fake the CS interrupt to kick the next request */ 1799 i915_sw_fence_commit(&dummy->submit); 1800 1801 i915_request_mark_complete(dummy); 1802 dma_fence_signal(&dummy->fence); 1803 1804 i915_sched_node_fini(&dummy->sched); 1805 i915_sw_fence_fini(&dummy->submit); 1806 1807 dma_fence_free(&dummy->fence); 1808 } 1809 1810 static int live_suppress_wait_preempt(void *arg) 1811 { 1812 struct intel_gt *gt = arg; 1813 struct preempt_client client[4]; 1814 struct i915_request *rq[ARRAY_SIZE(client)] = {}; 1815 struct intel_engine_cs *engine; 1816 enum intel_engine_id id; 1817 int err = -ENOMEM; 1818 int i; 1819 1820 /* 1821 * Waiters are given a little priority nudge, but not enough 1822 * to actually cause any preemption. Double check that we do 1823 * not needlessly generate preempt-to-idle cycles. 1824 */ 1825 1826 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1827 return 0; 1828 1829 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ 1830 return -ENOMEM; 1831 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ 1832 goto err_client_0; 1833 if (preempt_client_init(gt, &client[2])) /* head of queue */ 1834 goto err_client_1; 1835 if (preempt_client_init(gt, &client[3])) /* bystander */ 1836 goto err_client_2; 1837 1838 for_each_engine(engine, gt, id) { 1839 int depth; 1840 1841 if (!intel_engine_has_preemption(engine)) 1842 continue; 1843 1844 if (!engine->emit_init_breadcrumb) 1845 continue; 1846 1847 for (depth = 0; depth < ARRAY_SIZE(client); depth++) { 1848 struct i915_request *dummy; 1849 1850 engine->execlists.preempt_hang.count = 0; 1851 1852 dummy = dummy_request(engine); 1853 if (!dummy) 1854 goto err_client_3; 1855 1856 for (i = 0; i < ARRAY_SIZE(client); i++) { 1857 struct i915_request *this; 1858 1859 this = spinner_create_request(&client[i].spin, 1860 client[i].ctx, engine, 1861 MI_NOOP); 1862 if (IS_ERR(this)) { 1863 err = PTR_ERR(this); 1864 goto err_wedged; 1865 } 1866 1867 /* Disable NEWCLIENT promotion */ 1868 __i915_active_fence_set(&i915_request_timeline(this)->last_request, 1869 &dummy->fence); 1870 1871 rq[i] = i915_request_get(this); 1872 i915_request_add(this); 1873 } 1874 1875 dummy_request_free(dummy); 1876 1877 GEM_BUG_ON(i915_request_completed(rq[0])); 1878 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { 1879 pr_err("%s: First client failed to start\n", 1880 engine->name); 1881 goto err_wedged; 1882 } 1883 GEM_BUG_ON(!i915_request_started(rq[0])); 1884 1885 if (i915_request_wait(rq[depth], 1886 I915_WAIT_PRIORITY, 1887 1) != -ETIME) { 1888 pr_err("%s: Waiter depth:%d completed!\n", 1889 engine->name, depth); 1890 goto err_wedged; 1891 } 1892 1893 for (i = 0; i < ARRAY_SIZE(client); i++) { 1894 igt_spinner_end(&client[i].spin); 1895 i915_request_put(rq[i]); 1896 rq[i] = NULL; 1897 } 1898 1899 if (igt_flush_test(gt->i915)) 1900 goto err_wedged; 1901 1902 if (engine->execlists.preempt_hang.count) { 1903 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", 1904 engine->name, 1905 engine->execlists.preempt_hang.count, 1906 depth); 1907 err = -EINVAL; 1908 goto err_client_3; 1909 } 1910 } 1911 } 1912 1913 err = 0; 1914 err_client_3: 1915 preempt_client_fini(&client[3]); 1916 err_client_2: 1917 preempt_client_fini(&client[2]); 1918 err_client_1: 1919 preempt_client_fini(&client[1]); 1920 err_client_0: 1921 preempt_client_fini(&client[0]); 1922 return err; 1923 1924 err_wedged: 1925 for (i = 0; i < ARRAY_SIZE(client); i++) { 1926 igt_spinner_end(&client[i].spin); 1927 i915_request_put(rq[i]); 1928 } 1929 intel_gt_set_wedged(gt); 1930 err = -EIO; 1931 goto err_client_3; 1932 } 1933 1934 static int live_chain_preempt(void *arg) 1935 { 1936 struct intel_gt *gt = arg; 1937 struct intel_engine_cs *engine; 1938 struct preempt_client hi, lo; 1939 enum intel_engine_id id; 1940 int err = -ENOMEM; 1941 1942 /* 1943 * Build a chain AB...BA between two contexts (A, B) and request 1944 * preemption of the last request. It should then complete before 1945 * the previously submitted spinner in B. 1946 */ 1947 1948 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1949 return 0; 1950 1951 if (preempt_client_init(gt, &hi)) 1952 return -ENOMEM; 1953 1954 if (preempt_client_init(gt, &lo)) 1955 goto err_client_hi; 1956 1957 for_each_engine(engine, gt, id) { 1958 struct i915_sched_attr attr = { 1959 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1960 }; 1961 struct igt_live_test t; 1962 struct i915_request *rq; 1963 int ring_size, count, i; 1964 1965 if (!intel_engine_has_preemption(engine)) 1966 continue; 1967 1968 rq = spinner_create_request(&lo.spin, 1969 lo.ctx, engine, 1970 MI_ARB_CHECK); 1971 if (IS_ERR(rq)) 1972 goto err_wedged; 1973 1974 i915_request_get(rq); 1975 i915_request_add(rq); 1976 1977 ring_size = rq->wa_tail - rq->head; 1978 if (ring_size < 0) 1979 ring_size += rq->ring->size; 1980 ring_size = rq->ring->size / ring_size; 1981 pr_debug("%s(%s): Using maximum of %d requests\n", 1982 __func__, engine->name, ring_size); 1983 1984 igt_spinner_end(&lo.spin); 1985 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 1986 pr_err("Timed out waiting to flush %s\n", engine->name); 1987 i915_request_put(rq); 1988 goto err_wedged; 1989 } 1990 i915_request_put(rq); 1991 1992 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1993 err = -EIO; 1994 goto err_wedged; 1995 } 1996 1997 for_each_prime_number_from(count, 1, ring_size) { 1998 rq = spinner_create_request(&hi.spin, 1999 hi.ctx, engine, 2000 MI_ARB_CHECK); 2001 if (IS_ERR(rq)) 2002 goto err_wedged; 2003 i915_request_add(rq); 2004 if (!igt_wait_for_spinner(&hi.spin, rq)) 2005 goto err_wedged; 2006 2007 rq = spinner_create_request(&lo.spin, 2008 lo.ctx, engine, 2009 MI_ARB_CHECK); 2010 if (IS_ERR(rq)) 2011 goto err_wedged; 2012 i915_request_add(rq); 2013 2014 for (i = 0; i < count; i++) { 2015 rq = igt_request_alloc(lo.ctx, engine); 2016 if (IS_ERR(rq)) 2017 goto err_wedged; 2018 i915_request_add(rq); 2019 } 2020 2021 rq = igt_request_alloc(hi.ctx, engine); 2022 if (IS_ERR(rq)) 2023 goto err_wedged; 2024 2025 i915_request_get(rq); 2026 i915_request_add(rq); 2027 engine->schedule(rq, &attr); 2028 2029 igt_spinner_end(&hi.spin); 2030 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2031 struct drm_printer p = 2032 drm_info_printer(gt->i915->drm.dev); 2033 2034 pr_err("Failed to preempt over chain of %d\n", 2035 count); 2036 intel_engine_dump(engine, &p, 2037 "%s\n", engine->name); 2038 i915_request_put(rq); 2039 goto err_wedged; 2040 } 2041 igt_spinner_end(&lo.spin); 2042 i915_request_put(rq); 2043 2044 rq = igt_request_alloc(lo.ctx, engine); 2045 if (IS_ERR(rq)) 2046 goto err_wedged; 2047 2048 i915_request_get(rq); 2049 i915_request_add(rq); 2050 2051 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2052 struct drm_printer p = 2053 drm_info_printer(gt->i915->drm.dev); 2054 2055 pr_err("Failed to flush low priority chain of %d requests\n", 2056 count); 2057 intel_engine_dump(engine, &p, 2058 "%s\n", engine->name); 2059 2060 i915_request_put(rq); 2061 goto err_wedged; 2062 } 2063 i915_request_put(rq); 2064 } 2065 2066 if (igt_live_test_end(&t)) { 2067 err = -EIO; 2068 goto err_wedged; 2069 } 2070 } 2071 2072 err = 0; 2073 err_client_lo: 2074 preempt_client_fini(&lo); 2075 err_client_hi: 2076 preempt_client_fini(&hi); 2077 return err; 2078 2079 err_wedged: 2080 igt_spinner_end(&hi.spin); 2081 igt_spinner_end(&lo.spin); 2082 intel_gt_set_wedged(gt); 2083 err = -EIO; 2084 goto err_client_lo; 2085 } 2086 2087 static int create_gang(struct intel_engine_cs *engine, 2088 struct i915_request **prev) 2089 { 2090 struct drm_i915_gem_object *obj; 2091 struct intel_context *ce; 2092 struct i915_request *rq; 2093 struct i915_vma *vma; 2094 u32 *cs; 2095 int err; 2096 2097 ce = intel_context_create(engine); 2098 if (IS_ERR(ce)) 2099 return PTR_ERR(ce); 2100 2101 obj = i915_gem_object_create_internal(engine->i915, 4096); 2102 if (IS_ERR(obj)) { 2103 err = PTR_ERR(obj); 2104 goto err_ce; 2105 } 2106 2107 vma = i915_vma_instance(obj, ce->vm, NULL); 2108 if (IS_ERR(vma)) { 2109 err = PTR_ERR(vma); 2110 goto err_obj; 2111 } 2112 2113 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2114 if (err) 2115 goto err_obj; 2116 2117 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2118 if (IS_ERR(cs)) 2119 goto err_obj; 2120 2121 /* Semaphore target: spin until zero */ 2122 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2123 2124 *cs++ = MI_SEMAPHORE_WAIT | 2125 MI_SEMAPHORE_POLL | 2126 MI_SEMAPHORE_SAD_EQ_SDD; 2127 *cs++ = 0; 2128 *cs++ = lower_32_bits(vma->node.start); 2129 *cs++ = upper_32_bits(vma->node.start); 2130 2131 if (*prev) { 2132 u64 offset = (*prev)->batch->node.start; 2133 2134 /* Terminate the spinner in the next lower priority batch. */ 2135 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2136 *cs++ = lower_32_bits(offset); 2137 *cs++ = upper_32_bits(offset); 2138 *cs++ = 0; 2139 } 2140 2141 *cs++ = MI_BATCH_BUFFER_END; 2142 i915_gem_object_flush_map(obj); 2143 i915_gem_object_unpin_map(obj); 2144 2145 rq = intel_context_create_request(ce); 2146 if (IS_ERR(rq)) 2147 goto err_obj; 2148 2149 rq->batch = vma; 2150 i915_request_get(rq); 2151 2152 i915_vma_lock(vma); 2153 err = i915_request_await_object(rq, vma->obj, false); 2154 if (!err) 2155 err = i915_vma_move_to_active(vma, rq, 0); 2156 if (!err) 2157 err = rq->engine->emit_bb_start(rq, 2158 vma->node.start, 2159 PAGE_SIZE, 0); 2160 i915_vma_unlock(vma); 2161 i915_request_add(rq); 2162 if (err) 2163 goto err_rq; 2164 2165 i915_gem_object_put(obj); 2166 intel_context_put(ce); 2167 2168 rq->client_link.next = &(*prev)->client_link; 2169 *prev = rq; 2170 return 0; 2171 2172 err_rq: 2173 i915_request_put(rq); 2174 err_obj: 2175 i915_gem_object_put(obj); 2176 err_ce: 2177 intel_context_put(ce); 2178 return err; 2179 } 2180 2181 static int live_preempt_gang(void *arg) 2182 { 2183 struct intel_gt *gt = arg; 2184 struct intel_engine_cs *engine; 2185 enum intel_engine_id id; 2186 2187 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2188 return 0; 2189 2190 /* 2191 * Build as long a chain of preempters as we can, with each 2192 * request higher priority than the last. Once we are ready, we release 2193 * the last batch which then precolates down the chain, each releasing 2194 * the next oldest in turn. The intent is to simply push as hard as we 2195 * can with the number of preemptions, trying to exceed narrow HW 2196 * limits. At a minimum, we insist that we can sort all the user 2197 * high priority levels into execution order. 2198 */ 2199 2200 for_each_engine(engine, gt, id) { 2201 struct i915_request *rq = NULL; 2202 struct igt_live_test t; 2203 IGT_TIMEOUT(end_time); 2204 int prio = 0; 2205 int err = 0; 2206 u32 *cs; 2207 2208 if (!intel_engine_has_preemption(engine)) 2209 continue; 2210 2211 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2212 return -EIO; 2213 2214 do { 2215 struct i915_sched_attr attr = { 2216 .priority = I915_USER_PRIORITY(prio++), 2217 }; 2218 2219 err = create_gang(engine, &rq); 2220 if (err) 2221 break; 2222 2223 /* Submit each spinner at increasing priority */ 2224 engine->schedule(rq, &attr); 2225 2226 if (prio <= I915_PRIORITY_MAX) 2227 continue; 2228 2229 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) 2230 break; 2231 2232 if (__igt_timeout(end_time, NULL)) 2233 break; 2234 } while (1); 2235 pr_debug("%s: Preempt chain of %d requests\n", 2236 engine->name, prio); 2237 2238 /* 2239 * Such that the last spinner is the highest priority and 2240 * should execute first. When that spinner completes, 2241 * it will terminate the next lowest spinner until there 2242 * are no more spinners and the gang is complete. 2243 */ 2244 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2245 if (!IS_ERR(cs)) { 2246 *cs = 0; 2247 i915_gem_object_unpin_map(rq->batch->obj); 2248 } else { 2249 err = PTR_ERR(cs); 2250 intel_gt_set_wedged(gt); 2251 } 2252 2253 while (rq) { /* wait for each rq from highest to lowest prio */ 2254 struct i915_request *n = 2255 list_next_entry(rq, client_link); 2256 2257 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2258 struct drm_printer p = 2259 drm_info_printer(engine->i915->drm.dev); 2260 2261 pr_err("Failed to flush chain of %d requests, at %d\n", 2262 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2263 intel_engine_dump(engine, &p, 2264 "%s\n", engine->name); 2265 2266 err = -ETIME; 2267 } 2268 2269 i915_request_put(rq); 2270 rq = n; 2271 } 2272 2273 if (igt_live_test_end(&t)) 2274 err = -EIO; 2275 if (err) 2276 return err; 2277 } 2278 2279 return 0; 2280 } 2281 2282 static int live_preempt_hang(void *arg) 2283 { 2284 struct intel_gt *gt = arg; 2285 struct i915_gem_context *ctx_hi, *ctx_lo; 2286 struct igt_spinner spin_hi, spin_lo; 2287 struct intel_engine_cs *engine; 2288 enum intel_engine_id id; 2289 int err = -ENOMEM; 2290 2291 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2292 return 0; 2293 2294 if (!intel_has_reset_engine(gt)) 2295 return 0; 2296 2297 if (igt_spinner_init(&spin_hi, gt)) 2298 return -ENOMEM; 2299 2300 if (igt_spinner_init(&spin_lo, gt)) 2301 goto err_spin_hi; 2302 2303 ctx_hi = kernel_context(gt->i915); 2304 if (!ctx_hi) 2305 goto err_spin_lo; 2306 ctx_hi->sched.priority = 2307 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 2308 2309 ctx_lo = kernel_context(gt->i915); 2310 if (!ctx_lo) 2311 goto err_ctx_hi; 2312 ctx_lo->sched.priority = 2313 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 2314 2315 for_each_engine(engine, gt, id) { 2316 struct i915_request *rq; 2317 2318 if (!intel_engine_has_preemption(engine)) 2319 continue; 2320 2321 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 2322 MI_ARB_CHECK); 2323 if (IS_ERR(rq)) { 2324 err = PTR_ERR(rq); 2325 goto err_ctx_lo; 2326 } 2327 2328 i915_request_add(rq); 2329 if (!igt_wait_for_spinner(&spin_lo, rq)) { 2330 GEM_TRACE("lo spinner failed to start\n"); 2331 GEM_TRACE_DUMP(); 2332 intel_gt_set_wedged(gt); 2333 err = -EIO; 2334 goto err_ctx_lo; 2335 } 2336 2337 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 2338 MI_ARB_CHECK); 2339 if (IS_ERR(rq)) { 2340 igt_spinner_end(&spin_lo); 2341 err = PTR_ERR(rq); 2342 goto err_ctx_lo; 2343 } 2344 2345 init_completion(&engine->execlists.preempt_hang.completion); 2346 engine->execlists.preempt_hang.inject_hang = true; 2347 2348 i915_request_add(rq); 2349 2350 if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion, 2351 HZ / 10)) { 2352 pr_err("Preemption did not occur within timeout!"); 2353 GEM_TRACE_DUMP(); 2354 intel_gt_set_wedged(gt); 2355 err = -EIO; 2356 goto err_ctx_lo; 2357 } 2358 2359 set_bit(I915_RESET_ENGINE + id, >->reset.flags); 2360 intel_engine_reset(engine, NULL); 2361 clear_bit(I915_RESET_ENGINE + id, >->reset.flags); 2362 2363 engine->execlists.preempt_hang.inject_hang = false; 2364 2365 if (!igt_wait_for_spinner(&spin_hi, rq)) { 2366 GEM_TRACE("hi spinner failed to start\n"); 2367 GEM_TRACE_DUMP(); 2368 intel_gt_set_wedged(gt); 2369 err = -EIO; 2370 goto err_ctx_lo; 2371 } 2372 2373 igt_spinner_end(&spin_hi); 2374 igt_spinner_end(&spin_lo); 2375 if (igt_flush_test(gt->i915)) { 2376 err = -EIO; 2377 goto err_ctx_lo; 2378 } 2379 } 2380 2381 err = 0; 2382 err_ctx_lo: 2383 kernel_context_close(ctx_lo); 2384 err_ctx_hi: 2385 kernel_context_close(ctx_hi); 2386 err_spin_lo: 2387 igt_spinner_fini(&spin_lo); 2388 err_spin_hi: 2389 igt_spinner_fini(&spin_hi); 2390 return err; 2391 } 2392 2393 static int live_preempt_timeout(void *arg) 2394 { 2395 struct intel_gt *gt = arg; 2396 struct i915_gem_context *ctx_hi, *ctx_lo; 2397 struct igt_spinner spin_lo; 2398 struct intel_engine_cs *engine; 2399 enum intel_engine_id id; 2400 int err = -ENOMEM; 2401 2402 /* 2403 * Check that we force preemption to occur by cancelling the previous 2404 * context if it refuses to yield the GPU. 2405 */ 2406 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2407 return 0; 2408 2409 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2410 return 0; 2411 2412 if (!intel_has_reset_engine(gt)) 2413 return 0; 2414 2415 if (igt_spinner_init(&spin_lo, gt)) 2416 return -ENOMEM; 2417 2418 ctx_hi = kernel_context(gt->i915); 2419 if (!ctx_hi) 2420 goto err_spin_lo; 2421 ctx_hi->sched.priority = 2422 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 2423 2424 ctx_lo = kernel_context(gt->i915); 2425 if (!ctx_lo) 2426 goto err_ctx_hi; 2427 ctx_lo->sched.priority = 2428 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 2429 2430 for_each_engine(engine, gt, id) { 2431 unsigned long saved_timeout; 2432 struct i915_request *rq; 2433 2434 if (!intel_engine_has_preemption(engine)) 2435 continue; 2436 2437 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 2438 MI_NOOP); /* preemption disabled */ 2439 if (IS_ERR(rq)) { 2440 err = PTR_ERR(rq); 2441 goto err_ctx_lo; 2442 } 2443 2444 i915_request_add(rq); 2445 if (!igt_wait_for_spinner(&spin_lo, rq)) { 2446 intel_gt_set_wedged(gt); 2447 err = -EIO; 2448 goto err_ctx_lo; 2449 } 2450 2451 rq = igt_request_alloc(ctx_hi, engine); 2452 if (IS_ERR(rq)) { 2453 igt_spinner_end(&spin_lo); 2454 err = PTR_ERR(rq); 2455 goto err_ctx_lo; 2456 } 2457 2458 /* Flush the previous CS ack before changing timeouts */ 2459 while (READ_ONCE(engine->execlists.pending[0])) 2460 cpu_relax(); 2461 2462 saved_timeout = engine->props.preempt_timeout_ms; 2463 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 2464 2465 i915_request_get(rq); 2466 i915_request_add(rq); 2467 2468 intel_engine_flush_submission(engine); 2469 engine->props.preempt_timeout_ms = saved_timeout; 2470 2471 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 2472 intel_gt_set_wedged(gt); 2473 i915_request_put(rq); 2474 err = -ETIME; 2475 goto err_ctx_lo; 2476 } 2477 2478 igt_spinner_end(&spin_lo); 2479 i915_request_put(rq); 2480 } 2481 2482 err = 0; 2483 err_ctx_lo: 2484 kernel_context_close(ctx_lo); 2485 err_ctx_hi: 2486 kernel_context_close(ctx_hi); 2487 err_spin_lo: 2488 igt_spinner_fini(&spin_lo); 2489 return err; 2490 } 2491 2492 static int random_range(struct rnd_state *rnd, int min, int max) 2493 { 2494 return i915_prandom_u32_max_state(max - min, rnd) + min; 2495 } 2496 2497 static int random_priority(struct rnd_state *rnd) 2498 { 2499 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 2500 } 2501 2502 struct preempt_smoke { 2503 struct intel_gt *gt; 2504 struct i915_gem_context **contexts; 2505 struct intel_engine_cs *engine; 2506 struct drm_i915_gem_object *batch; 2507 unsigned int ncontext; 2508 struct rnd_state prng; 2509 unsigned long count; 2510 }; 2511 2512 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 2513 { 2514 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 2515 &smoke->prng)]; 2516 } 2517 2518 static int smoke_submit(struct preempt_smoke *smoke, 2519 struct i915_gem_context *ctx, int prio, 2520 struct drm_i915_gem_object *batch) 2521 { 2522 struct i915_request *rq; 2523 struct i915_vma *vma = NULL; 2524 int err = 0; 2525 2526 if (batch) { 2527 struct i915_address_space *vm; 2528 2529 vm = i915_gem_context_get_vm_rcu(ctx); 2530 vma = i915_vma_instance(batch, vm, NULL); 2531 i915_vm_put(vm); 2532 if (IS_ERR(vma)) 2533 return PTR_ERR(vma); 2534 2535 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2536 if (err) 2537 return err; 2538 } 2539 2540 ctx->sched.priority = prio; 2541 2542 rq = igt_request_alloc(ctx, smoke->engine); 2543 if (IS_ERR(rq)) { 2544 err = PTR_ERR(rq); 2545 goto unpin; 2546 } 2547 2548 if (vma) { 2549 i915_vma_lock(vma); 2550 err = i915_request_await_object(rq, vma->obj, false); 2551 if (!err) 2552 err = i915_vma_move_to_active(vma, rq, 0); 2553 if (!err) 2554 err = rq->engine->emit_bb_start(rq, 2555 vma->node.start, 2556 PAGE_SIZE, 0); 2557 i915_vma_unlock(vma); 2558 } 2559 2560 i915_request_add(rq); 2561 2562 unpin: 2563 if (vma) 2564 i915_vma_unpin(vma); 2565 2566 return err; 2567 } 2568 2569 static int smoke_crescendo_thread(void *arg) 2570 { 2571 struct preempt_smoke *smoke = arg; 2572 IGT_TIMEOUT(end_time); 2573 unsigned long count; 2574 2575 count = 0; 2576 do { 2577 struct i915_gem_context *ctx = smoke_context(smoke); 2578 int err; 2579 2580 err = smoke_submit(smoke, 2581 ctx, count % I915_PRIORITY_MAX, 2582 smoke->batch); 2583 if (err) 2584 return err; 2585 2586 count++; 2587 } while (!__igt_timeout(end_time, NULL)); 2588 2589 smoke->count = count; 2590 return 0; 2591 } 2592 2593 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 2594 #define BATCH BIT(0) 2595 { 2596 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 2597 struct preempt_smoke arg[I915_NUM_ENGINES]; 2598 struct intel_engine_cs *engine; 2599 enum intel_engine_id id; 2600 unsigned long count; 2601 int err = 0; 2602 2603 for_each_engine(engine, smoke->gt, id) { 2604 arg[id] = *smoke; 2605 arg[id].engine = engine; 2606 if (!(flags & BATCH)) 2607 arg[id].batch = NULL; 2608 arg[id].count = 0; 2609 2610 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 2611 "igt/smoke:%d", id); 2612 if (IS_ERR(tsk[id])) { 2613 err = PTR_ERR(tsk[id]); 2614 break; 2615 } 2616 get_task_struct(tsk[id]); 2617 } 2618 2619 yield(); /* start all threads before we kthread_stop() */ 2620 2621 count = 0; 2622 for_each_engine(engine, smoke->gt, id) { 2623 int status; 2624 2625 if (IS_ERR_OR_NULL(tsk[id])) 2626 continue; 2627 2628 status = kthread_stop(tsk[id]); 2629 if (status && !err) 2630 err = status; 2631 2632 count += arg[id].count; 2633 2634 put_task_struct(tsk[id]); 2635 } 2636 2637 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 2638 count, flags, 2639 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 2640 return 0; 2641 } 2642 2643 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 2644 { 2645 enum intel_engine_id id; 2646 IGT_TIMEOUT(end_time); 2647 unsigned long count; 2648 2649 count = 0; 2650 do { 2651 for_each_engine(smoke->engine, smoke->gt, id) { 2652 struct i915_gem_context *ctx = smoke_context(smoke); 2653 int err; 2654 2655 err = smoke_submit(smoke, 2656 ctx, random_priority(&smoke->prng), 2657 flags & BATCH ? smoke->batch : NULL); 2658 if (err) 2659 return err; 2660 2661 count++; 2662 } 2663 } while (!__igt_timeout(end_time, NULL)); 2664 2665 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 2666 count, flags, 2667 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 2668 return 0; 2669 } 2670 2671 static int live_preempt_smoke(void *arg) 2672 { 2673 struct preempt_smoke smoke = { 2674 .gt = arg, 2675 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 2676 .ncontext = 1024, 2677 }; 2678 const unsigned int phase[] = { 0, BATCH }; 2679 struct igt_live_test t; 2680 int err = -ENOMEM; 2681 u32 *cs; 2682 int n; 2683 2684 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 2685 return 0; 2686 2687 smoke.contexts = kmalloc_array(smoke.ncontext, 2688 sizeof(*smoke.contexts), 2689 GFP_KERNEL); 2690 if (!smoke.contexts) 2691 return -ENOMEM; 2692 2693 smoke.batch = 2694 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 2695 if (IS_ERR(smoke.batch)) { 2696 err = PTR_ERR(smoke.batch); 2697 goto err_free; 2698 } 2699 2700 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 2701 if (IS_ERR(cs)) { 2702 err = PTR_ERR(cs); 2703 goto err_batch; 2704 } 2705 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 2706 cs[n] = MI_ARB_CHECK; 2707 cs[n] = MI_BATCH_BUFFER_END; 2708 i915_gem_object_flush_map(smoke.batch); 2709 i915_gem_object_unpin_map(smoke.batch); 2710 2711 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 2712 err = -EIO; 2713 goto err_batch; 2714 } 2715 2716 for (n = 0; n < smoke.ncontext; n++) { 2717 smoke.contexts[n] = kernel_context(smoke.gt->i915); 2718 if (!smoke.contexts[n]) 2719 goto err_ctx; 2720 } 2721 2722 for (n = 0; n < ARRAY_SIZE(phase); n++) { 2723 err = smoke_crescendo(&smoke, phase[n]); 2724 if (err) 2725 goto err_ctx; 2726 2727 err = smoke_random(&smoke, phase[n]); 2728 if (err) 2729 goto err_ctx; 2730 } 2731 2732 err_ctx: 2733 if (igt_live_test_end(&t)) 2734 err = -EIO; 2735 2736 for (n = 0; n < smoke.ncontext; n++) { 2737 if (!smoke.contexts[n]) 2738 break; 2739 kernel_context_close(smoke.contexts[n]); 2740 } 2741 2742 err_batch: 2743 i915_gem_object_put(smoke.batch); 2744 err_free: 2745 kfree(smoke.contexts); 2746 2747 return err; 2748 } 2749 2750 static int nop_virtual_engine(struct intel_gt *gt, 2751 struct intel_engine_cs **siblings, 2752 unsigned int nsibling, 2753 unsigned int nctx, 2754 unsigned int flags) 2755 #define CHAIN BIT(0) 2756 { 2757 IGT_TIMEOUT(end_time); 2758 struct i915_request *request[16] = {}; 2759 struct intel_context *ve[16]; 2760 unsigned long n, prime, nc; 2761 struct igt_live_test t; 2762 ktime_t times[2] = {}; 2763 int err; 2764 2765 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 2766 2767 for (n = 0; n < nctx; n++) { 2768 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 2769 if (IS_ERR(ve[n])) { 2770 err = PTR_ERR(ve[n]); 2771 nctx = n; 2772 goto out; 2773 } 2774 2775 err = intel_context_pin(ve[n]); 2776 if (err) { 2777 intel_context_put(ve[n]); 2778 nctx = n; 2779 goto out; 2780 } 2781 } 2782 2783 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 2784 if (err) 2785 goto out; 2786 2787 for_each_prime_number_from(prime, 1, 8192) { 2788 times[1] = ktime_get_raw(); 2789 2790 if (flags & CHAIN) { 2791 for (nc = 0; nc < nctx; nc++) { 2792 for (n = 0; n < prime; n++) { 2793 struct i915_request *rq; 2794 2795 rq = i915_request_create(ve[nc]); 2796 if (IS_ERR(rq)) { 2797 err = PTR_ERR(rq); 2798 goto out; 2799 } 2800 2801 if (request[nc]) 2802 i915_request_put(request[nc]); 2803 request[nc] = i915_request_get(rq); 2804 i915_request_add(rq); 2805 } 2806 } 2807 } else { 2808 for (n = 0; n < prime; n++) { 2809 for (nc = 0; nc < nctx; nc++) { 2810 struct i915_request *rq; 2811 2812 rq = i915_request_create(ve[nc]); 2813 if (IS_ERR(rq)) { 2814 err = PTR_ERR(rq); 2815 goto out; 2816 } 2817 2818 if (request[nc]) 2819 i915_request_put(request[nc]); 2820 request[nc] = i915_request_get(rq); 2821 i915_request_add(rq); 2822 } 2823 } 2824 } 2825 2826 for (nc = 0; nc < nctx; nc++) { 2827 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 2828 pr_err("%s(%s): wait for %llx:%lld timed out\n", 2829 __func__, ve[0]->engine->name, 2830 request[nc]->fence.context, 2831 request[nc]->fence.seqno); 2832 2833 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 2834 __func__, ve[0]->engine->name, 2835 request[nc]->fence.context, 2836 request[nc]->fence.seqno); 2837 GEM_TRACE_DUMP(); 2838 intel_gt_set_wedged(gt); 2839 break; 2840 } 2841 } 2842 2843 times[1] = ktime_sub(ktime_get_raw(), times[1]); 2844 if (prime == 1) 2845 times[0] = times[1]; 2846 2847 for (nc = 0; nc < nctx; nc++) { 2848 i915_request_put(request[nc]); 2849 request[nc] = NULL; 2850 } 2851 2852 if (__igt_timeout(end_time, NULL)) 2853 break; 2854 } 2855 2856 err = igt_live_test_end(&t); 2857 if (err) 2858 goto out; 2859 2860 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 2861 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 2862 prime, div64_u64(ktime_to_ns(times[1]), prime)); 2863 2864 out: 2865 if (igt_flush_test(gt->i915)) 2866 err = -EIO; 2867 2868 for (nc = 0; nc < nctx; nc++) { 2869 i915_request_put(request[nc]); 2870 intel_context_unpin(ve[nc]); 2871 intel_context_put(ve[nc]); 2872 } 2873 return err; 2874 } 2875 2876 static int live_virtual_engine(void *arg) 2877 { 2878 struct intel_gt *gt = arg; 2879 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 2880 struct intel_engine_cs *engine; 2881 enum intel_engine_id id; 2882 unsigned int class, inst; 2883 int err; 2884 2885 if (USES_GUC_SUBMISSION(gt->i915)) 2886 return 0; 2887 2888 for_each_engine(engine, gt, id) { 2889 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 2890 if (err) { 2891 pr_err("Failed to wrap engine %s: err=%d\n", 2892 engine->name, err); 2893 return err; 2894 } 2895 } 2896 2897 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 2898 int nsibling, n; 2899 2900 nsibling = 0; 2901 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 2902 if (!gt->engine_class[class][inst]) 2903 continue; 2904 2905 siblings[nsibling++] = gt->engine_class[class][inst]; 2906 } 2907 if (nsibling < 2) 2908 continue; 2909 2910 for (n = 1; n <= nsibling + 1; n++) { 2911 err = nop_virtual_engine(gt, siblings, nsibling, 2912 n, 0); 2913 if (err) 2914 return err; 2915 } 2916 2917 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 2918 if (err) 2919 return err; 2920 } 2921 2922 return 0; 2923 } 2924 2925 static int mask_virtual_engine(struct intel_gt *gt, 2926 struct intel_engine_cs **siblings, 2927 unsigned int nsibling) 2928 { 2929 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 2930 struct intel_context *ve; 2931 struct igt_live_test t; 2932 unsigned int n; 2933 int err; 2934 2935 /* 2936 * Check that by setting the execution mask on a request, we can 2937 * restrict it to our desired engine within the virtual engine. 2938 */ 2939 2940 ve = intel_execlists_create_virtual(siblings, nsibling); 2941 if (IS_ERR(ve)) { 2942 err = PTR_ERR(ve); 2943 goto out_close; 2944 } 2945 2946 err = intel_context_pin(ve); 2947 if (err) 2948 goto out_put; 2949 2950 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 2951 if (err) 2952 goto out_unpin; 2953 2954 for (n = 0; n < nsibling; n++) { 2955 request[n] = i915_request_create(ve); 2956 if (IS_ERR(request[n])) { 2957 err = PTR_ERR(request[n]); 2958 nsibling = n; 2959 goto out; 2960 } 2961 2962 /* Reverse order as it's more likely to be unnatural */ 2963 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 2964 2965 i915_request_get(request[n]); 2966 i915_request_add(request[n]); 2967 } 2968 2969 for (n = 0; n < nsibling; n++) { 2970 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 2971 pr_err("%s(%s): wait for %llx:%lld timed out\n", 2972 __func__, ve->engine->name, 2973 request[n]->fence.context, 2974 request[n]->fence.seqno); 2975 2976 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 2977 __func__, ve->engine->name, 2978 request[n]->fence.context, 2979 request[n]->fence.seqno); 2980 GEM_TRACE_DUMP(); 2981 intel_gt_set_wedged(gt); 2982 err = -EIO; 2983 goto out; 2984 } 2985 2986 if (request[n]->engine != siblings[nsibling - n - 1]) { 2987 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 2988 request[n]->engine->name, 2989 siblings[nsibling - n - 1]->name); 2990 err = -EINVAL; 2991 goto out; 2992 } 2993 } 2994 2995 err = igt_live_test_end(&t); 2996 out: 2997 if (igt_flush_test(gt->i915)) 2998 err = -EIO; 2999 3000 for (n = 0; n < nsibling; n++) 3001 i915_request_put(request[n]); 3002 3003 out_unpin: 3004 intel_context_unpin(ve); 3005 out_put: 3006 intel_context_put(ve); 3007 out_close: 3008 return err; 3009 } 3010 3011 static int live_virtual_mask(void *arg) 3012 { 3013 struct intel_gt *gt = arg; 3014 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3015 unsigned int class, inst; 3016 int err; 3017 3018 if (USES_GUC_SUBMISSION(gt->i915)) 3019 return 0; 3020 3021 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3022 unsigned int nsibling; 3023 3024 nsibling = 0; 3025 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3026 if (!gt->engine_class[class][inst]) 3027 break; 3028 3029 siblings[nsibling++] = gt->engine_class[class][inst]; 3030 } 3031 if (nsibling < 2) 3032 continue; 3033 3034 err = mask_virtual_engine(gt, siblings, nsibling); 3035 if (err) 3036 return err; 3037 } 3038 3039 return 0; 3040 } 3041 3042 static int preserved_virtual_engine(struct intel_gt *gt, 3043 struct intel_engine_cs **siblings, 3044 unsigned int nsibling) 3045 { 3046 struct i915_request *last = NULL; 3047 struct intel_context *ve; 3048 struct i915_vma *scratch; 3049 struct igt_live_test t; 3050 unsigned int n; 3051 int err = 0; 3052 u32 *cs; 3053 3054 scratch = create_scratch(siblings[0]->gt); 3055 if (IS_ERR(scratch)) 3056 return PTR_ERR(scratch); 3057 3058 ve = intel_execlists_create_virtual(siblings, nsibling); 3059 if (IS_ERR(ve)) { 3060 err = PTR_ERR(ve); 3061 goto out_scratch; 3062 } 3063 3064 err = intel_context_pin(ve); 3065 if (err) 3066 goto out_put; 3067 3068 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3069 if (err) 3070 goto out_unpin; 3071 3072 for (n = 0; n < NUM_GPR_DW; n++) { 3073 struct intel_engine_cs *engine = siblings[n % nsibling]; 3074 struct i915_request *rq; 3075 3076 rq = i915_request_create(ve); 3077 if (IS_ERR(rq)) { 3078 err = PTR_ERR(rq); 3079 goto out_end; 3080 } 3081 3082 i915_request_put(last); 3083 last = i915_request_get(rq); 3084 3085 cs = intel_ring_begin(rq, 8); 3086 if (IS_ERR(cs)) { 3087 i915_request_add(rq); 3088 err = PTR_ERR(cs); 3089 goto out_end; 3090 } 3091 3092 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3093 *cs++ = CS_GPR(engine, n); 3094 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 3095 *cs++ = 0; 3096 3097 *cs++ = MI_LOAD_REGISTER_IMM(1); 3098 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 3099 *cs++ = n + 1; 3100 3101 *cs++ = MI_NOOP; 3102 intel_ring_advance(rq, cs); 3103 3104 /* Restrict this request to run on a particular engine */ 3105 rq->execution_mask = engine->mask; 3106 i915_request_add(rq); 3107 } 3108 3109 if (i915_request_wait(last, 0, HZ / 5) < 0) { 3110 err = -ETIME; 3111 goto out_end; 3112 } 3113 3114 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3115 if (IS_ERR(cs)) { 3116 err = PTR_ERR(cs); 3117 goto out_end; 3118 } 3119 3120 for (n = 0; n < NUM_GPR_DW; n++) { 3121 if (cs[n] != n) { 3122 pr_err("Incorrect value[%d] found for GPR[%d]\n", 3123 cs[n], n); 3124 err = -EINVAL; 3125 break; 3126 } 3127 } 3128 3129 i915_gem_object_unpin_map(scratch->obj); 3130 3131 out_end: 3132 if (igt_live_test_end(&t)) 3133 err = -EIO; 3134 i915_request_put(last); 3135 out_unpin: 3136 intel_context_unpin(ve); 3137 out_put: 3138 intel_context_put(ve); 3139 out_scratch: 3140 i915_vma_unpin_and_release(&scratch, 0); 3141 return err; 3142 } 3143 3144 static int live_virtual_preserved(void *arg) 3145 { 3146 struct intel_gt *gt = arg; 3147 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3148 unsigned int class, inst; 3149 3150 /* 3151 * Check that the context image retains non-privileged (user) registers 3152 * from one engine to the next. For this we check that the CS_GPR 3153 * are preserved. 3154 */ 3155 3156 if (USES_GUC_SUBMISSION(gt->i915)) 3157 return 0; 3158 3159 /* As we use CS_GPR we cannot run before they existed on all engines. */ 3160 if (INTEL_GEN(gt->i915) < 9) 3161 return 0; 3162 3163 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3164 int nsibling, err; 3165 3166 nsibling = 0; 3167 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3168 if (!gt->engine_class[class][inst]) 3169 continue; 3170 3171 siblings[nsibling++] = gt->engine_class[class][inst]; 3172 } 3173 if (nsibling < 2) 3174 continue; 3175 3176 err = preserved_virtual_engine(gt, siblings, nsibling); 3177 if (err) 3178 return err; 3179 } 3180 3181 return 0; 3182 } 3183 3184 static int bond_virtual_engine(struct intel_gt *gt, 3185 unsigned int class, 3186 struct intel_engine_cs **siblings, 3187 unsigned int nsibling, 3188 unsigned int flags) 3189 #define BOND_SCHEDULE BIT(0) 3190 { 3191 struct intel_engine_cs *master; 3192 struct i915_request *rq[16]; 3193 enum intel_engine_id id; 3194 struct igt_spinner spin; 3195 unsigned long n; 3196 int err; 3197 3198 /* 3199 * A set of bonded requests is intended to be run concurrently 3200 * across a number of engines. We use one request per-engine 3201 * and a magic fence to schedule each of the bonded requests 3202 * at the same time. A consequence of our current scheduler is that 3203 * we only move requests to the HW ready queue when the request 3204 * becomes ready, that is when all of its prerequisite fences have 3205 * been signaled. As one of those fences is the master submit fence, 3206 * there is a delay on all secondary fences as the HW may be 3207 * currently busy. Equally, as all the requests are independent, 3208 * they may have other fences that delay individual request 3209 * submission to HW. Ergo, we do not guarantee that all requests are 3210 * immediately submitted to HW at the same time, just that if the 3211 * rules are abided by, they are ready at the same time as the 3212 * first is submitted. Userspace can embed semaphores in its batch 3213 * to ensure parallel execution of its phases as it requires. 3214 * Though naturally it gets requested that perhaps the scheduler should 3215 * take care of parallel execution, even across preemption events on 3216 * different HW. (The proper answer is of course "lalalala".) 3217 * 3218 * With the submit-fence, we have identified three possible phases 3219 * of synchronisation depending on the master fence: queued (not 3220 * ready), executing, and signaled. The first two are quite simple 3221 * and checked below. However, the signaled master fence handling is 3222 * contentious. Currently we do not distinguish between a signaled 3223 * fence and an expired fence, as once signaled it does not convey 3224 * any information about the previous execution. It may even be freed 3225 * and hence checking later it may not exist at all. Ergo we currently 3226 * do not apply the bonding constraint for an already signaled fence, 3227 * as our expectation is that it should not constrain the secondaries 3228 * and is outside of the scope of the bonded request API (i.e. all 3229 * userspace requests are meant to be running in parallel). As 3230 * it imposes no constraint, and is effectively a no-op, we do not 3231 * check below as normal execution flows are checked extensively above. 3232 * 3233 * XXX Is the degenerate handling of signaled submit fences the 3234 * expected behaviour for userpace? 3235 */ 3236 3237 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 3238 3239 if (igt_spinner_init(&spin, gt)) 3240 return -ENOMEM; 3241 3242 err = 0; 3243 rq[0] = ERR_PTR(-ENOMEM); 3244 for_each_engine(master, gt, id) { 3245 struct i915_sw_fence fence = {}; 3246 3247 if (master->class == class) 3248 continue; 3249 3250 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 3251 3252 rq[0] = igt_spinner_create_request(&spin, 3253 master->kernel_context, 3254 MI_NOOP); 3255 if (IS_ERR(rq[0])) { 3256 err = PTR_ERR(rq[0]); 3257 goto out; 3258 } 3259 i915_request_get(rq[0]); 3260 3261 if (flags & BOND_SCHEDULE) { 3262 onstack_fence_init(&fence); 3263 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 3264 &fence, 3265 GFP_KERNEL); 3266 } 3267 3268 i915_request_add(rq[0]); 3269 if (err < 0) 3270 goto out; 3271 3272 if (!(flags & BOND_SCHEDULE) && 3273 !igt_wait_for_spinner(&spin, rq[0])) { 3274 err = -EIO; 3275 goto out; 3276 } 3277 3278 for (n = 0; n < nsibling; n++) { 3279 struct intel_context *ve; 3280 3281 ve = intel_execlists_create_virtual(siblings, nsibling); 3282 if (IS_ERR(ve)) { 3283 err = PTR_ERR(ve); 3284 onstack_fence_fini(&fence); 3285 goto out; 3286 } 3287 3288 err = intel_virtual_engine_attach_bond(ve->engine, 3289 master, 3290 siblings[n]); 3291 if (err) { 3292 intel_context_put(ve); 3293 onstack_fence_fini(&fence); 3294 goto out; 3295 } 3296 3297 err = intel_context_pin(ve); 3298 intel_context_put(ve); 3299 if (err) { 3300 onstack_fence_fini(&fence); 3301 goto out; 3302 } 3303 3304 rq[n + 1] = i915_request_create(ve); 3305 intel_context_unpin(ve); 3306 if (IS_ERR(rq[n + 1])) { 3307 err = PTR_ERR(rq[n + 1]); 3308 onstack_fence_fini(&fence); 3309 goto out; 3310 } 3311 i915_request_get(rq[n + 1]); 3312 3313 err = i915_request_await_execution(rq[n + 1], 3314 &rq[0]->fence, 3315 ve->engine->bond_execute); 3316 i915_request_add(rq[n + 1]); 3317 if (err < 0) { 3318 onstack_fence_fini(&fence); 3319 goto out; 3320 } 3321 } 3322 onstack_fence_fini(&fence); 3323 intel_engine_flush_submission(master); 3324 igt_spinner_end(&spin); 3325 3326 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 3327 pr_err("Master request did not execute (on %s)!\n", 3328 rq[0]->engine->name); 3329 err = -EIO; 3330 goto out; 3331 } 3332 3333 for (n = 0; n < nsibling; n++) { 3334 if (i915_request_wait(rq[n + 1], 0, 3335 MAX_SCHEDULE_TIMEOUT) < 0) { 3336 err = -EIO; 3337 goto out; 3338 } 3339 3340 if (rq[n + 1]->engine != siblings[n]) { 3341 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 3342 siblings[n]->name, 3343 rq[n + 1]->engine->name, 3344 rq[0]->engine->name); 3345 err = -EINVAL; 3346 goto out; 3347 } 3348 } 3349 3350 for (n = 0; !IS_ERR(rq[n]); n++) 3351 i915_request_put(rq[n]); 3352 rq[0] = ERR_PTR(-ENOMEM); 3353 } 3354 3355 out: 3356 for (n = 0; !IS_ERR(rq[n]); n++) 3357 i915_request_put(rq[n]); 3358 if (igt_flush_test(gt->i915)) 3359 err = -EIO; 3360 3361 igt_spinner_fini(&spin); 3362 return err; 3363 } 3364 3365 static int live_virtual_bond(void *arg) 3366 { 3367 static const struct phase { 3368 const char *name; 3369 unsigned int flags; 3370 } phases[] = { 3371 { "", 0 }, 3372 { "schedule", BOND_SCHEDULE }, 3373 { }, 3374 }; 3375 struct intel_gt *gt = arg; 3376 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3377 unsigned int class, inst; 3378 int err; 3379 3380 if (USES_GUC_SUBMISSION(gt->i915)) 3381 return 0; 3382 3383 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3384 const struct phase *p; 3385 int nsibling; 3386 3387 nsibling = 0; 3388 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3389 if (!gt->engine_class[class][inst]) 3390 break; 3391 3392 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); 3393 siblings[nsibling++] = gt->engine_class[class][inst]; 3394 } 3395 if (nsibling < 2) 3396 continue; 3397 3398 for (p = phases; p->name; p++) { 3399 err = bond_virtual_engine(gt, 3400 class, siblings, nsibling, 3401 p->flags); 3402 if (err) { 3403 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 3404 __func__, p->name, class, nsibling, err); 3405 return err; 3406 } 3407 } 3408 } 3409 3410 return 0; 3411 } 3412 3413 static int reset_virtual_engine(struct intel_gt *gt, 3414 struct intel_engine_cs **siblings, 3415 unsigned int nsibling) 3416 { 3417 struct intel_engine_cs *engine; 3418 struct intel_context *ve; 3419 unsigned long *heartbeat; 3420 struct igt_spinner spin; 3421 struct i915_request *rq; 3422 unsigned int n; 3423 int err = 0; 3424 3425 /* 3426 * In order to support offline error capture for fast preempt reset, 3427 * we need to decouple the guilty request and ensure that it and its 3428 * descendents are not executed while the capture is in progress. 3429 */ 3430 3431 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); 3432 if (!heartbeat) 3433 return -ENOMEM; 3434 3435 if (igt_spinner_init(&spin, gt)) { 3436 err = -ENOMEM; 3437 goto out_free; 3438 } 3439 3440 ve = intel_execlists_create_virtual(siblings, nsibling); 3441 if (IS_ERR(ve)) { 3442 err = PTR_ERR(ve); 3443 goto out_spin; 3444 } 3445 3446 for (n = 0; n < nsibling; n++) 3447 engine_heartbeat_disable(siblings[n], &heartbeat[n]); 3448 3449 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 3450 if (IS_ERR(rq)) { 3451 err = PTR_ERR(rq); 3452 goto out_heartbeat; 3453 } 3454 i915_request_add(rq); 3455 3456 if (!igt_wait_for_spinner(&spin, rq)) { 3457 intel_gt_set_wedged(gt); 3458 err = -ETIME; 3459 goto out_heartbeat; 3460 } 3461 3462 engine = rq->engine; 3463 GEM_BUG_ON(engine == ve->engine); 3464 3465 /* Take ownership of the reset and tasklet */ 3466 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 3467 >->reset.flags)) { 3468 intel_gt_set_wedged(gt); 3469 err = -EBUSY; 3470 goto out_heartbeat; 3471 } 3472 tasklet_disable(&engine->execlists.tasklet); 3473 3474 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 3475 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 3476 3477 /* Fake a preemption event; failed of course */ 3478 spin_lock_irq(&engine->active.lock); 3479 __unwind_incomplete_requests(engine); 3480 spin_unlock_irq(&engine->active.lock); 3481 GEM_BUG_ON(rq->engine != ve->engine); 3482 3483 /* Reset the engine while keeping our active request on hold */ 3484 execlists_hold(engine, rq); 3485 GEM_BUG_ON(!i915_request_on_hold(rq)); 3486 3487 intel_engine_reset(engine, NULL); 3488 GEM_BUG_ON(rq->fence.error != -EIO); 3489 3490 /* Release our grasp on the engine, letting CS flow again */ 3491 tasklet_enable(&engine->execlists.tasklet); 3492 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 3493 3494 /* Check that we do not resubmit the held request */ 3495 i915_request_get(rq); 3496 if (!i915_request_wait(rq, 0, HZ / 5)) { 3497 pr_err("%s: on hold request completed!\n", 3498 engine->name); 3499 intel_gt_set_wedged(gt); 3500 err = -EIO; 3501 goto out_rq; 3502 } 3503 GEM_BUG_ON(!i915_request_on_hold(rq)); 3504 3505 /* But is resubmitted on release */ 3506 execlists_unhold(engine, rq); 3507 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 3508 pr_err("%s: held request did not complete!\n", 3509 engine->name); 3510 intel_gt_set_wedged(gt); 3511 err = -ETIME; 3512 } 3513 3514 out_rq: 3515 i915_request_put(rq); 3516 out_heartbeat: 3517 for (n = 0; n < nsibling; n++) 3518 engine_heartbeat_enable(siblings[n], heartbeat[n]); 3519 3520 intel_context_put(ve); 3521 out_spin: 3522 igt_spinner_fini(&spin); 3523 out_free: 3524 kfree(heartbeat); 3525 return err; 3526 } 3527 3528 static int live_virtual_reset(void *arg) 3529 { 3530 struct intel_gt *gt = arg; 3531 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3532 unsigned int class, inst; 3533 3534 /* 3535 * Check that we handle a reset event within a virtual engine. 3536 * Only the physical engine is reset, but we have to check the flow 3537 * of the virtual requests around the reset, and make sure it is not 3538 * forgotten. 3539 */ 3540 3541 if (USES_GUC_SUBMISSION(gt->i915)) 3542 return 0; 3543 3544 if (!intel_has_reset_engine(gt)) 3545 return 0; 3546 3547 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3548 int nsibling, err; 3549 3550 nsibling = 0; 3551 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3552 if (!gt->engine_class[class][inst]) 3553 continue; 3554 3555 siblings[nsibling++] = gt->engine_class[class][inst]; 3556 } 3557 if (nsibling < 2) 3558 continue; 3559 3560 err = reset_virtual_engine(gt, siblings, nsibling); 3561 if (err) 3562 return err; 3563 } 3564 3565 return 0; 3566 } 3567 3568 int intel_execlists_live_selftests(struct drm_i915_private *i915) 3569 { 3570 static const struct i915_subtest tests[] = { 3571 SUBTEST(live_sanitycheck), 3572 SUBTEST(live_unlite_switch), 3573 SUBTEST(live_unlite_preempt), 3574 SUBTEST(live_hold_reset), 3575 SUBTEST(live_timeslice_preempt), 3576 SUBTEST(live_timeslice_queue), 3577 SUBTEST(live_busywait_preempt), 3578 SUBTEST(live_preempt), 3579 SUBTEST(live_late_preempt), 3580 SUBTEST(live_nopreempt), 3581 SUBTEST(live_preempt_cancel), 3582 SUBTEST(live_suppress_self_preempt), 3583 SUBTEST(live_suppress_wait_preempt), 3584 SUBTEST(live_chain_preempt), 3585 SUBTEST(live_preempt_gang), 3586 SUBTEST(live_preempt_hang), 3587 SUBTEST(live_preempt_timeout), 3588 SUBTEST(live_preempt_smoke), 3589 SUBTEST(live_virtual_engine), 3590 SUBTEST(live_virtual_mask), 3591 SUBTEST(live_virtual_preserved), 3592 SUBTEST(live_virtual_bond), 3593 SUBTEST(live_virtual_reset), 3594 }; 3595 3596 if (!HAS_EXECLISTS(i915)) 3597 return 0; 3598 3599 if (intel_gt_is_wedged(&i915->gt)) 3600 return 0; 3601 3602 return intel_gt_live_subtests(tests, &i915->gt); 3603 } 3604 3605 static void hexdump(const void *buf, size_t len) 3606 { 3607 const size_t rowsize = 8 * sizeof(u32); 3608 const void *prev = NULL; 3609 bool skip = false; 3610 size_t pos; 3611 3612 for (pos = 0; pos < len; pos += rowsize) { 3613 char line[128]; 3614 3615 if (prev && !memcmp(prev, buf + pos, rowsize)) { 3616 if (!skip) { 3617 pr_info("*\n"); 3618 skip = true; 3619 } 3620 continue; 3621 } 3622 3623 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, 3624 rowsize, sizeof(u32), 3625 line, sizeof(line), 3626 false) >= sizeof(line)); 3627 pr_info("[%04zx] %s\n", pos, line); 3628 3629 prev = buf + pos; 3630 skip = false; 3631 } 3632 } 3633 3634 static int live_lrc_layout(void *arg) 3635 { 3636 struct intel_gt *gt = arg; 3637 struct intel_engine_cs *engine; 3638 enum intel_engine_id id; 3639 u32 *lrc; 3640 int err; 3641 3642 /* 3643 * Check the registers offsets we use to create the initial reg state 3644 * match the layout saved by HW. 3645 */ 3646 3647 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 3648 if (!lrc) 3649 return -ENOMEM; 3650 3651 err = 0; 3652 for_each_engine(engine, gt, id) { 3653 u32 *hw; 3654 int dw; 3655 3656 if (!engine->default_state) 3657 continue; 3658 3659 hw = i915_gem_object_pin_map(engine->default_state, 3660 I915_MAP_WB); 3661 if (IS_ERR(hw)) { 3662 err = PTR_ERR(hw); 3663 break; 3664 } 3665 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 3666 3667 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 3668 engine->kernel_context, 3669 engine, 3670 engine->kernel_context->ring, 3671 true); 3672 3673 dw = 0; 3674 do { 3675 u32 lri = hw[dw]; 3676 3677 if (lri == 0) { 3678 dw++; 3679 continue; 3680 } 3681 3682 if (lrc[dw] == 0) { 3683 pr_debug("%s: skipped instruction %x at dword %d\n", 3684 engine->name, lri, dw); 3685 dw++; 3686 continue; 3687 } 3688 3689 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 3690 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 3691 engine->name, dw, lri); 3692 err = -EINVAL; 3693 break; 3694 } 3695 3696 if (lrc[dw] != lri) { 3697 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 3698 engine->name, dw, lri, lrc[dw]); 3699 err = -EINVAL; 3700 break; 3701 } 3702 3703 lri &= 0x7f; 3704 lri++; 3705 dw++; 3706 3707 while (lri) { 3708 if (hw[dw] != lrc[dw]) { 3709 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 3710 engine->name, dw, hw[dw], lrc[dw]); 3711 err = -EINVAL; 3712 break; 3713 } 3714 3715 /* 3716 * Skip over the actual register value as we 3717 * expect that to differ. 3718 */ 3719 dw += 2; 3720 lri -= 2; 3721 } 3722 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 3723 3724 if (err) { 3725 pr_info("%s: HW register image:\n", engine->name); 3726 hexdump(hw, PAGE_SIZE); 3727 3728 pr_info("%s: SW register image:\n", engine->name); 3729 hexdump(lrc, PAGE_SIZE); 3730 } 3731 3732 i915_gem_object_unpin_map(engine->default_state); 3733 if (err) 3734 break; 3735 } 3736 3737 kfree(lrc); 3738 return err; 3739 } 3740 3741 static int find_offset(const u32 *lri, u32 offset) 3742 { 3743 int i; 3744 3745 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 3746 if (lri[i] == offset) 3747 return i; 3748 3749 return -1; 3750 } 3751 3752 static int live_lrc_fixed(void *arg) 3753 { 3754 struct intel_gt *gt = arg; 3755 struct intel_engine_cs *engine; 3756 enum intel_engine_id id; 3757 int err = 0; 3758 3759 /* 3760 * Check the assumed register offsets match the actual locations in 3761 * the context image. 3762 */ 3763 3764 for_each_engine(engine, gt, id) { 3765 const struct { 3766 u32 reg; 3767 u32 offset; 3768 const char *name; 3769 } tbl[] = { 3770 { 3771 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 3772 CTX_RING_START - 1, 3773 "RING_START" 3774 }, 3775 { 3776 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 3777 CTX_RING_CTL - 1, 3778 "RING_CTL" 3779 }, 3780 { 3781 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 3782 CTX_RING_HEAD - 1, 3783 "RING_HEAD" 3784 }, 3785 { 3786 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 3787 CTX_RING_TAIL - 1, 3788 "RING_TAIL" 3789 }, 3790 { 3791 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 3792 lrc_ring_mi_mode(engine), 3793 "RING_MI_MODE" 3794 }, 3795 { 3796 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 3797 CTX_BB_STATE - 1, 3798 "BB_STATE" 3799 }, 3800 { }, 3801 }, *t; 3802 u32 *hw; 3803 3804 if (!engine->default_state) 3805 continue; 3806 3807 hw = i915_gem_object_pin_map(engine->default_state, 3808 I915_MAP_WB); 3809 if (IS_ERR(hw)) { 3810 err = PTR_ERR(hw); 3811 break; 3812 } 3813 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 3814 3815 for (t = tbl; t->name; t++) { 3816 int dw = find_offset(hw, t->reg); 3817 3818 if (dw != t->offset) { 3819 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 3820 engine->name, 3821 t->name, 3822 t->reg, 3823 dw, 3824 t->offset); 3825 err = -EINVAL; 3826 } 3827 } 3828 3829 i915_gem_object_unpin_map(engine->default_state); 3830 } 3831 3832 return err; 3833 } 3834 3835 static int __live_lrc_state(struct intel_engine_cs *engine, 3836 struct i915_vma *scratch) 3837 { 3838 struct intel_context *ce; 3839 struct i915_request *rq; 3840 enum { 3841 RING_START_IDX = 0, 3842 RING_TAIL_IDX, 3843 MAX_IDX 3844 }; 3845 u32 expected[MAX_IDX]; 3846 u32 *cs; 3847 int err; 3848 int n; 3849 3850 ce = intel_context_create(engine); 3851 if (IS_ERR(ce)) 3852 return PTR_ERR(ce); 3853 3854 err = intel_context_pin(ce); 3855 if (err) 3856 goto err_put; 3857 3858 rq = i915_request_create(ce); 3859 if (IS_ERR(rq)) { 3860 err = PTR_ERR(rq); 3861 goto err_unpin; 3862 } 3863 3864 cs = intel_ring_begin(rq, 4 * MAX_IDX); 3865 if (IS_ERR(cs)) { 3866 err = PTR_ERR(cs); 3867 i915_request_add(rq); 3868 goto err_unpin; 3869 } 3870 3871 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3872 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 3873 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 3874 *cs++ = 0; 3875 3876 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 3877 3878 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3879 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 3880 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 3881 *cs++ = 0; 3882 3883 i915_request_get(rq); 3884 i915_request_add(rq); 3885 3886 intel_engine_flush_submission(engine); 3887 expected[RING_TAIL_IDX] = ce->ring->tail; 3888 3889 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 3890 err = -ETIME; 3891 goto err_rq; 3892 } 3893 3894 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3895 if (IS_ERR(cs)) { 3896 err = PTR_ERR(cs); 3897 goto err_rq; 3898 } 3899 3900 for (n = 0; n < MAX_IDX; n++) { 3901 if (cs[n] != expected[n]) { 3902 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 3903 engine->name, n, cs[n], expected[n]); 3904 err = -EINVAL; 3905 break; 3906 } 3907 } 3908 3909 i915_gem_object_unpin_map(scratch->obj); 3910 3911 err_rq: 3912 i915_request_put(rq); 3913 err_unpin: 3914 intel_context_unpin(ce); 3915 err_put: 3916 intel_context_put(ce); 3917 return err; 3918 } 3919 3920 static int live_lrc_state(void *arg) 3921 { 3922 struct intel_gt *gt = arg; 3923 struct intel_engine_cs *engine; 3924 struct i915_vma *scratch; 3925 enum intel_engine_id id; 3926 int err = 0; 3927 3928 /* 3929 * Check the live register state matches what we expect for this 3930 * intel_context. 3931 */ 3932 3933 scratch = create_scratch(gt); 3934 if (IS_ERR(scratch)) 3935 return PTR_ERR(scratch); 3936 3937 for_each_engine(engine, gt, id) { 3938 err = __live_lrc_state(engine, scratch); 3939 if (err) 3940 break; 3941 } 3942 3943 if (igt_flush_test(gt->i915)) 3944 err = -EIO; 3945 3946 i915_vma_unpin_and_release(&scratch, 0); 3947 return err; 3948 } 3949 3950 static int gpr_make_dirty(struct intel_engine_cs *engine) 3951 { 3952 struct i915_request *rq; 3953 u32 *cs; 3954 int n; 3955 3956 rq = intel_engine_create_kernel_request(engine); 3957 if (IS_ERR(rq)) 3958 return PTR_ERR(rq); 3959 3960 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 3961 if (IS_ERR(cs)) { 3962 i915_request_add(rq); 3963 return PTR_ERR(cs); 3964 } 3965 3966 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 3967 for (n = 0; n < NUM_GPR_DW; n++) { 3968 *cs++ = CS_GPR(engine, n); 3969 *cs++ = STACK_MAGIC; 3970 } 3971 *cs++ = MI_NOOP; 3972 3973 intel_ring_advance(rq, cs); 3974 i915_request_add(rq); 3975 3976 return 0; 3977 } 3978 3979 static int __live_gpr_clear(struct intel_engine_cs *engine, 3980 struct i915_vma *scratch) 3981 { 3982 struct intel_context *ce; 3983 struct i915_request *rq; 3984 u32 *cs; 3985 int err; 3986 int n; 3987 3988 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 3989 return 0; /* GPR only on rcs0 for gen8 */ 3990 3991 err = gpr_make_dirty(engine); 3992 if (err) 3993 return err; 3994 3995 ce = intel_context_create(engine); 3996 if (IS_ERR(ce)) 3997 return PTR_ERR(ce); 3998 3999 rq = intel_context_create_request(ce); 4000 if (IS_ERR(rq)) { 4001 err = PTR_ERR(rq); 4002 goto err_put; 4003 } 4004 4005 cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); 4006 if (IS_ERR(cs)) { 4007 err = PTR_ERR(cs); 4008 i915_request_add(rq); 4009 goto err_put; 4010 } 4011 4012 for (n = 0; n < NUM_GPR_DW; n++) { 4013 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4014 *cs++ = CS_GPR(engine, n); 4015 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4016 *cs++ = 0; 4017 } 4018 4019 i915_request_get(rq); 4020 i915_request_add(rq); 4021 4022 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4023 err = -ETIME; 4024 goto err_rq; 4025 } 4026 4027 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4028 if (IS_ERR(cs)) { 4029 err = PTR_ERR(cs); 4030 goto err_rq; 4031 } 4032 4033 for (n = 0; n < NUM_GPR_DW; n++) { 4034 if (cs[n]) { 4035 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 4036 engine->name, 4037 n / 2, n & 1 ? "udw" : "ldw", 4038 cs[n]); 4039 err = -EINVAL; 4040 break; 4041 } 4042 } 4043 4044 i915_gem_object_unpin_map(scratch->obj); 4045 4046 err_rq: 4047 i915_request_put(rq); 4048 err_put: 4049 intel_context_put(ce); 4050 return err; 4051 } 4052 4053 static int live_gpr_clear(void *arg) 4054 { 4055 struct intel_gt *gt = arg; 4056 struct intel_engine_cs *engine; 4057 struct i915_vma *scratch; 4058 enum intel_engine_id id; 4059 int err = 0; 4060 4061 /* 4062 * Check that GPR registers are cleared in new contexts as we need 4063 * to avoid leaking any information from previous contexts. 4064 */ 4065 4066 scratch = create_scratch(gt); 4067 if (IS_ERR(scratch)) 4068 return PTR_ERR(scratch); 4069 4070 for_each_engine(engine, gt, id) { 4071 err = __live_gpr_clear(engine, scratch); 4072 if (err) 4073 break; 4074 } 4075 4076 if (igt_flush_test(gt->i915)) 4077 err = -EIO; 4078 4079 i915_vma_unpin_and_release(&scratch, 0); 4080 return err; 4081 } 4082 4083 int intel_lrc_live_selftests(struct drm_i915_private *i915) 4084 { 4085 static const struct i915_subtest tests[] = { 4086 SUBTEST(live_lrc_layout), 4087 SUBTEST(live_lrc_fixed), 4088 SUBTEST(live_lrc_state), 4089 SUBTEST(live_gpr_clear), 4090 }; 4091 4092 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 4093 return 0; 4094 4095 return intel_gt_live_subtests(tests, &i915->gt); 4096 } 4097