1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017-2018 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "intel_context.h" 10 #include "intel_engine_heartbeat.h" 11 #include "intel_engine_pm.h" 12 #include "intel_gt.h" 13 #include "intel_gt_requests.h" 14 #include "intel_ring.h" 15 #include "selftest_engine_heartbeat.h" 16 17 #include "../selftests/i915_random.h" 18 #include "../i915_selftest.h" 19 20 #include "selftests/igt_flush_test.h" 21 #include "selftests/lib_sw_fence.h" 22 #include "selftests/mock_gem_device.h" 23 #include "selftests/mock_timeline.h" 24 25 static struct page *hwsp_page(struct intel_timeline *tl) 26 { 27 struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; 28 29 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 30 return sg_page(obj->mm.pages->sgl); 31 } 32 33 static unsigned long hwsp_cacheline(struct intel_timeline *tl) 34 { 35 unsigned long address = (unsigned long)page_address(hwsp_page(tl)); 36 37 return (address + tl->hwsp_offset) / CACHELINE_BYTES; 38 } 39 40 #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) 41 42 struct mock_hwsp_freelist { 43 struct intel_gt *gt; 44 struct radix_tree_root cachelines; 45 struct intel_timeline **history; 46 unsigned long count, max; 47 struct rnd_state prng; 48 }; 49 50 enum { 51 SHUFFLE = BIT(0), 52 }; 53 54 static void __mock_hwsp_record(struct mock_hwsp_freelist *state, 55 unsigned int idx, 56 struct intel_timeline *tl) 57 { 58 tl = xchg(&state->history[idx], tl); 59 if (tl) { 60 radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); 61 intel_timeline_put(tl); 62 } 63 } 64 65 static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, 66 unsigned int count, 67 unsigned int flags) 68 { 69 struct intel_timeline *tl; 70 unsigned int idx; 71 72 while (count--) { 73 unsigned long cacheline; 74 int err; 75 76 tl = intel_timeline_create(state->gt); 77 if (IS_ERR(tl)) 78 return PTR_ERR(tl); 79 80 cacheline = hwsp_cacheline(tl); 81 err = radix_tree_insert(&state->cachelines, cacheline, tl); 82 if (err) { 83 if (err == -EEXIST) { 84 pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", 85 cacheline); 86 } 87 intel_timeline_put(tl); 88 return err; 89 } 90 91 idx = state->count++ % state->max; 92 __mock_hwsp_record(state, idx, tl); 93 } 94 95 if (flags & SHUFFLE) 96 i915_prandom_shuffle(state->history, 97 sizeof(*state->history), 98 min(state->count, state->max), 99 &state->prng); 100 101 count = i915_prandom_u32_max_state(min(state->count, state->max), 102 &state->prng); 103 while (count--) { 104 idx = --state->count % state->max; 105 __mock_hwsp_record(state, idx, NULL); 106 } 107 108 return 0; 109 } 110 111 static int mock_hwsp_freelist(void *arg) 112 { 113 struct mock_hwsp_freelist state; 114 struct drm_i915_private *i915; 115 const struct { 116 const char *name; 117 unsigned int flags; 118 } phases[] = { 119 { "linear", 0 }, 120 { "shuffled", SHUFFLE }, 121 { }, 122 }, *p; 123 unsigned int na; 124 int err = 0; 125 126 i915 = mock_gem_device(); 127 if (!i915) 128 return -ENOMEM; 129 130 INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); 131 state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); 132 133 state.gt = &i915->gt; 134 135 /* 136 * Create a bunch of timelines and check that their HWSP do not overlap. 137 * Free some, and try again. 138 */ 139 140 state.max = PAGE_SIZE / sizeof(*state.history); 141 state.count = 0; 142 state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); 143 if (!state.history) { 144 err = -ENOMEM; 145 goto err_put; 146 } 147 148 for (p = phases; p->name; p++) { 149 pr_debug("%s(%s)\n", __func__, p->name); 150 for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { 151 err = __mock_hwsp_timeline(&state, na, p->flags); 152 if (err) 153 goto out; 154 } 155 } 156 157 out: 158 for (na = 0; na < state.max; na++) 159 __mock_hwsp_record(&state, na, NULL); 160 kfree(state.history); 161 err_put: 162 mock_destroy_device(i915); 163 return err; 164 } 165 166 struct __igt_sync { 167 const char *name; 168 u32 seqno; 169 bool expected; 170 bool set; 171 }; 172 173 static int __igt_sync(struct intel_timeline *tl, 174 u64 ctx, 175 const struct __igt_sync *p, 176 const char *name) 177 { 178 int ret; 179 180 if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { 181 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", 182 name, p->name, ctx, p->seqno, yesno(p->expected)); 183 return -EINVAL; 184 } 185 186 if (p->set) { 187 ret = __intel_timeline_sync_set(tl, ctx, p->seqno); 188 if (ret) 189 return ret; 190 } 191 192 return 0; 193 } 194 195 static int igt_sync(void *arg) 196 { 197 const struct __igt_sync pass[] = { 198 { "unset", 0, false, false }, 199 { "new", 0, false, true }, 200 { "0a", 0, true, true }, 201 { "1a", 1, false, true }, 202 { "1b", 1, true, true }, 203 { "0b", 0, true, false }, 204 { "2a", 2, false, true }, 205 { "4", 4, false, true }, 206 { "INT_MAX", INT_MAX, false, true }, 207 { "INT_MAX-1", INT_MAX-1, true, false }, 208 { "INT_MAX+1", (u32)INT_MAX+1, false, true }, 209 { "INT_MAX", INT_MAX, true, false }, 210 { "UINT_MAX", UINT_MAX, false, true }, 211 { "wrap", 0, false, true }, 212 { "unwrap", UINT_MAX, true, false }, 213 {}, 214 }, *p; 215 struct intel_timeline tl; 216 int order, offset; 217 int ret = -ENODEV; 218 219 mock_timeline_init(&tl, 0); 220 for (p = pass; p->name; p++) { 221 for (order = 1; order < 64; order++) { 222 for (offset = -1; offset <= (order > 1); offset++) { 223 u64 ctx = BIT_ULL(order) + offset; 224 225 ret = __igt_sync(&tl, ctx, p, "1"); 226 if (ret) 227 goto out; 228 } 229 } 230 } 231 mock_timeline_fini(&tl); 232 233 mock_timeline_init(&tl, 0); 234 for (order = 1; order < 64; order++) { 235 for (offset = -1; offset <= (order > 1); offset++) { 236 u64 ctx = BIT_ULL(order) + offset; 237 238 for (p = pass; p->name; p++) { 239 ret = __igt_sync(&tl, ctx, p, "2"); 240 if (ret) 241 goto out; 242 } 243 } 244 } 245 246 out: 247 mock_timeline_fini(&tl); 248 return ret; 249 } 250 251 static unsigned int random_engine(struct rnd_state *rnd) 252 { 253 return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); 254 } 255 256 static int bench_sync(void *arg) 257 { 258 struct rnd_state prng; 259 struct intel_timeline tl; 260 unsigned long end_time, count; 261 u64 prng32_1M; 262 ktime_t kt; 263 int order, last_order; 264 265 mock_timeline_init(&tl, 0); 266 267 /* Lookups from cache are very fast and so the random number generation 268 * and the loop itself becomes a significant factor in the per-iteration 269 * timings. We try to compensate the results by measuring the overhead 270 * of the prng and subtract it from the reported results. 271 */ 272 prandom_seed_state(&prng, i915_selftest.random_seed); 273 count = 0; 274 kt = ktime_get(); 275 end_time = jiffies + HZ/10; 276 do { 277 u32 x; 278 279 /* Make sure the compiler doesn't optimise away the prng call */ 280 WRITE_ONCE(x, prandom_u32_state(&prng)); 281 282 count++; 283 } while (!time_after(jiffies, end_time)); 284 kt = ktime_sub(ktime_get(), kt); 285 pr_debug("%s: %lu random evaluations, %lluns/prng\n", 286 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 287 prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); 288 289 /* Benchmark (only) setting random context ids */ 290 prandom_seed_state(&prng, i915_selftest.random_seed); 291 count = 0; 292 kt = ktime_get(); 293 end_time = jiffies + HZ/10; 294 do { 295 u64 id = i915_prandom_u64_state(&prng); 296 297 __intel_timeline_sync_set(&tl, id, 0); 298 count++; 299 } while (!time_after(jiffies, end_time)); 300 kt = ktime_sub(ktime_get(), kt); 301 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 302 pr_info("%s: %lu random insertions, %lluns/insert\n", 303 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 304 305 /* Benchmark looking up the exact same context ids as we just set */ 306 prandom_seed_state(&prng, i915_selftest.random_seed); 307 end_time = count; 308 kt = ktime_get(); 309 while (end_time--) { 310 u64 id = i915_prandom_u64_state(&prng); 311 312 if (!__intel_timeline_sync_is_later(&tl, id, 0)) { 313 mock_timeline_fini(&tl); 314 pr_err("Lookup of %llu failed\n", id); 315 return -EINVAL; 316 } 317 } 318 kt = ktime_sub(ktime_get(), kt); 319 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 320 pr_info("%s: %lu random lookups, %lluns/lookup\n", 321 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 322 323 mock_timeline_fini(&tl); 324 cond_resched(); 325 326 mock_timeline_init(&tl, 0); 327 328 /* Benchmark setting the first N (in order) contexts */ 329 count = 0; 330 kt = ktime_get(); 331 end_time = jiffies + HZ/10; 332 do { 333 __intel_timeline_sync_set(&tl, count++, 0); 334 } while (!time_after(jiffies, end_time)); 335 kt = ktime_sub(ktime_get(), kt); 336 pr_info("%s: %lu in-order insertions, %lluns/insert\n", 337 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 338 339 /* Benchmark looking up the exact same context ids as we just set */ 340 end_time = count; 341 kt = ktime_get(); 342 while (end_time--) { 343 if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) { 344 pr_err("Lookup of %lu failed\n", end_time); 345 mock_timeline_fini(&tl); 346 return -EINVAL; 347 } 348 } 349 kt = ktime_sub(ktime_get(), kt); 350 pr_info("%s: %lu in-order lookups, %lluns/lookup\n", 351 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 352 353 mock_timeline_fini(&tl); 354 cond_resched(); 355 356 mock_timeline_init(&tl, 0); 357 358 /* Benchmark searching for a random context id and maybe changing it */ 359 prandom_seed_state(&prng, i915_selftest.random_seed); 360 count = 0; 361 kt = ktime_get(); 362 end_time = jiffies + HZ/10; 363 do { 364 u32 id = random_engine(&prng); 365 u32 seqno = prandom_u32_state(&prng); 366 367 if (!__intel_timeline_sync_is_later(&tl, id, seqno)) 368 __intel_timeline_sync_set(&tl, id, seqno); 369 370 count++; 371 } while (!time_after(jiffies, end_time)); 372 kt = ktime_sub(ktime_get(), kt); 373 kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 374 pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", 375 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 376 mock_timeline_fini(&tl); 377 cond_resched(); 378 379 /* Benchmark searching for a known context id and changing the seqno */ 380 for (last_order = 1, order = 1; order < 32; 381 ({ int tmp = last_order; last_order = order; order += tmp; })) { 382 unsigned int mask = BIT(order) - 1; 383 384 mock_timeline_init(&tl, 0); 385 386 count = 0; 387 kt = ktime_get(); 388 end_time = jiffies + HZ/10; 389 do { 390 /* Without assuming too many details of the underlying 391 * implementation, try to identify its phase-changes 392 * (if any)! 393 */ 394 u64 id = (u64)(count & mask) << order; 395 396 __intel_timeline_sync_is_later(&tl, id, 0); 397 __intel_timeline_sync_set(&tl, id, 0); 398 399 count++; 400 } while (!time_after(jiffies, end_time)); 401 kt = ktime_sub(ktime_get(), kt); 402 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", 403 __func__, count, order, 404 (long long)div64_ul(ktime_to_ns(kt), count)); 405 mock_timeline_fini(&tl); 406 cond_resched(); 407 } 408 409 return 0; 410 } 411 412 int intel_timeline_mock_selftests(void) 413 { 414 static const struct i915_subtest tests[] = { 415 SUBTEST(mock_hwsp_freelist), 416 SUBTEST(igt_sync), 417 SUBTEST(bench_sync), 418 }; 419 420 return i915_subtests(tests, NULL); 421 } 422 423 static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) 424 { 425 u32 *cs; 426 427 cs = intel_ring_begin(rq, 4); 428 if (IS_ERR(cs)) 429 return PTR_ERR(cs); 430 431 if (INTEL_GEN(rq->engine->i915) >= 8) { 432 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 433 *cs++ = addr; 434 *cs++ = 0; 435 *cs++ = value; 436 } else if (INTEL_GEN(rq->engine->i915) >= 4) { 437 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 438 *cs++ = 0; 439 *cs++ = addr; 440 *cs++ = value; 441 } else { 442 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; 443 *cs++ = addr; 444 *cs++ = value; 445 *cs++ = MI_NOOP; 446 } 447 448 intel_ring_advance(rq, cs); 449 450 return 0; 451 } 452 453 static struct i915_request * 454 tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) 455 { 456 struct i915_request *rq; 457 int err; 458 459 err = intel_timeline_pin(tl, NULL); 460 if (err) { 461 rq = ERR_PTR(err); 462 goto out; 463 } 464 465 rq = intel_engine_create_kernel_request(engine); 466 if (IS_ERR(rq)) 467 goto out_unpin; 468 469 i915_request_get(rq); 470 471 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); 472 i915_request_add(rq); 473 if (err) { 474 i915_request_put(rq); 475 rq = ERR_PTR(err); 476 } 477 478 out_unpin: 479 intel_timeline_unpin(tl); 480 out: 481 if (IS_ERR(rq)) 482 pr_err("Failed to write to timeline!\n"); 483 return rq; 484 } 485 486 static struct intel_timeline * 487 checked_intel_timeline_create(struct intel_gt *gt) 488 { 489 struct intel_timeline *tl; 490 491 tl = intel_timeline_create(gt); 492 if (IS_ERR(tl)) 493 return tl; 494 495 if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) { 496 pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", 497 *tl->hwsp_seqno, tl->seqno); 498 intel_timeline_put(tl); 499 return ERR_PTR(-EINVAL); 500 } 501 502 return tl; 503 } 504 505 static int live_hwsp_engine(void *arg) 506 { 507 #define NUM_TIMELINES 4096 508 struct intel_gt *gt = arg; 509 struct intel_timeline **timelines; 510 struct intel_engine_cs *engine; 511 enum intel_engine_id id; 512 unsigned long count, n; 513 int err = 0; 514 515 /* 516 * Create a bunch of timelines and check we can write 517 * independently to each of their breadcrumb slots. 518 */ 519 520 timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, 521 sizeof(*timelines), 522 GFP_KERNEL); 523 if (!timelines) 524 return -ENOMEM; 525 526 count = 0; 527 for_each_engine(engine, gt, id) { 528 if (!intel_engine_can_store_dword(engine)) 529 continue; 530 531 intel_engine_pm_get(engine); 532 533 for (n = 0; n < NUM_TIMELINES; n++) { 534 struct intel_timeline *tl; 535 struct i915_request *rq; 536 537 tl = checked_intel_timeline_create(gt); 538 if (IS_ERR(tl)) { 539 err = PTR_ERR(tl); 540 break; 541 } 542 543 rq = tl_write(tl, engine, count); 544 if (IS_ERR(rq)) { 545 intel_timeline_put(tl); 546 err = PTR_ERR(rq); 547 break; 548 } 549 550 timelines[count++] = tl; 551 i915_request_put(rq); 552 } 553 554 intel_engine_pm_put(engine); 555 if (err) 556 break; 557 } 558 559 if (igt_flush_test(gt->i915)) 560 err = -EIO; 561 562 for (n = 0; n < count; n++) { 563 struct intel_timeline *tl = timelines[n]; 564 565 if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { 566 GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", 567 n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); 568 GEM_TRACE_DUMP(); 569 err = -EINVAL; 570 } 571 intel_timeline_put(tl); 572 } 573 574 kvfree(timelines); 575 return err; 576 #undef NUM_TIMELINES 577 } 578 579 static int live_hwsp_alternate(void *arg) 580 { 581 #define NUM_TIMELINES 4096 582 struct intel_gt *gt = arg; 583 struct intel_timeline **timelines; 584 struct intel_engine_cs *engine; 585 enum intel_engine_id id; 586 unsigned long count, n; 587 int err = 0; 588 589 /* 590 * Create a bunch of timelines and check we can write 591 * independently to each of their breadcrumb slots with adjacent 592 * engines. 593 */ 594 595 timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, 596 sizeof(*timelines), 597 GFP_KERNEL); 598 if (!timelines) 599 return -ENOMEM; 600 601 count = 0; 602 for (n = 0; n < NUM_TIMELINES; n++) { 603 for_each_engine(engine, gt, id) { 604 struct intel_timeline *tl; 605 struct i915_request *rq; 606 607 if (!intel_engine_can_store_dword(engine)) 608 continue; 609 610 tl = checked_intel_timeline_create(gt); 611 if (IS_ERR(tl)) { 612 err = PTR_ERR(tl); 613 goto out; 614 } 615 616 intel_engine_pm_get(engine); 617 rq = tl_write(tl, engine, count); 618 intel_engine_pm_put(engine); 619 if (IS_ERR(rq)) { 620 intel_timeline_put(tl); 621 err = PTR_ERR(rq); 622 goto out; 623 } 624 625 timelines[count++] = tl; 626 i915_request_put(rq); 627 } 628 } 629 630 out: 631 if (igt_flush_test(gt->i915)) 632 err = -EIO; 633 634 for (n = 0; n < count; n++) { 635 struct intel_timeline *tl = timelines[n]; 636 637 if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { 638 GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", 639 n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); 640 GEM_TRACE_DUMP(); 641 err = -EINVAL; 642 } 643 intel_timeline_put(tl); 644 } 645 646 kvfree(timelines); 647 return err; 648 #undef NUM_TIMELINES 649 } 650 651 static int live_hwsp_wrap(void *arg) 652 { 653 struct intel_gt *gt = arg; 654 struct intel_engine_cs *engine; 655 struct intel_timeline *tl; 656 enum intel_engine_id id; 657 int err = 0; 658 659 /* 660 * Across a seqno wrap, we need to keep the old cacheline alive for 661 * foreign GPU references. 662 */ 663 664 tl = intel_timeline_create(gt); 665 if (IS_ERR(tl)) 666 return PTR_ERR(tl); 667 668 if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) 669 goto out_free; 670 671 err = intel_timeline_pin(tl, NULL); 672 if (err) 673 goto out_free; 674 675 for_each_engine(engine, gt, id) { 676 const u32 *hwsp_seqno[2]; 677 struct i915_request *rq; 678 u32 seqno[2]; 679 680 if (!intel_engine_can_store_dword(engine)) 681 continue; 682 683 rq = intel_engine_create_kernel_request(engine); 684 if (IS_ERR(rq)) { 685 err = PTR_ERR(rq); 686 goto out; 687 } 688 689 tl->seqno = -4u; 690 691 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); 692 err = intel_timeline_get_seqno(tl, rq, &seqno[0]); 693 mutex_unlock(&tl->mutex); 694 if (err) { 695 i915_request_add(rq); 696 goto out; 697 } 698 pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", 699 seqno[0], tl->hwsp_offset); 700 701 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); 702 if (err) { 703 i915_request_add(rq); 704 goto out; 705 } 706 hwsp_seqno[0] = tl->hwsp_seqno; 707 708 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); 709 err = intel_timeline_get_seqno(tl, rq, &seqno[1]); 710 mutex_unlock(&tl->mutex); 711 if (err) { 712 i915_request_add(rq); 713 goto out; 714 } 715 pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", 716 seqno[1], tl->hwsp_offset); 717 718 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); 719 if (err) { 720 i915_request_add(rq); 721 goto out; 722 } 723 hwsp_seqno[1] = tl->hwsp_seqno; 724 725 /* With wrap should come a new hwsp */ 726 GEM_BUG_ON(seqno[1] >= seqno[0]); 727 GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); 728 729 i915_request_add(rq); 730 731 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 732 pr_err("Wait for timeline writes timed out!\n"); 733 err = -EIO; 734 goto out; 735 } 736 737 if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] || 738 READ_ONCE(*hwsp_seqno[1]) != seqno[1]) { 739 pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", 740 *hwsp_seqno[0], *hwsp_seqno[1], 741 seqno[0], seqno[1]); 742 err = -EINVAL; 743 goto out; 744 } 745 746 intel_gt_retire_requests(gt); /* recycle HWSP */ 747 } 748 749 out: 750 if (igt_flush_test(gt->i915)) 751 err = -EIO; 752 753 intel_timeline_unpin(tl); 754 out_free: 755 intel_timeline_put(tl); 756 return err; 757 } 758 759 static int emit_read_hwsp(struct i915_request *rq, 760 u32 seqno, u32 hwsp, 761 u32 *addr) 762 { 763 const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0)); 764 u32 *cs; 765 766 cs = intel_ring_begin(rq, 12); 767 if (IS_ERR(cs)) 768 return PTR_ERR(cs); 769 770 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 771 *cs++ = *addr; 772 *cs++ = 0; 773 *cs++ = seqno; 774 *addr += 4; 775 776 *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT; 777 *cs++ = gpr; 778 *cs++ = hwsp; 779 *cs++ = 0; 780 781 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 782 *cs++ = gpr; 783 *cs++ = *addr; 784 *cs++ = 0; 785 *addr += 4; 786 787 intel_ring_advance(rq, cs); 788 789 return 0; 790 } 791 792 struct hwsp_watcher { 793 struct i915_vma *vma; 794 struct i915_request *rq; 795 u32 addr; 796 u32 *map; 797 }; 798 799 static bool cmp_lt(u32 a, u32 b) 800 { 801 return a < b; 802 } 803 804 static bool cmp_gte(u32 a, u32 b) 805 { 806 return a >= b; 807 } 808 809 static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt) 810 { 811 struct drm_i915_gem_object *obj; 812 struct i915_vma *vma; 813 814 obj = i915_gem_object_create_internal(gt->i915, SZ_2M); 815 if (IS_ERR(obj)) 816 return PTR_ERR(obj); 817 818 w->map = i915_gem_object_pin_map(obj, I915_MAP_WB); 819 if (IS_ERR(w->map)) { 820 i915_gem_object_put(obj); 821 return PTR_ERR(w->map); 822 } 823 824 vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0); 825 if (IS_ERR(vma)) { 826 i915_gem_object_put(obj); 827 return PTR_ERR(vma); 828 } 829 830 w->vma = vma; 831 w->addr = i915_ggtt_offset(vma); 832 return 0; 833 } 834 835 static int create_watcher(struct hwsp_watcher *w, 836 struct intel_engine_cs *engine, 837 int ringsz) 838 { 839 struct intel_context *ce; 840 struct intel_timeline *tl; 841 842 ce = intel_context_create(engine); 843 if (IS_ERR(ce)) 844 return PTR_ERR(ce); 845 846 ce->ring = __intel_context_ring_size(ringsz); 847 w->rq = intel_context_create_request(ce); 848 intel_context_put(ce); 849 if (IS_ERR(w->rq)) 850 return PTR_ERR(w->rq); 851 852 w->addr = i915_ggtt_offset(w->vma); 853 tl = w->rq->context->timeline; 854 855 /* some light mutex juggling required; think co-routines */ 856 lockdep_unpin_lock(&tl->mutex, w->rq->cookie); 857 mutex_unlock(&tl->mutex); 858 859 return 0; 860 } 861 862 static int check_watcher(struct hwsp_watcher *w, const char *name, 863 bool (*op)(u32 hwsp, u32 seqno)) 864 { 865 struct i915_request *rq = fetch_and_zero(&w->rq); 866 struct intel_timeline *tl = rq->context->timeline; 867 u32 offset, end; 868 int err; 869 870 GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size); 871 872 i915_request_get(rq); 873 mutex_lock(&tl->mutex); 874 rq->cookie = lockdep_pin_lock(&tl->mutex); 875 i915_request_add(rq); 876 877 if (i915_request_wait(rq, 0, HZ) < 0) { 878 err = -ETIME; 879 goto out; 880 } 881 882 err = 0; 883 offset = 0; 884 end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map); 885 while (offset < end) { 886 if (!op(w->map[offset + 1], w->map[offset])) { 887 pr_err("Watcher '%s' found HWSP value %x for seqno %x\n", 888 name, w->map[offset + 1], w->map[offset]); 889 err = -EINVAL; 890 } 891 892 offset += 2; 893 } 894 895 out: 896 i915_request_put(rq); 897 return err; 898 } 899 900 static void cleanup_watcher(struct hwsp_watcher *w) 901 { 902 if (w->rq) { 903 struct intel_timeline *tl = w->rq->context->timeline; 904 905 mutex_lock(&tl->mutex); 906 w->rq->cookie = lockdep_pin_lock(&tl->mutex); 907 908 i915_request_add(w->rq); 909 } 910 911 i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP); 912 } 913 914 static bool retire_requests(struct intel_timeline *tl) 915 { 916 struct i915_request *rq, *rn; 917 918 mutex_lock(&tl->mutex); 919 list_for_each_entry_safe(rq, rn, &tl->requests, link) 920 if (!i915_request_retire(rq)) 921 break; 922 mutex_unlock(&tl->mutex); 923 924 return !i915_active_fence_isset(&tl->last_request); 925 } 926 927 static struct i915_request *wrap_timeline(struct i915_request *rq) 928 { 929 struct intel_context *ce = rq->context; 930 struct intel_timeline *tl = ce->timeline; 931 u32 seqno = rq->fence.seqno; 932 933 while (tl->seqno >= seqno) { /* Cause a wrap */ 934 i915_request_put(rq); 935 rq = intel_context_create_request(ce); 936 if (IS_ERR(rq)) 937 return rq; 938 939 i915_request_get(rq); 940 i915_request_add(rq); 941 } 942 943 i915_request_put(rq); 944 rq = intel_context_create_request(ce); 945 if (IS_ERR(rq)) 946 return rq; 947 948 i915_request_get(rq); 949 i915_request_add(rq); 950 951 return rq; 952 } 953 954 static int live_hwsp_read(void *arg) 955 { 956 struct intel_gt *gt = arg; 957 struct hwsp_watcher watcher[2] = {}; 958 struct intel_engine_cs *engine; 959 struct intel_timeline *tl; 960 enum intel_engine_id id; 961 int err = 0; 962 int i; 963 964 /* 965 * If we take a reference to the HWSP for reading on the GPU, that 966 * read may be arbitrarily delayed (either by foreign fence or 967 * priority saturation) and a wrap can happen within 30 minutes. 968 * When the GPU read is finally submitted it should be correct, 969 * even across multiple wraps. 970 */ 971 972 if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */ 973 return 0; 974 975 tl = intel_timeline_create(gt); 976 if (IS_ERR(tl)) 977 return PTR_ERR(tl); 978 979 if (!tl->hwsp_cacheline) 980 goto out_free; 981 982 for (i = 0; i < ARRAY_SIZE(watcher); i++) { 983 err = setup_watcher(&watcher[i], gt); 984 if (err) 985 goto out; 986 } 987 988 for_each_engine(engine, gt, id) { 989 struct intel_context *ce; 990 unsigned long count = 0; 991 IGT_TIMEOUT(end_time); 992 993 /* Create a request we can use for remote reading of the HWSP */ 994 err = create_watcher(&watcher[1], engine, SZ_512K); 995 if (err) 996 goto out; 997 998 do { 999 struct i915_sw_fence *submit; 1000 struct i915_request *rq; 1001 u32 hwsp; 1002 1003 submit = heap_fence_create(GFP_KERNEL); 1004 if (!submit) { 1005 err = -ENOMEM; 1006 goto out; 1007 } 1008 1009 err = create_watcher(&watcher[0], engine, SZ_4K); 1010 if (err) 1011 goto out; 1012 1013 ce = intel_context_create(engine); 1014 if (IS_ERR(ce)) { 1015 err = PTR_ERR(ce); 1016 goto out; 1017 } 1018 1019 /* Skip to the end, saving 30 minutes of nops */ 1020 tl->seqno = -10u + 2 * (count & 3); 1021 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); 1022 ce->timeline = intel_timeline_get(tl); 1023 1024 rq = intel_context_create_request(ce); 1025 if (IS_ERR(rq)) { 1026 err = PTR_ERR(rq); 1027 intel_context_put(ce); 1028 goto out; 1029 } 1030 1031 err = i915_sw_fence_await_dma_fence(&rq->submit, 1032 &watcher[0].rq->fence, 0, 1033 GFP_KERNEL); 1034 if (err < 0) { 1035 i915_request_add(rq); 1036 intel_context_put(ce); 1037 goto out; 1038 } 1039 1040 mutex_lock(&watcher[0].rq->context->timeline->mutex); 1041 err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp); 1042 if (err == 0) 1043 err = emit_read_hwsp(watcher[0].rq, /* before */ 1044 rq->fence.seqno, hwsp, 1045 &watcher[0].addr); 1046 mutex_unlock(&watcher[0].rq->context->timeline->mutex); 1047 if (err) { 1048 i915_request_add(rq); 1049 intel_context_put(ce); 1050 goto out; 1051 } 1052 1053 mutex_lock(&watcher[1].rq->context->timeline->mutex); 1054 err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp); 1055 if (err == 0) 1056 err = emit_read_hwsp(watcher[1].rq, /* after */ 1057 rq->fence.seqno, hwsp, 1058 &watcher[1].addr); 1059 mutex_unlock(&watcher[1].rq->context->timeline->mutex); 1060 if (err) { 1061 i915_request_add(rq); 1062 intel_context_put(ce); 1063 goto out; 1064 } 1065 1066 i915_request_get(rq); 1067 i915_request_add(rq); 1068 1069 rq = wrap_timeline(rq); 1070 intel_context_put(ce); 1071 if (IS_ERR(rq)) { 1072 err = PTR_ERR(rq); 1073 goto out; 1074 } 1075 1076 err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit, 1077 &rq->fence, 0, 1078 GFP_KERNEL); 1079 if (err < 0) { 1080 i915_request_put(rq); 1081 goto out; 1082 } 1083 1084 err = check_watcher(&watcher[0], "before", cmp_lt); 1085 i915_sw_fence_commit(submit); 1086 heap_fence_put(submit); 1087 if (err) { 1088 i915_request_put(rq); 1089 goto out; 1090 } 1091 count++; 1092 1093 if (8 * watcher[1].rq->ring->emit > 1094 3 * watcher[1].rq->ring->size) { 1095 i915_request_put(rq); 1096 break; 1097 } 1098 1099 /* Flush the timeline before manually wrapping again */ 1100 if (i915_request_wait(rq, 1101 I915_WAIT_INTERRUPTIBLE, 1102 HZ) < 0) { 1103 err = -ETIME; 1104 i915_request_put(rq); 1105 goto out; 1106 } 1107 1108 retire_requests(tl); 1109 i915_request_put(rq); 1110 } while (!__igt_timeout(end_time, NULL)); 1111 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef); 1112 1113 pr_info("%s: simulated %lu wraps\n", engine->name, count); 1114 err = check_watcher(&watcher[1], "after", cmp_gte); 1115 if (err) 1116 goto out; 1117 } 1118 1119 out: 1120 for (i = 0; i < ARRAY_SIZE(watcher); i++) 1121 cleanup_watcher(&watcher[i]); 1122 1123 if (igt_flush_test(gt->i915)) 1124 err = -EIO; 1125 1126 out_free: 1127 intel_timeline_put(tl); 1128 return err; 1129 } 1130 1131 static int live_hwsp_rollover_kernel(void *arg) 1132 { 1133 struct intel_gt *gt = arg; 1134 struct intel_engine_cs *engine; 1135 enum intel_engine_id id; 1136 int err = 0; 1137 1138 /* 1139 * Run the host for long enough, and even the kernel context will 1140 * see a seqno rollover. 1141 */ 1142 1143 for_each_engine(engine, gt, id) { 1144 struct intel_context *ce = engine->kernel_context; 1145 struct intel_timeline *tl = ce->timeline; 1146 struct i915_request *rq[3] = {}; 1147 int i; 1148 1149 st_engine_heartbeat_disable(engine); 1150 if (intel_gt_wait_for_idle(gt, HZ / 2)) { 1151 err = -EIO; 1152 goto out; 1153 } 1154 1155 GEM_BUG_ON(i915_active_fence_isset(&tl->last_request)); 1156 tl->seqno = 0; 1157 timeline_rollback(tl); 1158 timeline_rollback(tl); 1159 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); 1160 1161 for (i = 0; i < ARRAY_SIZE(rq); i++) { 1162 struct i915_request *this; 1163 1164 this = i915_request_create(ce); 1165 if (IS_ERR(this)) { 1166 err = PTR_ERR(this); 1167 goto out; 1168 } 1169 1170 pr_debug("%s: create fence.seqnp:%d\n", 1171 engine->name, 1172 lower_32_bits(this->fence.seqno)); 1173 1174 GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl); 1175 1176 rq[i] = i915_request_get(this); 1177 i915_request_add(this); 1178 } 1179 1180 /* We expected a wrap! */ 1181 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno); 1182 1183 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { 1184 pr_err("Wait for timeline wrap timed out!\n"); 1185 err = -EIO; 1186 goto out; 1187 } 1188 1189 for (i = 0; i < ARRAY_SIZE(rq); i++) { 1190 if (!i915_request_completed(rq[i])) { 1191 pr_err("Pre-wrap request not completed!\n"); 1192 err = -EINVAL; 1193 goto out; 1194 } 1195 } 1196 1197 out: 1198 for (i = 0; i < ARRAY_SIZE(rq); i++) 1199 i915_request_put(rq[i]); 1200 st_engine_heartbeat_enable(engine); 1201 if (err) 1202 break; 1203 } 1204 1205 if (igt_flush_test(gt->i915)) 1206 err = -EIO; 1207 1208 return err; 1209 } 1210 1211 static int live_hwsp_rollover_user(void *arg) 1212 { 1213 struct intel_gt *gt = arg; 1214 struct intel_engine_cs *engine; 1215 enum intel_engine_id id; 1216 int err = 0; 1217 1218 /* 1219 * Simulate a long running user context, and force the seqno wrap 1220 * on the user's timeline. 1221 */ 1222 1223 for_each_engine(engine, gt, id) { 1224 struct i915_request *rq[3] = {}; 1225 struct intel_timeline *tl; 1226 struct intel_context *ce; 1227 int i; 1228 1229 ce = intel_context_create(engine); 1230 if (IS_ERR(ce)) 1231 return PTR_ERR(ce); 1232 1233 err = intel_context_alloc_state(ce); 1234 if (err) 1235 goto out; 1236 1237 tl = ce->timeline; 1238 if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) 1239 goto out; 1240 1241 timeline_rollback(tl); 1242 timeline_rollback(tl); 1243 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); 1244 1245 for (i = 0; i < ARRAY_SIZE(rq); i++) { 1246 struct i915_request *this; 1247 1248 this = intel_context_create_request(ce); 1249 if (IS_ERR(this)) { 1250 err = PTR_ERR(this); 1251 goto out; 1252 } 1253 1254 pr_debug("%s: create fence.seqnp:%d\n", 1255 engine->name, 1256 lower_32_bits(this->fence.seqno)); 1257 1258 GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl); 1259 1260 rq[i] = i915_request_get(this); 1261 i915_request_add(this); 1262 } 1263 1264 /* We expected a wrap! */ 1265 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno); 1266 1267 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { 1268 pr_err("Wait for timeline wrap timed out!\n"); 1269 err = -EIO; 1270 goto out; 1271 } 1272 1273 for (i = 0; i < ARRAY_SIZE(rq); i++) { 1274 if (!i915_request_completed(rq[i])) { 1275 pr_err("Pre-wrap request not completed!\n"); 1276 err = -EINVAL; 1277 goto out; 1278 } 1279 } 1280 1281 out: 1282 for (i = 0; i < ARRAY_SIZE(rq); i++) 1283 i915_request_put(rq[i]); 1284 intel_context_put(ce); 1285 if (err) 1286 break; 1287 } 1288 1289 if (igt_flush_test(gt->i915)) 1290 err = -EIO; 1291 1292 return err; 1293 } 1294 1295 static int live_hwsp_recycle(void *arg) 1296 { 1297 struct intel_gt *gt = arg; 1298 struct intel_engine_cs *engine; 1299 enum intel_engine_id id; 1300 unsigned long count; 1301 int err = 0; 1302 1303 /* 1304 * Check seqno writes into one timeline at a time. We expect to 1305 * recycle the breadcrumb slot between iterations and neither 1306 * want to confuse ourselves or the GPU. 1307 */ 1308 1309 count = 0; 1310 for_each_engine(engine, gt, id) { 1311 IGT_TIMEOUT(end_time); 1312 1313 if (!intel_engine_can_store_dword(engine)) 1314 continue; 1315 1316 intel_engine_pm_get(engine); 1317 1318 do { 1319 struct intel_timeline *tl; 1320 struct i915_request *rq; 1321 1322 tl = checked_intel_timeline_create(gt); 1323 if (IS_ERR(tl)) { 1324 err = PTR_ERR(tl); 1325 break; 1326 } 1327 1328 rq = tl_write(tl, engine, count); 1329 if (IS_ERR(rq)) { 1330 intel_timeline_put(tl); 1331 err = PTR_ERR(rq); 1332 break; 1333 } 1334 1335 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1336 pr_err("Wait for timeline writes timed out!\n"); 1337 i915_request_put(rq); 1338 intel_timeline_put(tl); 1339 err = -EIO; 1340 break; 1341 } 1342 1343 if (READ_ONCE(*tl->hwsp_seqno) != count) { 1344 GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n", 1345 count, tl->fence_context, 1346 tl->hwsp_offset, *tl->hwsp_seqno); 1347 GEM_TRACE_DUMP(); 1348 err = -EINVAL; 1349 } 1350 1351 i915_request_put(rq); 1352 intel_timeline_put(tl); 1353 count++; 1354 1355 if (err) 1356 break; 1357 } while (!__igt_timeout(end_time, NULL)); 1358 1359 intel_engine_pm_put(engine); 1360 if (err) 1361 break; 1362 } 1363 1364 return err; 1365 } 1366 1367 int intel_timeline_live_selftests(struct drm_i915_private *i915) 1368 { 1369 static const struct i915_subtest tests[] = { 1370 SUBTEST(live_hwsp_recycle), 1371 SUBTEST(live_hwsp_engine), 1372 SUBTEST(live_hwsp_alternate), 1373 SUBTEST(live_hwsp_wrap), 1374 SUBTEST(live_hwsp_read), 1375 SUBTEST(live_hwsp_rollover_kernel), 1376 SUBTEST(live_hwsp_rollover_user), 1377 }; 1378 1379 if (intel_gt_is_wedged(&i915->gt)) 1380 return 0; 1381 1382 return intel_gt_live_subtests(tests, &i915->gt); 1383 } 1384