124f90d66SChris Wilson // SPDX-License-Identifier: MIT 2f0c02c1bSTvrtko Ursulin /* 3f0c02c1bSTvrtko Ursulin * Copyright © 2017-2018 Intel Corporation 4f0c02c1bSTvrtko Ursulin */ 5f0c02c1bSTvrtko Ursulin 6f0c02c1bSTvrtko Ursulin #include <linux/prime_numbers.h> 7*01fabda8SLucas De Marchi #include <linux/string_helpers.h> 8f0c02c1bSTvrtko Ursulin 9bb5e4397SChris Wilson #include "intel_context.h" 10bb5e4397SChris Wilson #include "intel_engine_heartbeat.h" 117e805762SChris Wilson #include "intel_engine_pm.h" 12202b1f4cSMatt Roper #include "intel_engine_regs.h" 1345233ab2SChris Wilson #include "intel_gpu_commands.h" 14cb823ed9SChris Wilson #include "intel_gt.h" 1566101975SChris Wilson #include "intel_gt_requests.h" 162871ea85SChris Wilson #include "intel_ring.h" 171b90e4a4SChris Wilson #include "selftest_engine_heartbeat.h" 18f0c02c1bSTvrtko Ursulin 19f0c02c1bSTvrtko Ursulin #include "../selftests/i915_random.h" 20f0c02c1bSTvrtko Ursulin #include "../i915_selftest.h" 21f0c02c1bSTvrtko Ursulin 226e7a21e7SChris Wilson #include "selftests/igt_flush_test.h" 236e7a21e7SChris Wilson #include "selftests/lib_sw_fence.h" 246e7a21e7SChris Wilson #include "selftests/mock_gem_device.h" 25f0c02c1bSTvrtko Ursulin #include "selftests/mock_timeline.h" 26f0c02c1bSTvrtko Ursulin 27f0c02c1bSTvrtko Ursulin static struct page *hwsp_page(struct intel_timeline *tl) 28f0c02c1bSTvrtko Ursulin { 29f0c02c1bSTvrtko Ursulin struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; 30f0c02c1bSTvrtko Ursulin 31f0c02c1bSTvrtko Ursulin GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 32f0c02c1bSTvrtko Ursulin return sg_page(obj->mm.pages->sgl); 33f0c02c1bSTvrtko Ursulin } 34f0c02c1bSTvrtko Ursulin 35f0c02c1bSTvrtko Ursulin static unsigned long hwsp_cacheline(struct intel_timeline *tl) 36f0c02c1bSTvrtko Ursulin { 37f0c02c1bSTvrtko Ursulin unsigned long address = (unsigned long)page_address(hwsp_page(tl)); 38f0c02c1bSTvrtko Ursulin 3912ca695dSMaarten Lankhorst return (address + 
offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES; 40f0c02c1bSTvrtko Ursulin } 41f0c02c1bSTvrtko Ursulin 421060974cSMaarten Lankhorst static int selftest_tl_pin(struct intel_timeline *tl) 431060974cSMaarten Lankhorst { 441060974cSMaarten Lankhorst struct i915_gem_ww_ctx ww; 451060974cSMaarten Lankhorst int err; 461060974cSMaarten Lankhorst 471060974cSMaarten Lankhorst i915_gem_ww_ctx_init(&ww, false); 481060974cSMaarten Lankhorst retry: 491060974cSMaarten Lankhorst err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww); 501060974cSMaarten Lankhorst if (!err) 511060974cSMaarten Lankhorst err = intel_timeline_pin(tl, &ww); 521060974cSMaarten Lankhorst 531060974cSMaarten Lankhorst if (err == -EDEADLK) { 541060974cSMaarten Lankhorst err = i915_gem_ww_ctx_backoff(&ww); 551060974cSMaarten Lankhorst if (!err) 561060974cSMaarten Lankhorst goto retry; 571060974cSMaarten Lankhorst } 581060974cSMaarten Lankhorst i915_gem_ww_ctx_fini(&ww); 591060974cSMaarten Lankhorst return err; 601060974cSMaarten Lankhorst } 611060974cSMaarten Lankhorst 6212ca695dSMaarten Lankhorst /* Only half of seqno's are usable, see __intel_timeline_get_seqno() */ 6312ca695dSMaarten Lankhorst #define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2) 64f0c02c1bSTvrtko Ursulin 65f0c02c1bSTvrtko Ursulin struct mock_hwsp_freelist { 665f65d5a6SChris Wilson struct intel_gt *gt; 67f0c02c1bSTvrtko Ursulin struct radix_tree_root cachelines; 68f0c02c1bSTvrtko Ursulin struct intel_timeline **history; 69f0c02c1bSTvrtko Ursulin unsigned long count, max; 70f0c02c1bSTvrtko Ursulin struct rnd_state prng; 71f0c02c1bSTvrtko Ursulin }; 72f0c02c1bSTvrtko Ursulin 73f0c02c1bSTvrtko Ursulin enum { 74f0c02c1bSTvrtko Ursulin SHUFFLE = BIT(0), 75f0c02c1bSTvrtko Ursulin }; 76f0c02c1bSTvrtko Ursulin 77f0c02c1bSTvrtko Ursulin static void __mock_hwsp_record(struct mock_hwsp_freelist *state, 78f0c02c1bSTvrtko Ursulin unsigned int idx, 79f0c02c1bSTvrtko Ursulin struct intel_timeline *tl) 80f0c02c1bSTvrtko Ursulin { 
81f0c02c1bSTvrtko Ursulin tl = xchg(&state->history[idx], tl); 82f0c02c1bSTvrtko Ursulin if (tl) { 83f0c02c1bSTvrtko Ursulin radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); 842c8ab333SMaarten Lankhorst intel_timeline_unpin(tl); 85f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 86f0c02c1bSTvrtko Ursulin } 87f0c02c1bSTvrtko Ursulin } 88f0c02c1bSTvrtko Ursulin 89f0c02c1bSTvrtko Ursulin static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, 90f0c02c1bSTvrtko Ursulin unsigned int count, 91f0c02c1bSTvrtko Ursulin unsigned int flags) 92f0c02c1bSTvrtko Ursulin { 93f0c02c1bSTvrtko Ursulin struct intel_timeline *tl; 94f0c02c1bSTvrtko Ursulin unsigned int idx; 95f0c02c1bSTvrtko Ursulin 96f0c02c1bSTvrtko Ursulin while (count--) { 97f0c02c1bSTvrtko Ursulin unsigned long cacheline; 98f0c02c1bSTvrtko Ursulin int err; 99f0c02c1bSTvrtko Ursulin 100d1bf5dd8SChris Wilson tl = intel_timeline_create(state->gt); 101f0c02c1bSTvrtko Ursulin if (IS_ERR(tl)) 102f0c02c1bSTvrtko Ursulin return PTR_ERR(tl); 103f0c02c1bSTvrtko Ursulin 1041060974cSMaarten Lankhorst err = selftest_tl_pin(tl); 1052c8ab333SMaarten Lankhorst if (err) { 1062c8ab333SMaarten Lankhorst intel_timeline_put(tl); 1072c8ab333SMaarten Lankhorst return err; 1082c8ab333SMaarten Lankhorst } 1092c8ab333SMaarten Lankhorst 110f0c02c1bSTvrtko Ursulin cacheline = hwsp_cacheline(tl); 111f0c02c1bSTvrtko Ursulin err = radix_tree_insert(&state->cachelines, cacheline, tl); 112f0c02c1bSTvrtko Ursulin if (err) { 113f0c02c1bSTvrtko Ursulin if (err == -EEXIST) { 114f0c02c1bSTvrtko Ursulin pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", 115f0c02c1bSTvrtko Ursulin cacheline); 116f0c02c1bSTvrtko Ursulin } 1172c8ab333SMaarten Lankhorst intel_timeline_unpin(tl); 118f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 119f0c02c1bSTvrtko Ursulin return err; 120f0c02c1bSTvrtko Ursulin } 121f0c02c1bSTvrtko Ursulin 122f0c02c1bSTvrtko Ursulin idx = state->count++ % state->max; 123f0c02c1bSTvrtko Ursulin 
__mock_hwsp_record(state, idx, tl); 124f0c02c1bSTvrtko Ursulin } 125f0c02c1bSTvrtko Ursulin 126f0c02c1bSTvrtko Ursulin if (flags & SHUFFLE) 127f0c02c1bSTvrtko Ursulin i915_prandom_shuffle(state->history, 128f0c02c1bSTvrtko Ursulin sizeof(*state->history), 129f0c02c1bSTvrtko Ursulin min(state->count, state->max), 130f0c02c1bSTvrtko Ursulin &state->prng); 131f0c02c1bSTvrtko Ursulin 132f0c02c1bSTvrtko Ursulin count = i915_prandom_u32_max_state(min(state->count, state->max), 133f0c02c1bSTvrtko Ursulin &state->prng); 134f0c02c1bSTvrtko Ursulin while (count--) { 135f0c02c1bSTvrtko Ursulin idx = --state->count % state->max; 136f0c02c1bSTvrtko Ursulin __mock_hwsp_record(state, idx, NULL); 137f0c02c1bSTvrtko Ursulin } 138f0c02c1bSTvrtko Ursulin 139f0c02c1bSTvrtko Ursulin return 0; 140f0c02c1bSTvrtko Ursulin } 141f0c02c1bSTvrtko Ursulin 142f0c02c1bSTvrtko Ursulin static int mock_hwsp_freelist(void *arg) 143f0c02c1bSTvrtko Ursulin { 144f0c02c1bSTvrtko Ursulin struct mock_hwsp_freelist state; 1455f65d5a6SChris Wilson struct drm_i915_private *i915; 146f0c02c1bSTvrtko Ursulin const struct { 147f0c02c1bSTvrtko Ursulin const char *name; 148f0c02c1bSTvrtko Ursulin unsigned int flags; 149f0c02c1bSTvrtko Ursulin } phases[] = { 150f0c02c1bSTvrtko Ursulin { "linear", 0 }, 151f0c02c1bSTvrtko Ursulin { "shuffled", SHUFFLE }, 152f0c02c1bSTvrtko Ursulin { }, 153f0c02c1bSTvrtko Ursulin }, *p; 154f0c02c1bSTvrtko Ursulin unsigned int na; 155f0c02c1bSTvrtko Ursulin int err = 0; 156f0c02c1bSTvrtko Ursulin 1575f65d5a6SChris Wilson i915 = mock_gem_device(); 1585f65d5a6SChris Wilson if (!i915) 1595f65d5a6SChris Wilson return -ENOMEM; 1605f65d5a6SChris Wilson 161f0c02c1bSTvrtko Ursulin INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); 162f0c02c1bSTvrtko Ursulin state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); 163f0c02c1bSTvrtko Ursulin 164c14adcbdSMichał Winiarski state.gt = to_gt(i915); 165f0c02c1bSTvrtko Ursulin 166f0c02c1bSTvrtko Ursulin /* 167f0c02c1bSTvrtko Ursulin * Create a 
bunch of timelines and check that their HWSP do not overlap. 168f0c02c1bSTvrtko Ursulin * Free some, and try again. 169f0c02c1bSTvrtko Ursulin */ 170f0c02c1bSTvrtko Ursulin 171f0c02c1bSTvrtko Ursulin state.max = PAGE_SIZE / sizeof(*state.history); 172f0c02c1bSTvrtko Ursulin state.count = 0; 173f0c02c1bSTvrtko Ursulin state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); 174f0c02c1bSTvrtko Ursulin if (!state.history) { 175f0c02c1bSTvrtko Ursulin err = -ENOMEM; 176f0c02c1bSTvrtko Ursulin goto err_put; 177f0c02c1bSTvrtko Ursulin } 178f0c02c1bSTvrtko Ursulin 179f0c02c1bSTvrtko Ursulin for (p = phases; p->name; p++) { 180f0c02c1bSTvrtko Ursulin pr_debug("%s(%s)\n", __func__, p->name); 181f0c02c1bSTvrtko Ursulin for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { 182f0c02c1bSTvrtko Ursulin err = __mock_hwsp_timeline(&state, na, p->flags); 183f0c02c1bSTvrtko Ursulin if (err) 184f0c02c1bSTvrtko Ursulin goto out; 185f0c02c1bSTvrtko Ursulin } 186f0c02c1bSTvrtko Ursulin } 187f0c02c1bSTvrtko Ursulin 188f0c02c1bSTvrtko Ursulin out: 189f0c02c1bSTvrtko Ursulin for (na = 0; na < state.max; na++) 190f0c02c1bSTvrtko Ursulin __mock_hwsp_record(&state, na, NULL); 191f0c02c1bSTvrtko Ursulin kfree(state.history); 192f0c02c1bSTvrtko Ursulin err_put: 19382be0d75SDaniel Vetter mock_destroy_device(i915); 194f0c02c1bSTvrtko Ursulin return err; 195f0c02c1bSTvrtko Ursulin } 196f0c02c1bSTvrtko Ursulin 197f0c02c1bSTvrtko Ursulin struct __igt_sync { 198f0c02c1bSTvrtko Ursulin const char *name; 199f0c02c1bSTvrtko Ursulin u32 seqno; 200f0c02c1bSTvrtko Ursulin bool expected; 201f0c02c1bSTvrtko Ursulin bool set; 202f0c02c1bSTvrtko Ursulin }; 203f0c02c1bSTvrtko Ursulin 204f0c02c1bSTvrtko Ursulin static int __igt_sync(struct intel_timeline *tl, 205f0c02c1bSTvrtko Ursulin u64 ctx, 206f0c02c1bSTvrtko Ursulin const struct __igt_sync *p, 207f0c02c1bSTvrtko Ursulin const char *name) 208f0c02c1bSTvrtko Ursulin { 209f0c02c1bSTvrtko Ursulin int ret; 210f0c02c1bSTvrtko Ursulin 
211f0c02c1bSTvrtko Ursulin if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { 212f0c02c1bSTvrtko Ursulin pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", 213*01fabda8SLucas De Marchi name, p->name, ctx, p->seqno, str_yes_no(p->expected)); 214f0c02c1bSTvrtko Ursulin return -EINVAL; 215f0c02c1bSTvrtko Ursulin } 216f0c02c1bSTvrtko Ursulin 217f0c02c1bSTvrtko Ursulin if (p->set) { 218f0c02c1bSTvrtko Ursulin ret = __intel_timeline_sync_set(tl, ctx, p->seqno); 219f0c02c1bSTvrtko Ursulin if (ret) 220f0c02c1bSTvrtko Ursulin return ret; 221f0c02c1bSTvrtko Ursulin } 222f0c02c1bSTvrtko Ursulin 223f0c02c1bSTvrtko Ursulin return 0; 224f0c02c1bSTvrtko Ursulin } 225f0c02c1bSTvrtko Ursulin 226f0c02c1bSTvrtko Ursulin static int igt_sync(void *arg) 227f0c02c1bSTvrtko Ursulin { 228f0c02c1bSTvrtko Ursulin const struct __igt_sync pass[] = { 229f0c02c1bSTvrtko Ursulin { "unset", 0, false, false }, 230f0c02c1bSTvrtko Ursulin { "new", 0, false, true }, 231f0c02c1bSTvrtko Ursulin { "0a", 0, true, true }, 232f0c02c1bSTvrtko Ursulin { "1a", 1, false, true }, 233f0c02c1bSTvrtko Ursulin { "1b", 1, true, true }, 234f0c02c1bSTvrtko Ursulin { "0b", 0, true, false }, 235f0c02c1bSTvrtko Ursulin { "2a", 2, false, true }, 236f0c02c1bSTvrtko Ursulin { "4", 4, false, true }, 237f0c02c1bSTvrtko Ursulin { "INT_MAX", INT_MAX, false, true }, 238f0c02c1bSTvrtko Ursulin { "INT_MAX-1", INT_MAX-1, true, false }, 239f0c02c1bSTvrtko Ursulin { "INT_MAX+1", (u32)INT_MAX+1, false, true }, 240f0c02c1bSTvrtko Ursulin { "INT_MAX", INT_MAX, true, false }, 241f0c02c1bSTvrtko Ursulin { "UINT_MAX", UINT_MAX, false, true }, 242f0c02c1bSTvrtko Ursulin { "wrap", 0, false, true }, 243f0c02c1bSTvrtko Ursulin { "unwrap", UINT_MAX, true, false }, 244f0c02c1bSTvrtko Ursulin {}, 245f0c02c1bSTvrtko Ursulin }, *p; 246f0c02c1bSTvrtko Ursulin struct intel_timeline tl; 247f0c02c1bSTvrtko Ursulin int order, offset; 248f0c02c1bSTvrtko Ursulin int ret = -ENODEV; 249f0c02c1bSTvrtko Ursulin 
250f0c02c1bSTvrtko Ursulin mock_timeline_init(&tl, 0); 251f0c02c1bSTvrtko Ursulin for (p = pass; p->name; p++) { 252f0c02c1bSTvrtko Ursulin for (order = 1; order < 64; order++) { 253f0c02c1bSTvrtko Ursulin for (offset = -1; offset <= (order > 1); offset++) { 254f0c02c1bSTvrtko Ursulin u64 ctx = BIT_ULL(order) + offset; 255f0c02c1bSTvrtko Ursulin 256f0c02c1bSTvrtko Ursulin ret = __igt_sync(&tl, ctx, p, "1"); 257f0c02c1bSTvrtko Ursulin if (ret) 258f0c02c1bSTvrtko Ursulin goto out; 259f0c02c1bSTvrtko Ursulin } 260f0c02c1bSTvrtko Ursulin } 261f0c02c1bSTvrtko Ursulin } 262f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 263f0c02c1bSTvrtko Ursulin 264f0c02c1bSTvrtko Ursulin mock_timeline_init(&tl, 0); 265f0c02c1bSTvrtko Ursulin for (order = 1; order < 64; order++) { 266f0c02c1bSTvrtko Ursulin for (offset = -1; offset <= (order > 1); offset++) { 267f0c02c1bSTvrtko Ursulin u64 ctx = BIT_ULL(order) + offset; 268f0c02c1bSTvrtko Ursulin 269f0c02c1bSTvrtko Ursulin for (p = pass; p->name; p++) { 270f0c02c1bSTvrtko Ursulin ret = __igt_sync(&tl, ctx, p, "2"); 271f0c02c1bSTvrtko Ursulin if (ret) 272f0c02c1bSTvrtko Ursulin goto out; 273f0c02c1bSTvrtko Ursulin } 274f0c02c1bSTvrtko Ursulin } 275f0c02c1bSTvrtko Ursulin } 276f0c02c1bSTvrtko Ursulin 277f0c02c1bSTvrtko Ursulin out: 278f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 279f0c02c1bSTvrtko Ursulin return ret; 280f0c02c1bSTvrtko Ursulin } 281f0c02c1bSTvrtko Ursulin 282f0c02c1bSTvrtko Ursulin static unsigned int random_engine(struct rnd_state *rnd) 283f0c02c1bSTvrtko Ursulin { 284f0c02c1bSTvrtko Ursulin return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); 285f0c02c1bSTvrtko Ursulin } 286f0c02c1bSTvrtko Ursulin 287f0c02c1bSTvrtko Ursulin static int bench_sync(void *arg) 288f0c02c1bSTvrtko Ursulin { 289f0c02c1bSTvrtko Ursulin struct rnd_state prng; 290f0c02c1bSTvrtko Ursulin struct intel_timeline tl; 291f0c02c1bSTvrtko Ursulin unsigned long end_time, count; 292f0c02c1bSTvrtko Ursulin u64 prng32_1M; 293f0c02c1bSTvrtko Ursulin 
ktime_t kt; 294f0c02c1bSTvrtko Ursulin int order, last_order; 295f0c02c1bSTvrtko Ursulin 296f0c02c1bSTvrtko Ursulin mock_timeline_init(&tl, 0); 297f0c02c1bSTvrtko Ursulin 298f0c02c1bSTvrtko Ursulin /* Lookups from cache are very fast and so the random number generation 299f0c02c1bSTvrtko Ursulin * and the loop itself becomes a significant factor in the per-iteration 300f0c02c1bSTvrtko Ursulin * timings. We try to compensate the results by measuring the overhead 301f0c02c1bSTvrtko Ursulin * of the prng and subtract it from the reported results. 302f0c02c1bSTvrtko Ursulin */ 303f0c02c1bSTvrtko Ursulin prandom_seed_state(&prng, i915_selftest.random_seed); 304f0c02c1bSTvrtko Ursulin count = 0; 305f0c02c1bSTvrtko Ursulin kt = ktime_get(); 306f0c02c1bSTvrtko Ursulin end_time = jiffies + HZ/10; 307f0c02c1bSTvrtko Ursulin do { 308f0c02c1bSTvrtko Ursulin u32 x; 309f0c02c1bSTvrtko Ursulin 310f0c02c1bSTvrtko Ursulin /* Make sure the compiler doesn't optimise away the prng call */ 311f0c02c1bSTvrtko Ursulin WRITE_ONCE(x, prandom_u32_state(&prng)); 312f0c02c1bSTvrtko Ursulin 313f0c02c1bSTvrtko Ursulin count++; 314f0c02c1bSTvrtko Ursulin } while (!time_after(jiffies, end_time)); 315f0c02c1bSTvrtko Ursulin kt = ktime_sub(ktime_get(), kt); 316f0c02c1bSTvrtko Ursulin pr_debug("%s: %lu random evaluations, %lluns/prng\n", 317f0c02c1bSTvrtko Ursulin __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 318f0c02c1bSTvrtko Ursulin prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); 319f0c02c1bSTvrtko Ursulin 320f0c02c1bSTvrtko Ursulin /* Benchmark (only) setting random context ids */ 321f0c02c1bSTvrtko Ursulin prandom_seed_state(&prng, i915_selftest.random_seed); 322f0c02c1bSTvrtko Ursulin count = 0; 323f0c02c1bSTvrtko Ursulin kt = ktime_get(); 324f0c02c1bSTvrtko Ursulin end_time = jiffies + HZ/10; 325f0c02c1bSTvrtko Ursulin do { 326f0c02c1bSTvrtko Ursulin u64 id = i915_prandom_u64_state(&prng); 327f0c02c1bSTvrtko Ursulin 328f0c02c1bSTvrtko Ursulin 
__intel_timeline_sync_set(&tl, id, 0); 329f0c02c1bSTvrtko Ursulin count++; 330f0c02c1bSTvrtko Ursulin } while (!time_after(jiffies, end_time)); 331f0c02c1bSTvrtko Ursulin kt = ktime_sub(ktime_get(), kt); 332f0c02c1bSTvrtko Ursulin kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 333f0c02c1bSTvrtko Ursulin pr_info("%s: %lu random insertions, %lluns/insert\n", 334f0c02c1bSTvrtko Ursulin __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 335f0c02c1bSTvrtko Ursulin 336f0c02c1bSTvrtko Ursulin /* Benchmark looking up the exact same context ids as we just set */ 337f0c02c1bSTvrtko Ursulin prandom_seed_state(&prng, i915_selftest.random_seed); 338f0c02c1bSTvrtko Ursulin end_time = count; 339f0c02c1bSTvrtko Ursulin kt = ktime_get(); 340f0c02c1bSTvrtko Ursulin while (end_time--) { 341f0c02c1bSTvrtko Ursulin u64 id = i915_prandom_u64_state(&prng); 342f0c02c1bSTvrtko Ursulin 343f0c02c1bSTvrtko Ursulin if (!__intel_timeline_sync_is_later(&tl, id, 0)) { 344f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 345f0c02c1bSTvrtko Ursulin pr_err("Lookup of %llu failed\n", id); 346f0c02c1bSTvrtko Ursulin return -EINVAL; 347f0c02c1bSTvrtko Ursulin } 348f0c02c1bSTvrtko Ursulin } 349f0c02c1bSTvrtko Ursulin kt = ktime_sub(ktime_get(), kt); 350f0c02c1bSTvrtko Ursulin kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 351f0c02c1bSTvrtko Ursulin pr_info("%s: %lu random lookups, %lluns/lookup\n", 352f0c02c1bSTvrtko Ursulin __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 353f0c02c1bSTvrtko Ursulin 354f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 355f0c02c1bSTvrtko Ursulin cond_resched(); 356f0c02c1bSTvrtko Ursulin 357f0c02c1bSTvrtko Ursulin mock_timeline_init(&tl, 0); 358f0c02c1bSTvrtko Ursulin 359f0c02c1bSTvrtko Ursulin /* Benchmark setting the first N (in order) contexts */ 360f0c02c1bSTvrtko Ursulin count = 0; 361f0c02c1bSTvrtko Ursulin kt = ktime_get(); 362f0c02c1bSTvrtko Ursulin end_time = jiffies + HZ/10; 363f0c02c1bSTvrtko Ursulin do { 
364f0c02c1bSTvrtko Ursulin __intel_timeline_sync_set(&tl, count++, 0); 365f0c02c1bSTvrtko Ursulin } while (!time_after(jiffies, end_time)); 366f0c02c1bSTvrtko Ursulin kt = ktime_sub(ktime_get(), kt); 367f0c02c1bSTvrtko Ursulin pr_info("%s: %lu in-order insertions, %lluns/insert\n", 368f0c02c1bSTvrtko Ursulin __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 369f0c02c1bSTvrtko Ursulin 370f0c02c1bSTvrtko Ursulin /* Benchmark looking up the exact same context ids as we just set */ 371f0c02c1bSTvrtko Ursulin end_time = count; 372f0c02c1bSTvrtko Ursulin kt = ktime_get(); 373f0c02c1bSTvrtko Ursulin while (end_time--) { 374f0c02c1bSTvrtko Ursulin if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) { 375f0c02c1bSTvrtko Ursulin pr_err("Lookup of %lu failed\n", end_time); 376f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 377f0c02c1bSTvrtko Ursulin return -EINVAL; 378f0c02c1bSTvrtko Ursulin } 379f0c02c1bSTvrtko Ursulin } 380f0c02c1bSTvrtko Ursulin kt = ktime_sub(ktime_get(), kt); 381f0c02c1bSTvrtko Ursulin pr_info("%s: %lu in-order lookups, %lluns/lookup\n", 382f0c02c1bSTvrtko Ursulin __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 383f0c02c1bSTvrtko Ursulin 384f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 385f0c02c1bSTvrtko Ursulin cond_resched(); 386f0c02c1bSTvrtko Ursulin 387f0c02c1bSTvrtko Ursulin mock_timeline_init(&tl, 0); 388f0c02c1bSTvrtko Ursulin 389f0c02c1bSTvrtko Ursulin /* Benchmark searching for a random context id and maybe changing it */ 390f0c02c1bSTvrtko Ursulin prandom_seed_state(&prng, i915_selftest.random_seed); 391f0c02c1bSTvrtko Ursulin count = 0; 392f0c02c1bSTvrtko Ursulin kt = ktime_get(); 393f0c02c1bSTvrtko Ursulin end_time = jiffies + HZ/10; 394f0c02c1bSTvrtko Ursulin do { 395f0c02c1bSTvrtko Ursulin u32 id = random_engine(&prng); 396f0c02c1bSTvrtko Ursulin u32 seqno = prandom_u32_state(&prng); 397f0c02c1bSTvrtko Ursulin 398f0c02c1bSTvrtko Ursulin if (!__intel_timeline_sync_is_later(&tl, id, seqno)) 
399f0c02c1bSTvrtko Ursulin __intel_timeline_sync_set(&tl, id, seqno); 400f0c02c1bSTvrtko Ursulin 401f0c02c1bSTvrtko Ursulin count++; 402f0c02c1bSTvrtko Ursulin } while (!time_after(jiffies, end_time)); 403f0c02c1bSTvrtko Ursulin kt = ktime_sub(ktime_get(), kt); 404f0c02c1bSTvrtko Ursulin kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 405f0c02c1bSTvrtko Ursulin pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", 406f0c02c1bSTvrtko Ursulin __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); 407f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 408f0c02c1bSTvrtko Ursulin cond_resched(); 409f0c02c1bSTvrtko Ursulin 410f0c02c1bSTvrtko Ursulin /* Benchmark searching for a known context id and changing the seqno */ 411f0c02c1bSTvrtko Ursulin for (last_order = 1, order = 1; order < 32; 412f0c02c1bSTvrtko Ursulin ({ int tmp = last_order; last_order = order; order += tmp; })) { 413f0c02c1bSTvrtko Ursulin unsigned int mask = BIT(order) - 1; 414f0c02c1bSTvrtko Ursulin 415f0c02c1bSTvrtko Ursulin mock_timeline_init(&tl, 0); 416f0c02c1bSTvrtko Ursulin 417f0c02c1bSTvrtko Ursulin count = 0; 418f0c02c1bSTvrtko Ursulin kt = ktime_get(); 419f0c02c1bSTvrtko Ursulin end_time = jiffies + HZ/10; 420f0c02c1bSTvrtko Ursulin do { 421f0c02c1bSTvrtko Ursulin /* Without assuming too many details of the underlying 422f0c02c1bSTvrtko Ursulin * implementation, try to identify its phase-changes 423f0c02c1bSTvrtko Ursulin * (if any)! 
424f0c02c1bSTvrtko Ursulin */ 425f0c02c1bSTvrtko Ursulin u64 id = (u64)(count & mask) << order; 426f0c02c1bSTvrtko Ursulin 427f0c02c1bSTvrtko Ursulin __intel_timeline_sync_is_later(&tl, id, 0); 428f0c02c1bSTvrtko Ursulin __intel_timeline_sync_set(&tl, id, 0); 429f0c02c1bSTvrtko Ursulin 430f0c02c1bSTvrtko Ursulin count++; 431f0c02c1bSTvrtko Ursulin } while (!time_after(jiffies, end_time)); 432f0c02c1bSTvrtko Ursulin kt = ktime_sub(ktime_get(), kt); 433f0c02c1bSTvrtko Ursulin pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", 434f0c02c1bSTvrtko Ursulin __func__, count, order, 435f0c02c1bSTvrtko Ursulin (long long)div64_ul(ktime_to_ns(kt), count)); 436f0c02c1bSTvrtko Ursulin mock_timeline_fini(&tl); 437f0c02c1bSTvrtko Ursulin cond_resched(); 438f0c02c1bSTvrtko Ursulin } 439f0c02c1bSTvrtko Ursulin 440f0c02c1bSTvrtko Ursulin return 0; 441f0c02c1bSTvrtko Ursulin } 442f0c02c1bSTvrtko Ursulin 443f0c02c1bSTvrtko Ursulin int intel_timeline_mock_selftests(void) 444f0c02c1bSTvrtko Ursulin { 445f0c02c1bSTvrtko Ursulin static const struct i915_subtest tests[] = { 446f0c02c1bSTvrtko Ursulin SUBTEST(mock_hwsp_freelist), 447f0c02c1bSTvrtko Ursulin SUBTEST(igt_sync), 448f0c02c1bSTvrtko Ursulin SUBTEST(bench_sync), 449f0c02c1bSTvrtko Ursulin }; 450f0c02c1bSTvrtko Ursulin 451f0c02c1bSTvrtko Ursulin return i915_subtests(tests, NULL); 452f0c02c1bSTvrtko Ursulin } 453f0c02c1bSTvrtko Ursulin 454f0c02c1bSTvrtko Ursulin static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) 455f0c02c1bSTvrtko Ursulin { 456f0c02c1bSTvrtko Ursulin u32 *cs; 457f0c02c1bSTvrtko Ursulin 458f0c02c1bSTvrtko Ursulin cs = intel_ring_begin(rq, 4); 459f0c02c1bSTvrtko Ursulin if (IS_ERR(cs)) 460f0c02c1bSTvrtko Ursulin return PTR_ERR(cs); 461f0c02c1bSTvrtko Ursulin 462c816723bSLucas De Marchi if (GRAPHICS_VER(rq->engine->i915) >= 8) { 463f0c02c1bSTvrtko Ursulin *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 464f0c02c1bSTvrtko Ursulin *cs++ = addr; 465f0c02c1bSTvrtko Ursulin *cs++ = 0; 
466f0c02c1bSTvrtko Ursulin *cs++ = value; 467c816723bSLucas De Marchi } else if (GRAPHICS_VER(rq->engine->i915) >= 4) { 468f0c02c1bSTvrtko Ursulin *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 469f0c02c1bSTvrtko Ursulin *cs++ = 0; 470f0c02c1bSTvrtko Ursulin *cs++ = addr; 471f0c02c1bSTvrtko Ursulin *cs++ = value; 472f0c02c1bSTvrtko Ursulin } else { 473f0c02c1bSTvrtko Ursulin *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; 474f0c02c1bSTvrtko Ursulin *cs++ = addr; 475f0c02c1bSTvrtko Ursulin *cs++ = value; 476f0c02c1bSTvrtko Ursulin *cs++ = MI_NOOP; 477f0c02c1bSTvrtko Ursulin } 478f0c02c1bSTvrtko Ursulin 479f0c02c1bSTvrtko Ursulin intel_ring_advance(rq, cs); 480f0c02c1bSTvrtko Ursulin 481f0c02c1bSTvrtko Ursulin return 0; 482f0c02c1bSTvrtko Ursulin } 483f0c02c1bSTvrtko Ursulin 484f0c02c1bSTvrtko Ursulin static struct i915_request * 4852c8ab333SMaarten Lankhorst checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) 486f0c02c1bSTvrtko Ursulin { 487f0c02c1bSTvrtko Ursulin struct i915_request *rq; 488f0c02c1bSTvrtko Ursulin int err; 489f0c02c1bSTvrtko Ursulin 4901060974cSMaarten Lankhorst err = selftest_tl_pin(tl); 491f0c02c1bSTvrtko Ursulin if (err) { 492f0c02c1bSTvrtko Ursulin rq = ERR_PTR(err); 493f0c02c1bSTvrtko Ursulin goto out; 494f0c02c1bSTvrtko Ursulin } 495f0c02c1bSTvrtko Ursulin 4962c8ab333SMaarten Lankhorst if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) { 4972c8ab333SMaarten Lankhorst pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", 4982c8ab333SMaarten Lankhorst *tl->hwsp_seqno, tl->seqno); 4992c8ab333SMaarten Lankhorst intel_timeline_unpin(tl); 5002c8ab333SMaarten Lankhorst return ERR_PTR(-EINVAL); 5012c8ab333SMaarten Lankhorst } 5022c8ab333SMaarten Lankhorst 503de5825beSChris Wilson rq = intel_engine_create_kernel_request(engine); 504f0c02c1bSTvrtko Ursulin if (IS_ERR(rq)) 505f0c02c1bSTvrtko Ursulin goto out_unpin; 506f0c02c1bSTvrtko Ursulin 5077e805762SChris Wilson i915_request_get(rq); 
5087e805762SChris Wilson 509f0c02c1bSTvrtko Ursulin err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); 510f0c02c1bSTvrtko Ursulin i915_request_add(rq); 5117e805762SChris Wilson if (err) { 5127e805762SChris Wilson i915_request_put(rq); 513f0c02c1bSTvrtko Ursulin rq = ERR_PTR(err); 5147e805762SChris Wilson } 515f0c02c1bSTvrtko Ursulin 516f0c02c1bSTvrtko Ursulin out_unpin: 517f0c02c1bSTvrtko Ursulin intel_timeline_unpin(tl); 518f0c02c1bSTvrtko Ursulin out: 519f0c02c1bSTvrtko Ursulin if (IS_ERR(rq)) 520f0c02c1bSTvrtko Ursulin pr_err("Failed to write to timeline!\n"); 521f0c02c1bSTvrtko Ursulin return rq; 522f0c02c1bSTvrtko Ursulin } 523f0c02c1bSTvrtko Ursulin 524f0c02c1bSTvrtko Ursulin static int live_hwsp_engine(void *arg) 525f0c02c1bSTvrtko Ursulin { 526f0c02c1bSTvrtko Ursulin #define NUM_TIMELINES 4096 5275f65d5a6SChris Wilson struct intel_gt *gt = arg; 528f0c02c1bSTvrtko Ursulin struct intel_timeline **timelines; 529f0c02c1bSTvrtko Ursulin struct intel_engine_cs *engine; 530f0c02c1bSTvrtko Ursulin enum intel_engine_id id; 531f0c02c1bSTvrtko Ursulin unsigned long count, n; 532f0c02c1bSTvrtko Ursulin int err = 0; 533f0c02c1bSTvrtko Ursulin 534f0c02c1bSTvrtko Ursulin /* 535f0c02c1bSTvrtko Ursulin * Create a bunch of timelines and check we can write 536f0c02c1bSTvrtko Ursulin * independently to each of their breadcrumb slots. 
537f0c02c1bSTvrtko Ursulin */ 538f0c02c1bSTvrtko Ursulin 539f0c02c1bSTvrtko Ursulin timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, 540f0c02c1bSTvrtko Ursulin sizeof(*timelines), 541f0c02c1bSTvrtko Ursulin GFP_KERNEL); 542f0c02c1bSTvrtko Ursulin if (!timelines) 543f0c02c1bSTvrtko Ursulin return -ENOMEM; 544f0c02c1bSTvrtko Ursulin 545f0c02c1bSTvrtko Ursulin count = 0; 5465d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 547f0c02c1bSTvrtko Ursulin if (!intel_engine_can_store_dword(engine)) 548f0c02c1bSTvrtko Ursulin continue; 549f0c02c1bSTvrtko Ursulin 5507e805762SChris Wilson intel_engine_pm_get(engine); 5517e805762SChris Wilson 552f0c02c1bSTvrtko Ursulin for (n = 0; n < NUM_TIMELINES; n++) { 553f0c02c1bSTvrtko Ursulin struct intel_timeline *tl; 554f0c02c1bSTvrtko Ursulin struct i915_request *rq; 555f0c02c1bSTvrtko Ursulin 5562c8ab333SMaarten Lankhorst tl = intel_timeline_create(gt); 557f0c02c1bSTvrtko Ursulin if (IS_ERR(tl)) { 558f0c02c1bSTvrtko Ursulin err = PTR_ERR(tl); 5597e805762SChris Wilson break; 560f0c02c1bSTvrtko Ursulin } 561f0c02c1bSTvrtko Ursulin 5622c8ab333SMaarten Lankhorst rq = checked_tl_write(tl, engine, count); 563f0c02c1bSTvrtko Ursulin if (IS_ERR(rq)) { 564f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 565f0c02c1bSTvrtko Ursulin err = PTR_ERR(rq); 5667e805762SChris Wilson break; 567f0c02c1bSTvrtko Ursulin } 568f0c02c1bSTvrtko Ursulin 569f0c02c1bSTvrtko Ursulin timelines[count++] = tl; 5707e805762SChris Wilson i915_request_put(rq); 571f0c02c1bSTvrtko Ursulin } 572f0c02c1bSTvrtko Ursulin 5737e805762SChris Wilson intel_engine_pm_put(engine); 5747e805762SChris Wilson if (err) 5757e805762SChris Wilson break; 5767e805762SChris Wilson } 5777e805762SChris Wilson 5785f65d5a6SChris Wilson if (igt_flush_test(gt->i915)) 579f0c02c1bSTvrtko Ursulin err = -EIO; 580f0c02c1bSTvrtko Ursulin 581f0c02c1bSTvrtko Ursulin for (n = 0; n < count; n++) { 582f0c02c1bSTvrtko Ursulin struct intel_timeline *tl = timelines[n]; 583f0c02c1bSTvrtko Ursulin 
584e310b435SChris Wilson if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { 585e310b435SChris Wilson GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", 586e310b435SChris Wilson n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); 587d45171acSChris Wilson GEM_TRACE_DUMP(); 588f0c02c1bSTvrtko Ursulin err = -EINVAL; 589f0c02c1bSTvrtko Ursulin } 590f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 591f0c02c1bSTvrtko Ursulin } 592f0c02c1bSTvrtko Ursulin 593f0c02c1bSTvrtko Ursulin kvfree(timelines); 594f0c02c1bSTvrtko Ursulin return err; 595f0c02c1bSTvrtko Ursulin #undef NUM_TIMELINES 596f0c02c1bSTvrtko Ursulin } 597f0c02c1bSTvrtko Ursulin 598f0c02c1bSTvrtko Ursulin static int live_hwsp_alternate(void *arg) 599f0c02c1bSTvrtko Ursulin { 600f0c02c1bSTvrtko Ursulin #define NUM_TIMELINES 4096 6015f65d5a6SChris Wilson struct intel_gt *gt = arg; 602f0c02c1bSTvrtko Ursulin struct intel_timeline **timelines; 603f0c02c1bSTvrtko Ursulin struct intel_engine_cs *engine; 604f0c02c1bSTvrtko Ursulin enum intel_engine_id id; 605f0c02c1bSTvrtko Ursulin unsigned long count, n; 606f0c02c1bSTvrtko Ursulin int err = 0; 607f0c02c1bSTvrtko Ursulin 608f0c02c1bSTvrtko Ursulin /* 609f0c02c1bSTvrtko Ursulin * Create a bunch of timelines and check we can write 610f0c02c1bSTvrtko Ursulin * independently to each of their breadcrumb slots with adjacent 611f0c02c1bSTvrtko Ursulin * engines. 
612f0c02c1bSTvrtko Ursulin */ 613f0c02c1bSTvrtko Ursulin 614f0c02c1bSTvrtko Ursulin timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, 615f0c02c1bSTvrtko Ursulin sizeof(*timelines), 616f0c02c1bSTvrtko Ursulin GFP_KERNEL); 617f0c02c1bSTvrtko Ursulin if (!timelines) 618f0c02c1bSTvrtko Ursulin return -ENOMEM; 619f0c02c1bSTvrtko Ursulin 620f0c02c1bSTvrtko Ursulin count = 0; 621f0c02c1bSTvrtko Ursulin for (n = 0; n < NUM_TIMELINES; n++) { 6225d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 623f0c02c1bSTvrtko Ursulin struct intel_timeline *tl; 624f0c02c1bSTvrtko Ursulin struct i915_request *rq; 625f0c02c1bSTvrtko Ursulin 626f0c02c1bSTvrtko Ursulin if (!intel_engine_can_store_dword(engine)) 627f0c02c1bSTvrtko Ursulin continue; 628f0c02c1bSTvrtko Ursulin 6292c8ab333SMaarten Lankhorst tl = intel_timeline_create(gt); 630f0c02c1bSTvrtko Ursulin if (IS_ERR(tl)) { 631f0c02c1bSTvrtko Ursulin err = PTR_ERR(tl); 632f0c02c1bSTvrtko Ursulin goto out; 633f0c02c1bSTvrtko Ursulin } 634f0c02c1bSTvrtko Ursulin 6357e805762SChris Wilson intel_engine_pm_get(engine); 6362c8ab333SMaarten Lankhorst rq = checked_tl_write(tl, engine, count); 6377e805762SChris Wilson intel_engine_pm_put(engine); 638f0c02c1bSTvrtko Ursulin if (IS_ERR(rq)) { 639f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 640f0c02c1bSTvrtko Ursulin err = PTR_ERR(rq); 641f0c02c1bSTvrtko Ursulin goto out; 642f0c02c1bSTvrtko Ursulin } 643f0c02c1bSTvrtko Ursulin 644f0c02c1bSTvrtko Ursulin timelines[count++] = tl; 6457e805762SChris Wilson i915_request_put(rq); 646f0c02c1bSTvrtko Ursulin } 647f0c02c1bSTvrtko Ursulin } 648f0c02c1bSTvrtko Ursulin 649f0c02c1bSTvrtko Ursulin out: 6505f65d5a6SChris Wilson if (igt_flush_test(gt->i915)) 651f0c02c1bSTvrtko Ursulin err = -EIO; 652f0c02c1bSTvrtko Ursulin 653f0c02c1bSTvrtko Ursulin for (n = 0; n < count; n++) { 654f0c02c1bSTvrtko Ursulin struct intel_timeline *tl = timelines[n]; 655f0c02c1bSTvrtko Ursulin 656e310b435SChris Wilson if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { 
657e310b435SChris Wilson GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", 658e310b435SChris Wilson n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); 659d45171acSChris Wilson GEM_TRACE_DUMP(); 660f0c02c1bSTvrtko Ursulin err = -EINVAL; 661f0c02c1bSTvrtko Ursulin } 662f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 663f0c02c1bSTvrtko Ursulin } 664f0c02c1bSTvrtko Ursulin 665f0c02c1bSTvrtko Ursulin kvfree(timelines); 666f0c02c1bSTvrtko Ursulin return err; 667f0c02c1bSTvrtko Ursulin #undef NUM_TIMELINES 668f0c02c1bSTvrtko Ursulin } 669f0c02c1bSTvrtko Ursulin

/*
 * Force a seqno wrap on a standalone timeline in the middle of a request and
 * check that both the pre-wrap and the post-wrap seqno land in memory, each
 * through the HWSP pointer that was current when it was emitted.
 *
 * Returns 0 on success (or when the timeline has no initial breadcrumb and
 * the test does not apply), otherwise a negative errno.
 */
static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	err = selftest_tl_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		/* Park the timeline just short of the wrap point */
		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

/*
 * Emit 12 dwords into @rq that record one (seqno, HWSP value) pair at *@addr:
 * first @seqno is stored as an immediate, then the dword at GGTT address
 * @hwsp is loaded into the engine's first CS GPR and stored right after it.
 * *@addr is advanced by 8 bytes so successive calls append further pairs.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int emit_read_hwsp(struct i915_request *rq,
			  u32 seqno, u32 hwsp,
			  u32 *addr)
{
	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* pair[0]: the seqno the caller expects */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = *addr;
	*cs++ = 0;
	*cs++ = seqno;
	*addr += 4;

	/* pair[1]: whatever the HWSP holds when the GPU executes this */
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = hwsp;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = *addr;
	*cs++ = 0;
	*addr += 4;

	intel_ring_advance(rq, cs);

	return 0;
}

/* State for one GPU-side reader of the timeline's HWSP */
struct hwsp_watcher {
	struct i915_vma *vma;		/* GGTT binding of the result buffer */
	struct i915_request *rq;	/* request being built; NULL when none */
	u32 addr;			/* GGTT address of the next free slot */
	u32 *map;			/* CPU mapping of the result buffer */
};

/* check_watcher() predicate: sampled HWSP value is strictly below the seqno */
static bool cmp_lt(u32 a, u32 b)
{
	return a < b;
}

/* check_watcher() predicate: sampled HWSP value has reached the seqno */
static bool cmp_gte(u32 a, u32 b)
{
	return a >= b;
}

/*
 * Allocate the 2MiB result buffer for @w, map it for the CPU and pin it into
 * the GGTT. On success w->map, w->vma and w->addr are initialised.
 *
 * Returns 0 on success or a negative errno.
 */
static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(w->map)) {
		i915_gem_object_put(obj);
		return PTR_ERR(w->map);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return PTR_ERR(vma);
	}

	w->vma = vma;
	w->addr = i915_ggtt_offset(vma);
	return 0;
}

/*
 * Hand the "current" timeline lock over from one request under construction
 * to another. Either argument may be NULL to only take or only drop a lock.
 */
static void switch_tl_lock(struct i915_request *from, struct i915_request *to)
{
	/* some light mutex juggling required; think co-routines */

	if (from) {
		lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie);
		mutex_unlock(&from->context->timeline->mutex);
	}

	if (to) {
		mutex_lock(&to->context->timeline->mutex);
		to->cookie = lockdep_pin_lock(&to->context->timeline->mutex);
	}
}

/*
 * Start a new watcher request on @engine using a fresh context whose ring is
 * @ringsz bytes, and rewind w->addr to the start of the result buffer. The
 * request's timeline lock is dropped before returning so other requests can
 * be built in the meantime; check_watcher()/cleanup_watcher() retake it.
 *
 * w->rq is only written on success, so after a failure it never holds an
 * ERR_PTR that cleanup_watcher() would go on to dereference.
 *
 * Returns 0 on success or a negative errno.
 */
static int create_watcher(struct hwsp_watcher *w,
			  struct intel_engine_cs *engine,
			  int ringsz)
{
	struct intel_context *ce;
	struct i915_request *rq;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->ring_size = ringsz;
	rq = intel_context_create_request(ce);
	intel_context_put(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	w->rq = rq;
	w->addr = i915_ggtt_offset(w->vma);

	switch_tl_lock(w->rq, NULL);

	return 0;
}

/*
 * Submit the watcher request, wait up to a second for it, then walk every
 * (seqno, HWSP value) pair it recorded and apply @op(hwsp, seqno) to each.
 * Ownership of w->rq is taken (it is cleared) regardless of the outcome.
 *
 * Returns 0 if every pair satisfies @op, -EINVAL if any pair does not (each
 * offender is logged with @name), or -ETIME if the request did not complete.
 */
static int check_watcher(struct hwsp_watcher *w, const char *name,
			 bool (*op)(u32 hwsp, u32 seqno))
{
	struct i915_request *rq = fetch_and_zero(&w->rq);
	u32 offset, end;
	int err;

	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);

	/* Keep our own reference across the wait */
	i915_request_get(rq);
	switch_tl_lock(NULL, rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		err = -ETIME;
		goto out;
	}

	err = 0;
	offset = 0;
	end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
	while (offset < end) {
		/* map[offset] is the seqno, map[offset + 1] the HWSP sample */
		if (!op(w->map[offset + 1], w->map[offset])) {
			pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
			       name, w->map[offset + 1], w->map[offset]);
			err = -EINVAL;
		}

		offset += 2;
	}

out:
	i915_request_put(rq);
	return err;
}

/*
 * Submit any watcher request that is still under construction and release
 * the result buffer (unpinning the vma and its CPU mapping).
 */
static void cleanup_watcher(struct hwsp_watcher *w)
{
	if (w->rq) {
		switch_tl_lock(NULL, w->rq);

		i915_request_add(w->rq);
	}

	i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
}

/*
 * Retire requests on @tl in order, stopping at the first that cannot be
 * retired yet. Returns true if the timeline has no last request left.
 */
static bool retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	mutex_lock(&tl->mutex);
	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
	mutex_unlock(&tl->mutex);

	return !i915_active_fence_isset(&tl->last_request);
}

/*
 * Keep submitting empty requests on @rq's context until the timeline seqno
 * has dropped below @rq's own seqno (i.e. it wrapped), then submit one more.
 *
 * Consumes the caller's reference to @rq. Returns a new reference to the
 * last request submitted, or an ERR_PTR if a request could not be created.
 */
static struct i915_request *wrap_timeline(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	struct intel_timeline *tl = ce->timeline;
	u32 seqno = rq->fence.seqno;

	while (tl->seqno >= seqno) { /* Cause a wrap */
		i915_request_put(rq);
		rq = intel_context_create_request(ce);
		if (IS_ERR(rq))
			return rq;

		i915_request_get(rq);
		i915_request_add(rq);
	}

	i915_request_put(rq);
	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

/*
 * Check that GPU reads of a timeline's HWSP stay correct across seqno wraps.
 * For each engine two watchers sample the HWSP for every request: watcher[0]
 * is ordered before the request and must see a value below its seqno
 * ("before", cmp_lt); watcher[1] is ordered after the wrap and must see a
 * value that has reached it ("after", cmp_gte).
 *
 * Returns 0 on success or when the test does not apply, else a negative errno.
 */
static int live_hwsp_read(void *arg)
{
	struct intel_gt *gt = arg;
	struct hwsp_watcher watcher[2] = {};
	/* Non-NULL only while an iteration still owes a commit + put */
	struct i915_sw_fence *submit = NULL;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;
	int i;

	/*
	 * If we take a reference to the HWSP for reading on the GPU, that
	 * read may be arbitrarily delayed (either by foreign fence or
	 * priority saturation) and a wrap can happen within 30 minutes.
	 * When the GPU read is finally submitted it should be correct,
	 * even across multiple wraps.
	 */

	if (GRAPHICS_VER(gt->i915) < 8) /* CS convenience [SRM/LRM] */
		return 0;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
		err = setup_watcher(&watcher[i], gt);
		if (err)
			goto out;
	}

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

		/* Create a request we can use for remote reading of the HWSP */
		err = create_watcher(&watcher[1], engine, SZ_512K);
		if (err)
			goto out;

		do {
			struct i915_request *rq;
			u32 hwsp, dummy;

			submit = heap_fence_create(GFP_KERNEL);
			if (!submit) {
				err = -ENOMEM;
				goto out;
			}

			err = create_watcher(&watcher[0], engine, SZ_4K);
			if (err)
				goto out;

			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			ce->timeline = intel_timeline_get(tl);

			/* Ensure timeline is mapped, done during first pin */
			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			/*
			 * Start at a new wrap, and set seqno right before another wrap,
			 * saving 30 minutes of nops
			 */
			tl->seqno = -12u + 2 * (count & 3);
			__intel_timeline_get_seqno(tl, &dummy);

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&rq->submit,
							    &watcher[0].rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[0].rq);
			err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[0].rq, /* before */
						     rq->fence.seqno, hwsp,
						     &watcher[0].addr);
			switch_tl_lock(watcher[0].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[1].rq);
			err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[1].rq, /* after */
						     rq->fence.seqno, hwsp,
						     &watcher[1].addr);
			switch_tl_lock(watcher[1].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			i915_request_get(rq);
			i915_request_add(rq);

			rq = wrap_timeline(rq);
			intel_context_unpin(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
							    &rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_put(rq);
				goto out;
			}

			err = check_watcher(&watcher[0], "before", cmp_lt);
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			submit = NULL; /* released; nothing left for out: to do */
			if (err) {
				i915_request_put(rq);
				goto out;
			}
			count++;

			/* Flush the timeline before manually wrapping again */
			if (i915_request_wait(rq,
					      I915_WAIT_INTERRUPTIBLE,
					      HZ) < 0) {
				err = -ETIME;
				i915_request_put(rq);
				goto out;
			}
			retire_requests(tl);
			i915_request_put(rq);

			/* Single requests are limited to half a ring at most */
			if (8 * watcher[1].rq->ring->emit >
			    3 * watcher[1].rq->ring->size)
				break;

		} while (!__igt_timeout(end_time, NULL) &&
			 count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2);

		pr_info("%s: simulated %lu wraps\n", engine->name, count);
		err = check_watcher(&watcher[1], "after", cmp_gte);
		if (err)
			goto out;
	}

out:
	/*
	 * An error exit from the middle of an iteration still owns the
	 * submit fence; release it the same way the success path does.
	 */
	if (submit) {
		i915_sw_fence_commit(submit);
		heap_fence_put(submit);
	}

	for (i = 0; i < ARRAY_SIZE(watcher); i++)
		cleanup_watcher(&watcher[i]);

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_free:
	intel_timeline_put(tl);
	return err;
}

/*
 * Force a seqno wrap on each engine's kernel context timeline and check
 * that three requests straddling the wrap all complete.
 *
 * Returns 0 on success or a negative errno.
 */
static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		/* Idle timeline: move both the CPU and HWSP view next to the wrap */
		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = -2u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}
Wilson static int live_hwsp_rollover_user(void *arg) 1250bb5e4397SChris Wilson { 1251bb5e4397SChris Wilson struct intel_gt *gt = arg; 1252bb5e4397SChris Wilson struct intel_engine_cs *engine; 1253bb5e4397SChris Wilson enum intel_engine_id id; 1254bb5e4397SChris Wilson int err = 0; 1255bb5e4397SChris Wilson 1256bb5e4397SChris Wilson /* 1257bb5e4397SChris Wilson * Simulate a long running user context, and force the seqno wrap 1258bb5e4397SChris Wilson * on the user's timeline. 1259bb5e4397SChris Wilson */ 1260bb5e4397SChris Wilson 1261bb5e4397SChris Wilson for_each_engine(engine, gt, id) { 1262bb5e4397SChris Wilson struct i915_request *rq[3] = {}; 1263bb5e4397SChris Wilson struct intel_timeline *tl; 1264bb5e4397SChris Wilson struct intel_context *ce; 1265bb5e4397SChris Wilson int i; 1266bb5e4397SChris Wilson 1267bb5e4397SChris Wilson ce = intel_context_create(engine); 1268bb5e4397SChris Wilson if (IS_ERR(ce)) 1269bb5e4397SChris Wilson return PTR_ERR(ce); 1270bb5e4397SChris Wilson 1271bb5e4397SChris Wilson err = intel_context_alloc_state(ce); 1272bb5e4397SChris Wilson if (err) 1273bb5e4397SChris Wilson goto out; 1274bb5e4397SChris Wilson 1275bb5e4397SChris Wilson tl = ce->timeline; 127612ca695dSMaarten Lankhorst if (!tl->has_initial_breadcrumb) 1277bb5e4397SChris Wilson goto out; 1278bb5e4397SChris Wilson 12792c8ab333SMaarten Lankhorst err = intel_context_pin(ce); 12802c8ab333SMaarten Lankhorst if (err) 12812c8ab333SMaarten Lankhorst goto out; 12822c8ab333SMaarten Lankhorst 128312ca695dSMaarten Lankhorst tl->seqno = -4u; 1284bb5e4397SChris Wilson WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); 1285bb5e4397SChris Wilson 1286bb5e4397SChris Wilson for (i = 0; i < ARRAY_SIZE(rq); i++) { 1287bb5e4397SChris Wilson struct i915_request *this; 1288bb5e4397SChris Wilson 1289bb5e4397SChris Wilson this = intel_context_create_request(ce); 1290bb5e4397SChris Wilson if (IS_ERR(this)) { 1291bb5e4397SChris Wilson err = PTR_ERR(this); 12922c8ab333SMaarten Lankhorst goto out_unpin; 
1293bb5e4397SChris Wilson } 1294bb5e4397SChris Wilson 1295bb5e4397SChris Wilson pr_debug("%s: create fence.seqnp:%d\n", 1296bb5e4397SChris Wilson engine->name, 1297bb5e4397SChris Wilson lower_32_bits(this->fence.seqno)); 1298bb5e4397SChris Wilson 1299bb5e4397SChris Wilson GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl); 1300bb5e4397SChris Wilson 1301bb5e4397SChris Wilson rq[i] = i915_request_get(this); 1302bb5e4397SChris Wilson i915_request_add(this); 1303bb5e4397SChris Wilson } 1304bb5e4397SChris Wilson 1305bb5e4397SChris Wilson /* We expected a wrap! */ 1306bb5e4397SChris Wilson GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno); 1307bb5e4397SChris Wilson 1308bb5e4397SChris Wilson if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { 1309bb5e4397SChris Wilson pr_err("Wait for timeline wrap timed out!\n"); 1310bb5e4397SChris Wilson err = -EIO; 13112c8ab333SMaarten Lankhorst goto out_unpin; 1312bb5e4397SChris Wilson } 1313bb5e4397SChris Wilson 1314bb5e4397SChris Wilson for (i = 0; i < ARRAY_SIZE(rq); i++) { 1315bb5e4397SChris Wilson if (!i915_request_completed(rq[i])) { 1316bb5e4397SChris Wilson pr_err("Pre-wrap request not completed!\n"); 1317bb5e4397SChris Wilson err = -EINVAL; 13182c8ab333SMaarten Lankhorst goto out_unpin; 1319bb5e4397SChris Wilson } 1320bb5e4397SChris Wilson } 13212c8ab333SMaarten Lankhorst out_unpin: 13222c8ab333SMaarten Lankhorst intel_context_unpin(ce); 1323bb5e4397SChris Wilson out: 1324bb5e4397SChris Wilson for (i = 0; i < ARRAY_SIZE(rq); i++) 1325bb5e4397SChris Wilson i915_request_put(rq[i]); 1326bb5e4397SChris Wilson intel_context_put(ce); 1327bb5e4397SChris Wilson if (err) 1328bb5e4397SChris Wilson break; 1329bb5e4397SChris Wilson } 1330bb5e4397SChris Wilson 1331bb5e4397SChris Wilson if (igt_flush_test(gt->i915)) 1332bb5e4397SChris Wilson err = -EIO; 1333bb5e4397SChris Wilson 1334bb5e4397SChris Wilson return err; 1335bb5e4397SChris Wilson } 1336bb5e4397SChris Wilson 1337f0c02c1bSTvrtko Ursulin static int live_hwsp_recycle(void *arg) 
1338f0c02c1bSTvrtko Ursulin { 13395f65d5a6SChris Wilson struct intel_gt *gt = arg; 1340f0c02c1bSTvrtko Ursulin struct intel_engine_cs *engine; 1341f0c02c1bSTvrtko Ursulin enum intel_engine_id id; 1342f0c02c1bSTvrtko Ursulin unsigned long count; 1343f0c02c1bSTvrtko Ursulin int err = 0; 1344f0c02c1bSTvrtko Ursulin 1345f0c02c1bSTvrtko Ursulin /* 1346f0c02c1bSTvrtko Ursulin * Check seqno writes into one timeline at a time. We expect to 1347f0c02c1bSTvrtko Ursulin * recycle the breadcrumb slot between iterations and neither 1348f0c02c1bSTvrtko Ursulin * want to confuse ourselves or the GPU. 1349f0c02c1bSTvrtko Ursulin */ 1350f0c02c1bSTvrtko Ursulin 1351f0c02c1bSTvrtko Ursulin count = 0; 13525d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 1353f0c02c1bSTvrtko Ursulin IGT_TIMEOUT(end_time); 1354f0c02c1bSTvrtko Ursulin 1355f0c02c1bSTvrtko Ursulin if (!intel_engine_can_store_dword(engine)) 1356f0c02c1bSTvrtko Ursulin continue; 1357f0c02c1bSTvrtko Ursulin 13587e805762SChris Wilson intel_engine_pm_get(engine); 13597e805762SChris Wilson 1360f0c02c1bSTvrtko Ursulin do { 1361f0c02c1bSTvrtko Ursulin struct intel_timeline *tl; 1362f0c02c1bSTvrtko Ursulin struct i915_request *rq; 1363f0c02c1bSTvrtko Ursulin 13642c8ab333SMaarten Lankhorst tl = intel_timeline_create(gt); 1365f0c02c1bSTvrtko Ursulin if (IS_ERR(tl)) { 1366f0c02c1bSTvrtko Ursulin err = PTR_ERR(tl); 13677e805762SChris Wilson break; 1368f0c02c1bSTvrtko Ursulin } 1369f0c02c1bSTvrtko Ursulin 13702c8ab333SMaarten Lankhorst rq = checked_tl_write(tl, engine, count); 1371f0c02c1bSTvrtko Ursulin if (IS_ERR(rq)) { 1372f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 1373f0c02c1bSTvrtko Ursulin err = PTR_ERR(rq); 13747e805762SChris Wilson break; 1375f0c02c1bSTvrtko Ursulin } 1376f0c02c1bSTvrtko Ursulin 1377f0c02c1bSTvrtko Ursulin if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1378f0c02c1bSTvrtko Ursulin pr_err("Wait for timeline writes timed out!\n"); 13797e805762SChris Wilson i915_request_put(rq); 1380f0c02c1bSTvrtko Ursulin 
intel_timeline_put(tl); 1381f0c02c1bSTvrtko Ursulin err = -EIO; 13827e805762SChris Wilson break; 1383f0c02c1bSTvrtko Ursulin } 1384f0c02c1bSTvrtko Ursulin 1385e310b435SChris Wilson if (READ_ONCE(*tl->hwsp_seqno) != count) { 1386e310b435SChris Wilson GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n", 1387e310b435SChris Wilson count, tl->fence_context, 1388e310b435SChris Wilson tl->hwsp_offset, *tl->hwsp_seqno); 1389d45171acSChris Wilson GEM_TRACE_DUMP(); 1390f0c02c1bSTvrtko Ursulin err = -EINVAL; 1391f0c02c1bSTvrtko Ursulin } 1392f0c02c1bSTvrtko Ursulin 13937e805762SChris Wilson i915_request_put(rq); 1394f0c02c1bSTvrtko Ursulin intel_timeline_put(tl); 1395f0c02c1bSTvrtko Ursulin count++; 1396f0c02c1bSTvrtko Ursulin 1397f0c02c1bSTvrtko Ursulin if (err) 13987e805762SChris Wilson break; 1399f0c02c1bSTvrtko Ursulin } while (!__igt_timeout(end_time, NULL)); 1400f0c02c1bSTvrtko Ursulin 14017e805762SChris Wilson intel_engine_pm_put(engine); 14027e805762SChris Wilson if (err) 14037e805762SChris Wilson break; 14047e805762SChris Wilson } 1405f0c02c1bSTvrtko Ursulin 1406f0c02c1bSTvrtko Ursulin return err; 1407f0c02c1bSTvrtko Ursulin } 1408f0c02c1bSTvrtko Ursulin 1409f0c02c1bSTvrtko Ursulin /* Entry point for the live timeline selftests: hands the subtests listed in tests[] to intel_gt_live_subtests() together with to_gt(i915), or returns 0 without running any of them when intel_gt_is_wedged() reports that GT as wedged. */ int intel_timeline_live_selftests(struct drm_i915_private *i915) 1410f0c02c1bSTvrtko Ursulin { 1411f0c02c1bSTvrtko Ursulin /* Table of subtests passed to intel_gt_live_subtests() below. */ static const struct i915_subtest tests[] = { 1412f0c02c1bSTvrtko Ursulin SUBTEST(live_hwsp_recycle), 1413f0c02c1bSTvrtko Ursulin SUBTEST(live_hwsp_engine), 1414f0c02c1bSTvrtko Ursulin SUBTEST(live_hwsp_alternate), 1415f0c02c1bSTvrtko Ursulin SUBTEST(live_hwsp_wrap), 14166e7a21e7SChris Wilson SUBTEST(live_hwsp_read), 1417bb5e4397SChris Wilson SUBTEST(live_hwsp_rollover_kernel), 1418bb5e4397SChris Wilson SUBTEST(live_hwsp_rollover_user), 1419f0c02c1bSTvrtko Ursulin }; 1420f0c02c1bSTvrtko Ursulin /* Skip every subtest and return 0 if the GT is already wedged. */ 1421c14adcbdSMichał Winiarski if (intel_gt_is_wedged(to_gt(i915))) 1422f0c02c1bSTvrtko Ursulin return 0; 1423f0c02c1bSTvrtko Ursulin /* Otherwise the return value is whatever intel_gt_live_subtests() yields for the table. */ 1424c14adcbdSMichał
Winiarski return intel_gt_live_subtests(tests, to_gt(i915)); 1425f0c02c1bSTvrtko Ursulin } 1426