/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

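/*
 * Each timeline claims a single cacheline within its HWSP page for its
 * breadcrumb. These helpers translate a timeline's HWSP allocation into a
 * global cacheline index so the mock tests below can detect two timelines
 * being handed the same breadcrumb slot.
 */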
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

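/*
 * Swap the timeline stored in the history slot at @idx. The previous
 * occupant (if any) is removed from the cacheline tree and released,
 * allowing its HWSP slot to be recycled by later allocations.
 */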
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSPs do not
	 * overlap. Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}

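/*
 * Each entry in the igt_sync tables below is one step applied to a timeline:
 * ask whether the timeline already tracks a sync point for the context that
 * is at least as new as @seqno, compare the answer with @expected, and then
 * optionally (@set) record @seqno as the latest value for that context.
 */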
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

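/*
 * prng32_1M below is the measured cost of 2^20 (~1M) calls to
 * prandom_u32_state(), in nanoseconds. Each benchmark loop that consumes
 * random numbers subtracts (count * draws * prng32_1M) >> 20 from its
 * elapsed time, so the reported ns/op reflects the sync-tree operations
 * rather than the generator overhead.
 */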
static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself become a significant factor in the per-iteration
	 * timings. We compensate by measuring the overhead of the prng and
	 * subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

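/*
 * Emit an MI_STORE_DWORD_IMM that writes @value to @addr in the GGTT. The
 * dword layout differs by generation: gen8+ takes a 64-bit address
 * (low, high), gen4-gen7 a reserved dword followed by the 32-bit address,
 * and earlier gens use the 3-dword form padded with an MI_NOOP to keep the
 * emission at four dwords.
 */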
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

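/*
 * Pin the timeline's HWSP and submit a kernel request on @engine that
 * stores @value into the timeline's breadcrumb slot. A reference to the
 * request is returned so the caller can wait on it; the timeline is
 * unpinned again before returning.
 */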
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl, NULL);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return tl;

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl, NULL);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

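/*
 * The rollover tests below use timeline_rollback(), a static helper in
 * intel_timeline.c; like the other gt selftests, this file is built by
 * being #included from its parent source when CONFIG_DRM_I915_SELFTEST is
 * enabled, which is why the helper is visible here. Rolling the seqno back
 * twice leaves it just short of the u32 wrap so that the handful of
 * requests submitted below cross the boundary.
 */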
static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = 0;
		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
			goto out;

		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and want to
	 * confuse neither ourselves nor the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}