/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

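/*
 * hwsp_page() and hwsp_cacheline() map a timeline's HWSP slot back to the
 * kernel address of its backing page and then to a global cacheline index,
 * so the mock tests below can detect two timelines sharing a cacheline.
 */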
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

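/*
 * State for the mock HWSP freelist test: a radix tree keyed by cacheline
 * index to catch duplicate allocations, plus a fixed-size history of
 * timelines that are released again in a pseudo-random order.
 */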
struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

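/*
 * Replace the history slot at @idx, dropping our reference on (and forgetting
 * the cacheline of) whatever timeline previously occupied it.
 */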
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

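/*
 * Create @count timelines, checking via the radix tree that no two share a
 * HWSP cacheline, then release a random number of the most recent entries
 * (optionally shuffling the history first) to exercise the HWSP freelist.
 */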
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt, NULL);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSPs do not
	 * overlap. Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	drm_dev_put(&i915->drm);
	return err;
}

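/*
 * Each __igt_sync entry queries whether @seqno has already passed for the
 * given context, expecting @expected, and if @set then records @seqno in the
 * sync map afterwards.
 */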
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

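/*
 * Run the pass[] vectors against context ids clustered around every power of
 * two, first with the vectors as the outer loop and then as the inner loop,
 * to check the sync map in both orders.
 */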
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

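/*
 * Microbenchmark __intel_timeline_sync_set()/_is_later() under random,
 * in-order and cyclic context ids, first measuring the prng overhead so it
 * can be subtracted from the random-access results.
 */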
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself become a significant factor in the per-iteration
	 * timings. We compensate by measuring the overhead of the prng and
	 * subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

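/*
 * Emit an MI_STORE_DWORD_IMM into @rq that writes @value to the GGTT address
 * @addr, using the dword layout appropriate for the ring's gen.
 */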
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

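/*
 * Pin the timeline's HWSP and submit a request on @engine's kernel context
 * that writes @value into the timeline's breadcrumb slot. A reference to the
 * request is returned for the caller to wait upon and release.
 */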
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

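/*
 * Create a timeline and verify that its HWSP slot was initialised to the
 * timeline's current seqno before handing it to the caller.
 */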
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return tl;

	if (*tl->hwsp_seqno != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && *tl->hwsp_seqno != n) {
			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
			       n, *tl->hwsp_seqno);
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt, NULL);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);
		rq = i915_request_create(engine->kernel_context);
		intel_engine_pm_put(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

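		/*
		 * Start the timeline just before the u32 wrap so that the
		 * second seqno allocation below wraps and forces a move onto
		 * a fresh HWSP cacheline.
		 */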
		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and neither
	 * want to confuse ourselves nor the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (*tl->hwsp_seqno != count) {
				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
				       count, *tl->hwsp_seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}