1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "intel_context.h"
10 #include "intel_engine_heartbeat.h"
11 #include "intel_engine_pm.h"
12 #include "intel_gt.h"
13 #include "intel_gt_requests.h"
14 #include "intel_ring.h"
15 
16 #include "../selftests/i915_random.h"
17 #include "../i915_selftest.h"
18 
19 #include "../selftests/igt_flush_test.h"
20 #include "../selftests/mock_gem_device.h"
21 #include "selftests/mock_timeline.h"
22 
23 static struct page *hwsp_page(struct intel_timeline *tl)
24 {
25 	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
26 
27 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
28 	return sg_page(obj->mm.pages->sgl);
29 }
30 
31 static unsigned long hwsp_cacheline(struct intel_timeline *tl)
32 {
33 	unsigned long address = (unsigned long)page_address(hwsp_page(tl));
34 
35 	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
36 }
37 
38 #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
39 
40 struct mock_hwsp_freelist {
41 	struct intel_gt *gt;
42 	struct radix_tree_root cachelines;
43 	struct intel_timeline **history;
44 	unsigned long count, max;
45 	struct rnd_state prng;
46 };
47 
48 enum {
49 	SHUFFLE = BIT(0),
50 };
51 
52 static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
53 			       unsigned int idx,
54 			       struct intel_timeline *tl)
55 {
56 	tl = xchg(&state->history[idx], tl);
57 	if (tl) {
58 		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
59 		intel_timeline_put(tl);
60 	}
61 }
62 
63 static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
64 				unsigned int count,
65 				unsigned int flags)
66 {
67 	struct intel_timeline *tl;
68 	unsigned int idx;
69 
70 	while (count--) {
71 		unsigned long cacheline;
72 		int err;
73 
74 		tl = intel_timeline_create(state->gt, NULL);
75 		if (IS_ERR(tl))
76 			return PTR_ERR(tl);
77 
78 		cacheline = hwsp_cacheline(tl);
79 		err = radix_tree_insert(&state->cachelines, cacheline, tl);
80 		if (err) {
81 			if (err == -EEXIST) {
82 				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
83 				       cacheline);
84 			}
85 			intel_timeline_put(tl);
86 			return err;
87 		}
88 
89 		idx = state->count++ % state->max;
90 		__mock_hwsp_record(state, idx, tl);
91 	}
92 
93 	if (flags & SHUFFLE)
94 		i915_prandom_shuffle(state->history,
95 				     sizeof(*state->history),
96 				     min(state->count, state->max),
97 				     &state->prng);
98 
99 	count = i915_prandom_u32_max_state(min(state->count, state->max),
100 					   &state->prng);
101 	while (count--) {
102 		idx = --state->count % state->max;
103 		__mock_hwsp_record(state, idx, NULL);
104 	}
105 
106 	return 0;
107 }
108 
109 static int mock_hwsp_freelist(void *arg)
110 {
111 	struct mock_hwsp_freelist state;
112 	struct drm_i915_private *i915;
113 	const struct {
114 		const char *name;
115 		unsigned int flags;
116 	} phases[] = {
117 		{ "linear", 0 },
118 		{ "shuffled", SHUFFLE },
119 		{ },
120 	}, *p;
121 	unsigned int na;
122 	int err = 0;
123 
124 	i915 = mock_gem_device();
125 	if (!i915)
126 		return -ENOMEM;
127 
128 	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
129 	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
130 
131 	state.gt = &i915->gt;
132 
133 	/*
134 	 * Create a bunch of timelines and check that their HWSP do not overlap.
135 	 * Free some, and try again.
136 	 */
137 
138 	state.max = PAGE_SIZE / sizeof(*state.history);
139 	state.count = 0;
140 	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
141 	if (!state.history) {
142 		err = -ENOMEM;
143 		goto err_put;
144 	}
145 
146 	for (p = phases; p->name; p++) {
147 		pr_debug("%s(%s)\n", __func__, p->name);
148 		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
149 			err = __mock_hwsp_timeline(&state, na, p->flags);
150 			if (err)
151 				goto out;
152 		}
153 	}
154 
155 out:
156 	for (na = 0; na < state.max; na++)
157 		__mock_hwsp_record(&state, na, NULL);
158 	kfree(state.history);
159 err_put:
160 	drm_dev_put(&i915->drm);
161 	return err;
162 }
163 
164 struct __igt_sync {
165 	const char *name;
166 	u32 seqno;
167 	bool expected;
168 	bool set;
169 };
170 
171 static int __igt_sync(struct intel_timeline *tl,
172 		      u64 ctx,
173 		      const struct __igt_sync *p,
174 		      const char *name)
175 {
176 	int ret;
177 
178 	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
179 		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
180 		       name, p->name, ctx, p->seqno, yesno(p->expected));
181 		return -EINVAL;
182 	}
183 
184 	if (p->set) {
185 		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
186 		if (ret)
187 			return ret;
188 	}
189 
190 	return 0;
191 }
192 
193 static int igt_sync(void *arg)
194 {
195 	const struct __igt_sync pass[] = {
196 		{ "unset", 0, false, false },
197 		{ "new", 0, false, true },
198 		{ "0a", 0, true, true },
199 		{ "1a", 1, false, true },
200 		{ "1b", 1, true, true },
201 		{ "0b", 0, true, false },
202 		{ "2a", 2, false, true },
203 		{ "4", 4, false, true },
204 		{ "INT_MAX", INT_MAX, false, true },
205 		{ "INT_MAX-1", INT_MAX-1, true, false },
206 		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
207 		{ "INT_MAX", INT_MAX, true, false },
208 		{ "UINT_MAX", UINT_MAX, false, true },
209 		{ "wrap", 0, false, true },
210 		{ "unwrap", UINT_MAX, true, false },
211 		{},
212 	}, *p;
213 	struct intel_timeline tl;
214 	int order, offset;
215 	int ret = -ENODEV;
216 
217 	mock_timeline_init(&tl, 0);
218 	for (p = pass; p->name; p++) {
219 		for (order = 1; order < 64; order++) {
220 			for (offset = -1; offset <= (order > 1); offset++) {
221 				u64 ctx = BIT_ULL(order) + offset;
222 
223 				ret = __igt_sync(&tl, ctx, p, "1");
224 				if (ret)
225 					goto out;
226 			}
227 		}
228 	}
229 	mock_timeline_fini(&tl);
230 
231 	mock_timeline_init(&tl, 0);
232 	for (order = 1; order < 64; order++) {
233 		for (offset = -1; offset <= (order > 1); offset++) {
234 			u64 ctx = BIT_ULL(order) + offset;
235 
236 			for (p = pass; p->name; p++) {
237 				ret = __igt_sync(&tl, ctx, p, "2");
238 				if (ret)
239 					goto out;
240 			}
241 		}
242 	}
243 
244 out:
245 	mock_timeline_fini(&tl);
246 	return ret;
247 }
248 
249 static unsigned int random_engine(struct rnd_state *rnd)
250 {
251 	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
252 }
253 
254 static int bench_sync(void *arg)
255 {
256 	struct rnd_state prng;
257 	struct intel_timeline tl;
258 	unsigned long end_time, count;
259 	u64 prng32_1M;
260 	ktime_t kt;
261 	int order, last_order;
262 
263 	mock_timeline_init(&tl, 0);
264 
265 	/* Lookups from cache are very fast and so the random number generation
266 	 * and the loop itself becomes a significant factor in the per-iteration
267 	 * timings. We try to compensate the results by measuring the overhead
268 	 * of the prng and subtract it from the reported results.
269 	 */
270 	prandom_seed_state(&prng, i915_selftest.random_seed);
271 	count = 0;
272 	kt = ktime_get();
273 	end_time = jiffies + HZ/10;
274 	do {
275 		u32 x;
276 
277 		/* Make sure the compiler doesn't optimise away the prng call */
278 		WRITE_ONCE(x, prandom_u32_state(&prng));
279 
280 		count++;
281 	} while (!time_after(jiffies, end_time));
282 	kt = ktime_sub(ktime_get(), kt);
283 	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
284 		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
285 	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
286 
287 	/* Benchmark (only) setting random context ids */
288 	prandom_seed_state(&prng, i915_selftest.random_seed);
289 	count = 0;
290 	kt = ktime_get();
291 	end_time = jiffies + HZ/10;
292 	do {
293 		u64 id = i915_prandom_u64_state(&prng);
294 
295 		__intel_timeline_sync_set(&tl, id, 0);
296 		count++;
297 	} while (!time_after(jiffies, end_time));
298 	kt = ktime_sub(ktime_get(), kt);
299 	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
300 	pr_info("%s: %lu random insertions, %lluns/insert\n",
301 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
302 
303 	/* Benchmark looking up the exact same context ids as we just set */
304 	prandom_seed_state(&prng, i915_selftest.random_seed);
305 	end_time = count;
306 	kt = ktime_get();
307 	while (end_time--) {
308 		u64 id = i915_prandom_u64_state(&prng);
309 
310 		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
311 			mock_timeline_fini(&tl);
312 			pr_err("Lookup of %llu failed\n", id);
313 			return -EINVAL;
314 		}
315 	}
316 	kt = ktime_sub(ktime_get(), kt);
317 	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
318 	pr_info("%s: %lu random lookups, %lluns/lookup\n",
319 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
320 
321 	mock_timeline_fini(&tl);
322 	cond_resched();
323 
324 	mock_timeline_init(&tl, 0);
325 
326 	/* Benchmark setting the first N (in order) contexts */
327 	count = 0;
328 	kt = ktime_get();
329 	end_time = jiffies + HZ/10;
330 	do {
331 		__intel_timeline_sync_set(&tl, count++, 0);
332 	} while (!time_after(jiffies, end_time));
333 	kt = ktime_sub(ktime_get(), kt);
334 	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
335 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
336 
337 	/* Benchmark looking up the exact same context ids as we just set */
338 	end_time = count;
339 	kt = ktime_get();
340 	while (end_time--) {
341 		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
342 			pr_err("Lookup of %lu failed\n", end_time);
343 			mock_timeline_fini(&tl);
344 			return -EINVAL;
345 		}
346 	}
347 	kt = ktime_sub(ktime_get(), kt);
348 	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
349 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
350 
351 	mock_timeline_fini(&tl);
352 	cond_resched();
353 
354 	mock_timeline_init(&tl, 0);
355 
356 	/* Benchmark searching for a random context id and maybe changing it */
357 	prandom_seed_state(&prng, i915_selftest.random_seed);
358 	count = 0;
359 	kt = ktime_get();
360 	end_time = jiffies + HZ/10;
361 	do {
362 		u32 id = random_engine(&prng);
363 		u32 seqno = prandom_u32_state(&prng);
364 
365 		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
366 			__intel_timeline_sync_set(&tl, id, seqno);
367 
368 		count++;
369 	} while (!time_after(jiffies, end_time));
370 	kt = ktime_sub(ktime_get(), kt);
371 	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
372 	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
373 		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
374 	mock_timeline_fini(&tl);
375 	cond_resched();
376 
377 	/* Benchmark searching for a known context id and changing the seqno */
378 	for (last_order = 1, order = 1; order < 32;
379 	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
380 		unsigned int mask = BIT(order) - 1;
381 
382 		mock_timeline_init(&tl, 0);
383 
384 		count = 0;
385 		kt = ktime_get();
386 		end_time = jiffies + HZ/10;
387 		do {
388 			/* Without assuming too many details of the underlying
389 			 * implementation, try to identify its phase-changes
390 			 * (if any)!
391 			 */
392 			u64 id = (u64)(count & mask) << order;
393 
394 			__intel_timeline_sync_is_later(&tl, id, 0);
395 			__intel_timeline_sync_set(&tl, id, 0);
396 
397 			count++;
398 		} while (!time_after(jiffies, end_time));
399 		kt = ktime_sub(ktime_get(), kt);
400 		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
401 			__func__, count, order,
402 			(long long)div64_ul(ktime_to_ns(kt), count));
403 		mock_timeline_fini(&tl);
404 		cond_resched();
405 	}
406 
407 	return 0;
408 }
409 
410 int intel_timeline_mock_selftests(void)
411 {
412 	static const struct i915_subtest tests[] = {
413 		SUBTEST(mock_hwsp_freelist),
414 		SUBTEST(igt_sync),
415 		SUBTEST(bench_sync),
416 	};
417 
418 	return i915_subtests(tests, NULL);
419 }
420 
421 static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
422 {
423 	u32 *cs;
424 
425 	cs = intel_ring_begin(rq, 4);
426 	if (IS_ERR(cs))
427 		return PTR_ERR(cs);
428 
429 	if (INTEL_GEN(rq->i915) >= 8) {
430 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
431 		*cs++ = addr;
432 		*cs++ = 0;
433 		*cs++ = value;
434 	} else if (INTEL_GEN(rq->i915) >= 4) {
435 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
436 		*cs++ = 0;
437 		*cs++ = addr;
438 		*cs++ = value;
439 	} else {
440 		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
441 		*cs++ = addr;
442 		*cs++ = value;
443 		*cs++ = MI_NOOP;
444 	}
445 
446 	intel_ring_advance(rq, cs);
447 
448 	return 0;
449 }
450 
451 static struct i915_request *
452 tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
453 {
454 	struct i915_request *rq;
455 	int err;
456 
457 	err = intel_timeline_pin(tl);
458 	if (err) {
459 		rq = ERR_PTR(err);
460 		goto out;
461 	}
462 
463 	rq = intel_engine_create_kernel_request(engine);
464 	if (IS_ERR(rq))
465 		goto out_unpin;
466 
467 	i915_request_get(rq);
468 
469 	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
470 	i915_request_add(rq);
471 	if (err) {
472 		i915_request_put(rq);
473 		rq = ERR_PTR(err);
474 	}
475 
476 out_unpin:
477 	intel_timeline_unpin(tl);
478 out:
479 	if (IS_ERR(rq))
480 		pr_err("Failed to write to timeline!\n");
481 	return rq;
482 }
483 
484 static struct intel_timeline *
485 checked_intel_timeline_create(struct intel_gt *gt)
486 {
487 	struct intel_timeline *tl;
488 
489 	tl = intel_timeline_create(gt, NULL);
490 	if (IS_ERR(tl))
491 		return tl;
492 
493 	if (*tl->hwsp_seqno != tl->seqno) {
494 		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
495 		       *tl->hwsp_seqno, tl->seqno);
496 		intel_timeline_put(tl);
497 		return ERR_PTR(-EINVAL);
498 	}
499 
500 	return tl;
501 }
502 
503 static int live_hwsp_engine(void *arg)
504 {
505 #define NUM_TIMELINES 4096
506 	struct intel_gt *gt = arg;
507 	struct intel_timeline **timelines;
508 	struct intel_engine_cs *engine;
509 	enum intel_engine_id id;
510 	unsigned long count, n;
511 	int err = 0;
512 
513 	/*
514 	 * Create a bunch of timelines and check we can write
515 	 * independently to each of their breadcrumb slots.
516 	 */
517 
518 	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
519 				   sizeof(*timelines),
520 				   GFP_KERNEL);
521 	if (!timelines)
522 		return -ENOMEM;
523 
524 	count = 0;
525 	for_each_engine(engine, gt, id) {
526 		if (!intel_engine_can_store_dword(engine))
527 			continue;
528 
529 		intel_engine_pm_get(engine);
530 
531 		for (n = 0; n < NUM_TIMELINES; n++) {
532 			struct intel_timeline *tl;
533 			struct i915_request *rq;
534 
535 			tl = checked_intel_timeline_create(gt);
536 			if (IS_ERR(tl)) {
537 				err = PTR_ERR(tl);
538 				break;
539 			}
540 
541 			rq = tl_write(tl, engine, count);
542 			if (IS_ERR(rq)) {
543 				intel_timeline_put(tl);
544 				err = PTR_ERR(rq);
545 				break;
546 			}
547 
548 			timelines[count++] = tl;
549 			i915_request_put(rq);
550 		}
551 
552 		intel_engine_pm_put(engine);
553 		if (err)
554 			break;
555 	}
556 
557 	if (igt_flush_test(gt->i915))
558 		err = -EIO;
559 
560 	for (n = 0; n < count; n++) {
561 		struct intel_timeline *tl = timelines[n];
562 
563 		if (!err && *tl->hwsp_seqno != n) {
564 			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
565 			       n, *tl->hwsp_seqno);
566 			err = -EINVAL;
567 		}
568 		intel_timeline_put(tl);
569 	}
570 
571 	kvfree(timelines);
572 	return err;
573 #undef NUM_TIMELINES
574 }
575 
576 static int live_hwsp_alternate(void *arg)
577 {
578 #define NUM_TIMELINES 4096
579 	struct intel_gt *gt = arg;
580 	struct intel_timeline **timelines;
581 	struct intel_engine_cs *engine;
582 	enum intel_engine_id id;
583 	unsigned long count, n;
584 	int err = 0;
585 
586 	/*
587 	 * Create a bunch of timelines and check we can write
588 	 * independently to each of their breadcrumb slots with adjacent
589 	 * engines.
590 	 */
591 
592 	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
593 				   sizeof(*timelines),
594 				   GFP_KERNEL);
595 	if (!timelines)
596 		return -ENOMEM;
597 
598 	count = 0;
599 	for (n = 0; n < NUM_TIMELINES; n++) {
600 		for_each_engine(engine, gt, id) {
601 			struct intel_timeline *tl;
602 			struct i915_request *rq;
603 
604 			if (!intel_engine_can_store_dword(engine))
605 				continue;
606 
607 			tl = checked_intel_timeline_create(gt);
608 			if (IS_ERR(tl)) {
609 				err = PTR_ERR(tl);
610 				goto out;
611 			}
612 
613 			intel_engine_pm_get(engine);
614 			rq = tl_write(tl, engine, count);
615 			intel_engine_pm_put(engine);
616 			if (IS_ERR(rq)) {
617 				intel_timeline_put(tl);
618 				err = PTR_ERR(rq);
619 				goto out;
620 			}
621 
622 			timelines[count++] = tl;
623 			i915_request_put(rq);
624 		}
625 	}
626 
627 out:
628 	if (igt_flush_test(gt->i915))
629 		err = -EIO;
630 
631 	for (n = 0; n < count; n++) {
632 		struct intel_timeline *tl = timelines[n];
633 
634 		if (!err && *tl->hwsp_seqno != n) {
635 			pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
636 			       n, *tl->hwsp_seqno);
637 			err = -EINVAL;
638 		}
639 		intel_timeline_put(tl);
640 	}
641 
642 	kvfree(timelines);
643 	return err;
644 #undef NUM_TIMELINES
645 }
646 
647 static int live_hwsp_wrap(void *arg)
648 {
649 	struct intel_gt *gt = arg;
650 	struct intel_engine_cs *engine;
651 	struct intel_timeline *tl;
652 	enum intel_engine_id id;
653 	int err = 0;
654 
655 	/*
656 	 * Across a seqno wrap, we need to keep the old cacheline alive for
657 	 * foreign GPU references.
658 	 */
659 
660 	tl = intel_timeline_create(gt, NULL);
661 	if (IS_ERR(tl))
662 		return PTR_ERR(tl);
663 
664 	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
665 		goto out_free;
666 
667 	err = intel_timeline_pin(tl);
668 	if (err)
669 		goto out_free;
670 
671 	for_each_engine(engine, gt, id) {
672 		const u32 *hwsp_seqno[2];
673 		struct i915_request *rq;
674 		u32 seqno[2];
675 
676 		if (!intel_engine_can_store_dword(engine))
677 			continue;
678 
679 		rq = intel_engine_create_kernel_request(engine);
680 		if (IS_ERR(rq)) {
681 			err = PTR_ERR(rq);
682 			goto out;
683 		}
684 
685 		tl->seqno = -4u;
686 
687 		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
688 		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
689 		mutex_unlock(&tl->mutex);
690 		if (err) {
691 			i915_request_add(rq);
692 			goto out;
693 		}
694 		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
695 			 seqno[0], tl->hwsp_offset);
696 
697 		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
698 		if (err) {
699 			i915_request_add(rq);
700 			goto out;
701 		}
702 		hwsp_seqno[0] = tl->hwsp_seqno;
703 
704 		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
705 		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
706 		mutex_unlock(&tl->mutex);
707 		if (err) {
708 			i915_request_add(rq);
709 			goto out;
710 		}
711 		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
712 			 seqno[1], tl->hwsp_offset);
713 
714 		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
715 		if (err) {
716 			i915_request_add(rq);
717 			goto out;
718 		}
719 		hwsp_seqno[1] = tl->hwsp_seqno;
720 
721 		/* With wrap should come a new hwsp */
722 		GEM_BUG_ON(seqno[1] >= seqno[0]);
723 		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
724 
725 		i915_request_add(rq);
726 
727 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
728 			pr_err("Wait for timeline writes timed out!\n");
729 			err = -EIO;
730 			goto out;
731 		}
732 
733 		if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
734 			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
735 			       *hwsp_seqno[0], *hwsp_seqno[1],
736 			       seqno[0], seqno[1]);
737 			err = -EINVAL;
738 			goto out;
739 		}
740 
741 		intel_gt_retire_requests(gt); /* recycle HWSP */
742 	}
743 
744 out:
745 	if (igt_flush_test(gt->i915))
746 		err = -EIO;
747 
748 	intel_timeline_unpin(tl);
749 out_free:
750 	intel_timeline_put(tl);
751 	return err;
752 }
753 
754 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
755 				     unsigned long *saved)
756 {
757 	*saved = engine->props.heartbeat_interval_ms;
758 	engine->props.heartbeat_interval_ms = 0;
759 
760 	intel_engine_pm_get(engine);
761 	intel_engine_park_heartbeat(engine);
762 }
763 
764 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
765 				    unsigned long saved)
766 {
767 	intel_engine_pm_put(engine);
768 
769 	engine->props.heartbeat_interval_ms = saved;
770 }
771 
772 static int live_hwsp_rollover_kernel(void *arg)
773 {
774 	struct intel_gt *gt = arg;
775 	struct intel_engine_cs *engine;
776 	enum intel_engine_id id;
777 	int err = 0;
778 
779 	/*
780 	 * Run the host for long enough, and even the kernel context will
781 	 * see a seqno rollover.
782 	 */
783 
784 	for_each_engine(engine, gt, id) {
785 		struct intel_context *ce = engine->kernel_context;
786 		struct intel_timeline *tl = ce->timeline;
787 		struct i915_request *rq[3] = {};
788 		unsigned long heartbeat;
789 		int i;
790 
791 		engine_heartbeat_disable(engine, &heartbeat);
792 		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
793 			err = -EIO;
794 			goto out;
795 		}
796 
797 		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
798 		tl->seqno = 0;
799 		timeline_rollback(tl);
800 		timeline_rollback(tl);
801 		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
802 
803 		for (i = 0; i < ARRAY_SIZE(rq); i++) {
804 			struct i915_request *this;
805 
806 			this = i915_request_create(ce);
807 			if (IS_ERR(this)) {
808 				err = PTR_ERR(this);
809 				goto out;
810 			}
811 
812 			pr_debug("%s: create fence.seqnp:%d\n",
813 				 engine->name,
814 				 lower_32_bits(this->fence.seqno));
815 
816 			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
817 
818 			rq[i] = i915_request_get(this);
819 			i915_request_add(this);
820 		}
821 
822 		/* We expected a wrap! */
823 		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
824 
825 		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
826 			pr_err("Wait for timeline wrap timed out!\n");
827 			err = -EIO;
828 			goto out;
829 		}
830 
831 		for (i = 0; i < ARRAY_SIZE(rq); i++) {
832 			if (!i915_request_completed(rq[i])) {
833 				pr_err("Pre-wrap request not completed!\n");
834 				err = -EINVAL;
835 				goto out;
836 			}
837 		}
838 
839 out:
840 		for (i = 0; i < ARRAY_SIZE(rq); i++)
841 			i915_request_put(rq[i]);
842 		engine_heartbeat_enable(engine, heartbeat);
843 		if (err)
844 			break;
845 	}
846 
847 	if (igt_flush_test(gt->i915))
848 		err = -EIO;
849 
850 	return err;
851 }
852 
853 static int live_hwsp_rollover_user(void *arg)
854 {
855 	struct intel_gt *gt = arg;
856 	struct intel_engine_cs *engine;
857 	enum intel_engine_id id;
858 	int err = 0;
859 
860 	/*
861 	 * Simulate a long running user context, and force the seqno wrap
862 	 * on the user's timeline.
863 	 */
864 
865 	for_each_engine(engine, gt, id) {
866 		struct i915_request *rq[3] = {};
867 		struct intel_timeline *tl;
868 		struct intel_context *ce;
869 		int i;
870 
871 		ce = intel_context_create(engine);
872 		if (IS_ERR(ce))
873 			return PTR_ERR(ce);
874 
875 		err = intel_context_alloc_state(ce);
876 		if (err)
877 			goto out;
878 
879 		tl = ce->timeline;
880 		if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
881 			goto out;
882 
883 		timeline_rollback(tl);
884 		timeline_rollback(tl);
885 		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
886 
887 		for (i = 0; i < ARRAY_SIZE(rq); i++) {
888 			struct i915_request *this;
889 
890 			this = intel_context_create_request(ce);
891 			if (IS_ERR(this)) {
892 				err = PTR_ERR(this);
893 				goto out;
894 			}
895 
896 			pr_debug("%s: create fence.seqnp:%d\n",
897 				 engine->name,
898 				 lower_32_bits(this->fence.seqno));
899 
900 			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
901 
902 			rq[i] = i915_request_get(this);
903 			i915_request_add(this);
904 		}
905 
906 		/* We expected a wrap! */
907 		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
908 
909 		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
910 			pr_err("Wait for timeline wrap timed out!\n");
911 			err = -EIO;
912 			goto out;
913 		}
914 
915 		for (i = 0; i < ARRAY_SIZE(rq); i++) {
916 			if (!i915_request_completed(rq[i])) {
917 				pr_err("Pre-wrap request not completed!\n");
918 				err = -EINVAL;
919 				goto out;
920 			}
921 		}
922 
923 out:
924 		for (i = 0; i < ARRAY_SIZE(rq); i++)
925 			i915_request_put(rq[i]);
926 		intel_context_put(ce);
927 		if (err)
928 			break;
929 	}
930 
931 	if (igt_flush_test(gt->i915))
932 		err = -EIO;
933 
934 	return err;
935 }
936 
937 static int live_hwsp_recycle(void *arg)
938 {
939 	struct intel_gt *gt = arg;
940 	struct intel_engine_cs *engine;
941 	enum intel_engine_id id;
942 	unsigned long count;
943 	int err = 0;
944 
945 	/*
946 	 * Check seqno writes into one timeline at a time. We expect to
947 	 * recycle the breadcrumb slot between iterations and neither
948 	 * want to confuse ourselves or the GPU.
949 	 */
950 
951 	count = 0;
952 	for_each_engine(engine, gt, id) {
953 		IGT_TIMEOUT(end_time);
954 
955 		if (!intel_engine_can_store_dword(engine))
956 			continue;
957 
958 		intel_engine_pm_get(engine);
959 
960 		do {
961 			struct intel_timeline *tl;
962 			struct i915_request *rq;
963 
964 			tl = checked_intel_timeline_create(gt);
965 			if (IS_ERR(tl)) {
966 				err = PTR_ERR(tl);
967 				break;
968 			}
969 
970 			rq = tl_write(tl, engine, count);
971 			if (IS_ERR(rq)) {
972 				intel_timeline_put(tl);
973 				err = PTR_ERR(rq);
974 				break;
975 			}
976 
977 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
978 				pr_err("Wait for timeline writes timed out!\n");
979 				i915_request_put(rq);
980 				intel_timeline_put(tl);
981 				err = -EIO;
982 				break;
983 			}
984 
985 			if (*tl->hwsp_seqno != count) {
986 				pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
987 				       count, *tl->hwsp_seqno);
988 				err = -EINVAL;
989 			}
990 
991 			i915_request_put(rq);
992 			intel_timeline_put(tl);
993 			count++;
994 
995 			if (err)
996 				break;
997 		} while (!__igt_timeout(end_time, NULL));
998 
999 		intel_engine_pm_put(engine);
1000 		if (err)
1001 			break;
1002 	}
1003 
1004 	return err;
1005 }
1006 
1007 int intel_timeline_live_selftests(struct drm_i915_private *i915)
1008 {
1009 	static const struct i915_subtest tests[] = {
1010 		SUBTEST(live_hwsp_recycle),
1011 		SUBTEST(live_hwsp_engine),
1012 		SUBTEST(live_hwsp_alternate),
1013 		SUBTEST(live_hwsp_wrap),
1014 		SUBTEST(live_hwsp_rollover_kernel),
1015 		SUBTEST(live_hwsp_rollover_user),
1016 	};
1017 
1018 	if (intel_gt_is_wedged(&i915->gt))
1019 		return 0;
1020 
1021 	return intel_gt_live_subtests(tests, &i915->gt);
1022 }
1023