// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "i915_selftest.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "selftest_engine.h"
#include "selftest_engine_heartbeat.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"

#define COUNT 5

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

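/*
 * Combine the COUNT samples into one value: sort them, drop the extremes
 * and return a weighted average of the middle three,
 * (a[1] + 2 * a[2] + a[3]) / 4, to reject the occasional outlier.
 */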
static u64 trifilter(u64 *a)
{
	sort(a, COUNT, sizeof(*a), cmp_u64, NULL);
	return (a[1] + 2 * a[2] + a[3]) >> 2;
}

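/*
 * Emit an MI_SEMAPHORE_WAIT that polls the dword at the GGTT @offset until
 * the comparison @op against @value succeeds.
 */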
static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value)
{
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		op;
	*cs++ = value;
	*cs++ = offset;
	*cs++ = 0;

	return cs;
}

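/* Store an immediate dword @value at the GGTT @offset */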
static u32 *emit_store(u32 *cs, u32 offset, u32 value)
{
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

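/* Copy the current value of the @reg MMIO register to the GGTT @offset */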
static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(reg);
	*cs++ = offset;
	*cs++ = 0;

	return cs;
}

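/*
 * Update a semaphore dword from the CPU, with a write barrier so the store
 * is ordered ahead of the later steps of the handshake with the GPU.
 */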
static void write_semaphore(u32 *x, u32 value)
{
	WRITE_ONCE(*x, value);
	wmb();
}

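/*
 * Sample RING_TIMESTAMP and CTX_TIMESTAMP around a known CPU delay.
 *
 * Five status page dwords (byte offsets 4000-4016, sema[0..4]) are used:
 * the request writes 1 to sema[2] to signal that it has started, spins until
 * the CPU clears it, samples the start timestamps into sema[0] (RING) and
 * sema[1] (CTX), spins until the CPU sets sema[2] again, then samples the
 * end timestamps into sema[4] (RING) and sema[3] (CTX). The CPU measures
 * local_clock() across the same window, giving three views of the same
 * nominal 100us interval.
 */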
static int __measure_timestamps(struct intel_context *ce,
				u64 *dt, u64 *d_ring, u64 *d_ctx)
{
	struct intel_engine_cs *engine = ce->engine;
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5);
	u32 offset = i915_ggtt_offset(engine->status_page.vma);
	struct i915_request *rq;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 28);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	/* Signal & wait for start */
	cs = emit_store(cs, offset + 4008, 1);
	cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1);

	cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000);
	cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004);

	/* Busy wait */
	cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1);

	cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016);
	cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012);

	intel_ring_advance(rq, cs);
	i915_request_get(rq);
	i915_request_add(rq);
	intel_engine_flush_submission(engine);

	/* Wait for the request to start executing; it then waits for us */
	while (READ_ONCE(sema[2]) == 0)
		cpu_relax();

	/* Run the request for 100us, sampling timestamps before/after */
	local_irq_disable();
	write_semaphore(&sema[2], 0);
	while (READ_ONCE(sema[1]) == 0) /* wait for the gpu to catch up */
		cpu_relax();
	*dt = local_clock();
	udelay(100);
	*dt = local_clock() - *dt;
	write_semaphore(&sema[2], 1);
	local_irq_enable();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		return -ETIME;
	}
	i915_request_put(rq);

	pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n",
		 engine->name, sema[1], sema[3], sema[0], sema[4]);

	*d_ctx = sema[3] - sema[1];
	*d_ring = sema[4] - sema[0];
	return 0;
}

static int __live_engine_timestamps(struct intel_engine_cs *engine)
{
	u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt;
	struct intel_context *ce;
	int i, err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	for (i = 0; i < COUNT; i++) {
		err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]);
		if (err)
			break;
	}
	intel_context_put(ce);
	if (err)
		return err;

	dt = trifilter(st);
	d_ring = trifilter(s_ring);
	d_ctx = trifilter(s_ctx);

	pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%lldns, RING_TIMESTAMP:%lldns\n",
		engine->name, dt,
		intel_gt_clock_interval_to_ns(engine->gt, d_ctx),
		intel_gt_clock_interval_to_ns(engine->gt, d_ring));

	d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring);
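	/* Walltime and RING_TIMESTAMP must agree to within a 4:3 ratio */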
	if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) {
		pr_err("%s Mismatch between ring timestamp and walltime!\n",
		       engine->name);
		return -EINVAL;
	}

	d_ring = trifilter(s_ring);
	d_ctx = trifilter(s_ctx);

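	/*
	 * Compare the two deltas in common units by cross-multiplying each
	 * with the other clock's frequency (gen11's CTX_TIMESTAMP apparently
	 * ticks at a fixed 12.5MHz, i.e. 80ns per tick).
	 */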
	d_ctx *= engine->gt->clock_frequency;
	if (GRAPHICS_VER(engine->i915) == 11)
		d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? */
	else
		d_ring *= engine->gt->clock_frequency;

	if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) {
		pr_err("%s Mismatch between ring and context timestamps!\n",
		       engine->name);
		return -EINVAL;
	}

	return 0;
}

static int live_engine_timestamps(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share
	 * the same CS clock.
	 */

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0;

	for_each_engine(engine, gt, id) {
		int err;

		st_engine_heartbeat_disable(engine);
		err = __live_engine_timestamps(engine);
		st_engine_heartbeat_enable(engine);
		if (err)
			return err;
	}

	return 0;
}

static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
{
	ktime_t start, unused, dt;

	if (!intel_engine_uses_guc(engine))
		return 0;

	/*
	 * In GuC mode of submission, the busyness stats may get updated after
	 * the batch starts running. Poll for a change in busyness and time out
	 * after 10ms.
	 */
	start = ktime_get();
	while (intel_engine_get_busy_time(engine, &unused) == busyness) {
		dt = ktime_get() - start;
		if (dt > 10000000) {
			pr_err("active wait timed out %lld\n", dt);
			ENGINE_TRACE(engine, "active wait timed out %lld\n", dt);
			return -ETIME;
		}
	}

	return 0;
}

static int live_engine_busy_stats(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that if an engine supports busy-stats, they tell the truth.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t busyness, dummy;
		ktime_t de, dt;
		ktime_t t[2];

		if (!intel_engine_supports_stats(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (intel_gt_pm_wait_for_idle(gt)) {
			err = -EBUSY;
			break;
		}

		st_engine_heartbeat_disable(engine);

		ENGINE_TRACE(engine, "measuring idle time\n");
		preempt_disable();
		de = intel_engine_get_busy_time(engine, &t[0]);
		udelay(100);
		de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
		preempt_enable();
		dt = ktime_sub(t[1], t[0]);
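		/* While asleep, only a negligible (<=10ns) amount of busyness may accrue */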
		if (de < 0 || de > 10) {
			pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n",
			       engine->name,
			       de, (int)div64_u64(100 * de, dt), dt);
			GEM_TRACE_DUMP();
			err = -EINVAL;
			goto end;
		}

		/* 100% busy */
		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto end;
		}
		i915_request_add(rq);

		busyness = intel_engine_get_busy_time(engine, &dummy);
		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(engine->gt);
			err = -ETIME;
			goto end;
		}

		err = __spin_until_busier(engine, busyness);
		if (err) {
			GEM_TRACE_DUMP();
			goto end;
		}

		ENGINE_TRACE(engine, "measuring busy time\n");
		preempt_disable();
		de = intel_engine_get_busy_time(engine, &t[0]);
		mdelay(10);
		de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
		preempt_enable();
		dt = ktime_sub(t[1], t[0]);
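		/* While spinning, the busy time should track walltime to within ~5% */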
		if (100 * de < 95 * dt || 95 * de > 100 * dt) {
			pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n",
			       engine->name,
			       de, (int)div64_u64(100 * de, dt), dt);
			GEM_TRACE_DUMP();
			err = -EINVAL;
			goto end;
		}

end:
		st_engine_heartbeat_enable(engine);
		igt_spinner_end(&spin);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	if (igt_flush_test(gt->i915))
		err = -EIO;
	return err;
}

static int live_engine_pm(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Check we can call intel_engine_pm_put from any context. No
	 * failures are reported directly, but if we mess up, lockdep should
	 * tell us.
	 */
	if (intel_gt_pm_wait_for_idle(gt)) {
		pr_err("Unable to flush GT pm before test\n");
		return -EBUSY;
	}

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	for_each_engine(engine, gt, id) {
		const typeof(*igt_atomic_phases) *p;

		for (p = igt_atomic_phases; p->name; p++) {
			/*
			 * Acquisition is always synchronous, except if we
			 * know that the engine is already awake, in which
			 * case we should use intel_engine_pm_get_if_awake()
			 * to atomically grab the wakeref.
			 *
			 * In practice,
			 *    intel_engine_pm_get();
			 *    intel_engine_pm_put();
			 * occurs in one thread, while simultaneously
			 *    intel_engine_pm_get_if_awake();
			 *    intel_engine_pm_put();
			 * occurs from atomic context in another.
			 */
			GEM_BUG_ON(intel_engine_pm_is_awake(engine));
			intel_engine_pm_get(engine);

			p->critical_section_begin();
			if (!intel_engine_pm_get_if_awake(engine))
				pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
				       engine->name, p->name);
			else
				intel_engine_pm_put_async(engine);
			intel_engine_pm_put_async(engine);
			p->critical_section_end();

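			/* The puts above are asynchronous; flush before checking the wakeref */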
			intel_engine_pm_flush(engine);

			if (intel_engine_pm_is_awake(engine)) {
				pr_err("%s is still awake after flushing pm\n",
				       engine->name);
				return -EINVAL;
			}

			/* gt wakeref is async (deferred to workqueue) */
			if (intel_gt_pm_wait_for_idle(gt)) {
				pr_err("GT failed to idle\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

int live_engine_pm_selftests(struct intel_gt *gt)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_engine_timestamps),
		SUBTEST(live_engine_busy_stats),
		SUBTEST(live_engine_pm),
	};

	return intel_gt_live_subtests(tests, gt);
}