xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_engine_pm.c (revision 53e8558837be58c1d44d50ad87247a8c56c95c13)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/sort.h>
7 
8 #include "i915_selftest.h"
9 #include "intel_gpu_commands.h"
10 #include "intel_gt_clock_utils.h"
11 #include "selftest_engine.h"
12 #include "selftest_engine_heartbeat.h"
13 #include "selftests/igt_atomic.h"
14 #include "selftests/igt_flush_test.h"
15 #include "selftests/igt_spinner.h"
16 
17 #define COUNT 5
18 
/*
 * Three-way comparator for sort() over an array of u64.
 *
 * Returns a negative value if *A < *B, zero if they are equal and a
 * positive value if *A > *B.
 *
 * The comparison is done explicitly rather than by returning the
 * difference: a u64 difference truncated to int reports the wrong sign
 * (or a false "equal") whenever the operands differ by 2^31 or more.
 */
static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	if (*a > *b)
		return 1;
	return 0;
}
25 
26 static u64 trifilter(u64 *a)
27 {
28 	sort(a, COUNT, sizeof(*a), cmp_u64, NULL);
29 	return (a[1] + 2 * a[2] + a[3]) >> 2;
30 }
31 
32 static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value)
33 {
34 	*cs++ = MI_SEMAPHORE_WAIT |
35 		MI_SEMAPHORE_GLOBAL_GTT |
36 		MI_SEMAPHORE_POLL |
37 		op;
38 	*cs++ = value;
39 	*cs++ = offset;
40 	*cs++ = 0;
41 
42 	return cs;
43 }
44 
45 static u32 *emit_store(u32 *cs, u32 offset, u32 value)
46 {
47 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
48 	*cs++ = offset;
49 	*cs++ = 0;
50 	*cs++ = value;
51 
52 	return cs;
53 }
54 
55 static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset)
56 {
57 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
58 	*cs++ = i915_mmio_reg_offset(reg);
59 	*cs++ = offset;
60 	*cs++ = 0;
61 
62 	return cs;
63 }
64 
65 static void write_semaphore(u32 *x, u32 value)
66 {
67 	WRITE_ONCE(*x, value);
68 	wmb();
69 }
70 
71 static int __measure_timestamps(struct intel_context *ce,
72 				u64 *dt, u64 *d_ring, u64 *d_ctx)
73 {
74 	struct intel_engine_cs *engine = ce->engine;
75 	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5);
76 	u32 offset = i915_ggtt_offset(engine->status_page.vma);
77 	struct i915_request *rq;
78 	u32 *cs;
79 
80 	rq = intel_context_create_request(ce);
81 	if (IS_ERR(rq))
82 		return PTR_ERR(rq);
83 
84 	cs = intel_ring_begin(rq, 28);
85 	if (IS_ERR(cs)) {
86 		i915_request_add(rq);
87 		return PTR_ERR(cs);
88 	}
89 
90 	/* Signal & wait for start */
91 	cs = emit_store(cs, offset + 4008, 1);
92 	cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1);
93 
94 	cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000);
95 	cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004);
96 
97 	/* Busy wait */
98 	cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1);
99 
100 	cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016);
101 	cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012);
102 
103 	intel_ring_advance(rq, cs);
104 	i915_request_get(rq);
105 	i915_request_add(rq);
106 	intel_engine_flush_submission(engine);
107 
108 	/* Wait for the request to start executing, that then waits for us */
109 	while (READ_ONCE(sema[2]) == 0)
110 		cpu_relax();
111 
112 	/* Run the request for a 100us, sampling timestamps before/after */
113 	local_irq_disable();
114 	write_semaphore(&sema[2], 0);
115 	while (READ_ONCE(sema[1]) == 0) /* wait for the gpu to catch up */
116 		cpu_relax();
117 	*dt = local_clock();
118 	udelay(100);
119 	*dt = local_clock() - *dt;
120 	write_semaphore(&sema[2], 1);
121 	local_irq_enable();
122 
123 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
124 		i915_request_put(rq);
125 		return -ETIME;
126 	}
127 	i915_request_put(rq);
128 
129 	pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n",
130 		 engine->name, sema[1], sema[3], sema[0], sema[4]);
131 
132 	*d_ctx = sema[3] - sema[1];
133 	*d_ring = sema[4] - sema[0];
134 	return 0;
135 }
136 
137 static int __live_engine_timestamps(struct intel_engine_cs *engine)
138 {
139 	u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt;
140 	struct intel_context *ce;
141 	int i, err = 0;
142 
143 	ce = intel_context_create(engine);
144 	if (IS_ERR(ce))
145 		return PTR_ERR(ce);
146 
147 	for (i = 0; i < COUNT; i++) {
148 		err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]);
149 		if (err)
150 			break;
151 	}
152 	intel_context_put(ce);
153 	if (err)
154 		return err;
155 
156 	dt = trifilter(st);
157 	d_ring = trifilter(s_ring);
158 	d_ctx = trifilter(s_ctx);
159 
160 	pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%lldns, RING_TIMESTAMP:%lldns\n",
161 		engine->name, dt,
162 		intel_gt_clock_interval_to_ns(engine->gt, d_ctx),
163 		intel_gt_clock_interval_to_ns(engine->gt, d_ring));
164 
165 	d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring);
166 	if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) {
167 		pr_err("%s Mismatch between ring timestamp and walltime!\n",
168 		       engine->name);
169 		return -EINVAL;
170 	}
171 
172 	d_ring = trifilter(s_ring);
173 	d_ctx = trifilter(s_ctx);
174 
175 	d_ctx *= engine->gt->clock_frequency;
176 	if (GRAPHICS_VER(engine->i915) == 11)
177 		d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? */
178 	else
179 		d_ring *= engine->gt->clock_frequency;
180 
181 	if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) {
182 		pr_err("%s Mismatch between ring and context timestamps!\n",
183 		       engine->name);
184 		return -EINVAL;
185 	}
186 
187 	return 0;
188 }
189 
190 static int live_engine_timestamps(void *arg)
191 {
192 	struct intel_gt *gt = arg;
193 	struct intel_engine_cs *engine;
194 	enum intel_engine_id id;
195 
196 	/*
197 	 * Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share
198 	 * the same CS clock.
199 	 */
200 
201 	if (GRAPHICS_VER(gt->i915) < 8)
202 		return 0;
203 
204 	for_each_engine(engine, gt, id) {
205 		int err;
206 
207 		st_engine_heartbeat_disable(engine);
208 		err = __live_engine_timestamps(engine);
209 		st_engine_heartbeat_enable(engine);
210 		if (err)
211 			return err;
212 	}
213 
214 	return 0;
215 }
216 
217 static int live_engine_busy_stats(void *arg)
218 {
219 	struct intel_gt *gt = arg;
220 	struct intel_engine_cs *engine;
221 	enum intel_engine_id id;
222 	struct igt_spinner spin;
223 	int err = 0;
224 
225 	/*
226 	 * Check that if an engine supports busy-stats, they tell the truth.
227 	 */
228 
229 	if (igt_spinner_init(&spin, gt))
230 		return -ENOMEM;
231 
232 	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
233 	for_each_engine(engine, gt, id) {
234 		struct i915_request *rq;
235 		ktime_t de, dt;
236 		ktime_t t[2];
237 
238 		if (!intel_engine_supports_stats(engine))
239 			continue;
240 
241 		if (!intel_engine_can_store_dword(engine))
242 			continue;
243 
244 		if (intel_gt_pm_wait_for_idle(gt)) {
245 			err = -EBUSY;
246 			break;
247 		}
248 
249 		st_engine_heartbeat_disable(engine);
250 
251 		ENGINE_TRACE(engine, "measuring idle time\n");
252 		preempt_disable();
253 		de = intel_engine_get_busy_time(engine, &t[0]);
254 		udelay(100);
255 		de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
256 		preempt_enable();
257 		dt = ktime_sub(t[1], t[0]);
258 		if (de < 0 || de > 10) {
259 			pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n",
260 			       engine->name,
261 			       de, (int)div64_u64(100 * de, dt), dt);
262 			GEM_TRACE_DUMP();
263 			err = -EINVAL;
264 			goto end;
265 		}
266 
267 		/* 100% busy */
268 		rq = igt_spinner_create_request(&spin,
269 						engine->kernel_context,
270 						MI_NOOP);
271 		if (IS_ERR(rq)) {
272 			err = PTR_ERR(rq);
273 			goto end;
274 		}
275 		i915_request_add(rq);
276 
277 		if (!igt_wait_for_spinner(&spin, rq)) {
278 			intel_gt_set_wedged(engine->gt);
279 			err = -ETIME;
280 			goto end;
281 		}
282 
283 		ENGINE_TRACE(engine, "measuring busy time\n");
284 		preempt_disable();
285 		de = intel_engine_get_busy_time(engine, &t[0]);
286 		udelay(100);
287 		de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
288 		preempt_enable();
289 		dt = ktime_sub(t[1], t[0]);
290 		if (100 * de < 95 * dt || 95 * de > 100 * dt) {
291 			pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n",
292 			       engine->name,
293 			       de, (int)div64_u64(100 * de, dt), dt);
294 			GEM_TRACE_DUMP();
295 			err = -EINVAL;
296 			goto end;
297 		}
298 
299 end:
300 		st_engine_heartbeat_enable(engine);
301 		igt_spinner_end(&spin);
302 		if (igt_flush_test(gt->i915))
303 			err = -EIO;
304 		if (err)
305 			break;
306 	}
307 
308 	igt_spinner_fini(&spin);
309 	if (igt_flush_test(gt->i915))
310 		err = -EIO;
311 	return err;
312 }
313 
314 static int live_engine_pm(void *arg)
315 {
316 	struct intel_gt *gt = arg;
317 	struct intel_engine_cs *engine;
318 	enum intel_engine_id id;
319 
320 	/*
321 	 * Check we can call intel_engine_pm_put from any context. No
322 	 * failures are reported directly, but if we mess up lockdep should
323 	 * tell us.
324 	 */
325 	if (intel_gt_pm_wait_for_idle(gt)) {
326 		pr_err("Unable to flush GT pm before test\n");
327 		return -EBUSY;
328 	}
329 
330 	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
331 	for_each_engine(engine, gt, id) {
332 		const typeof(*igt_atomic_phases) *p;
333 
334 		for (p = igt_atomic_phases; p->name; p++) {
335 			/*
336 			 * Acquisition is always synchronous, except if we
337 			 * know that the engine is already awake, in which
338 			 * case we should use intel_engine_pm_get_if_awake()
339 			 * to atomically grab the wakeref.
340 			 *
341 			 * In practice,
342 			 *    intel_engine_pm_get();
343 			 *    intel_engine_pm_put();
344 			 * occurs in one thread, while simultaneously
345 			 *    intel_engine_pm_get_if_awake();
346 			 *    intel_engine_pm_put();
347 			 * occurs from atomic context in another.
348 			 */
349 			GEM_BUG_ON(intel_engine_pm_is_awake(engine));
350 			intel_engine_pm_get(engine);
351 
352 			p->critical_section_begin();
353 			if (!intel_engine_pm_get_if_awake(engine))
354 				pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
355 				       engine->name, p->name);
356 			else
357 				intel_engine_pm_put_async(engine);
358 			intel_engine_pm_put_async(engine);
359 			p->critical_section_end();
360 
361 			intel_engine_pm_flush(engine);
362 
363 			if (intel_engine_pm_is_awake(engine)) {
364 				pr_err("%s is still awake after flushing pm\n",
365 				       engine->name);
366 				return -EINVAL;
367 			}
368 
369 			/* gt wakeref is async (deferred to workqueue) */
370 			if (intel_gt_pm_wait_for_idle(gt)) {
371 				pr_err("GT failed to idle\n");
372 				return -EINVAL;
373 			}
374 		}
375 	}
376 
377 	return 0;
378 }
379 
380 int live_engine_pm_selftests(struct intel_gt *gt)
381 {
382 	static const struct i915_subtest tests[] = {
383 		SUBTEST(live_engine_timestamps),
384 		SUBTEST(live_engine_busy_stats),
385 		SUBTEST(live_engine_pm),
386 	};
387 
388 	return intel_gt_live_subtests(tests, gt);
389 }
390