xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c (revision f97cee494dc92395a668445bcd24d34c89f4ff8c)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/sort.h>
8 
9 #include "i915_drv.h"
10 
11 #include "intel_gt_requests.h"
12 #include "i915_selftest.h"
13 #include "selftest_engine_heartbeat.h"
14 
15 static int timeline_sync(struct intel_timeline *tl)
16 {
17 	struct dma_fence *fence;
18 	long timeout;
19 
20 	fence = i915_active_fence_get(&tl->last_request);
21 	if (!fence)
22 		return 0;
23 
24 	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
25 	dma_fence_put(fence);
26 	if (timeout < 0)
27 		return timeout;
28 
29 	return 0;
30 }
31 
32 static int engine_sync_barrier(struct intel_engine_cs *engine)
33 {
34 	return timeline_sync(engine->kernel_context->timeline);
35 }
36 
/*
 * Test fixture pairing an i915_active tracker with a reference count.
 *
 * The object is kmalloc'ed by pulse_create() and released by pulse_free()
 * once the last reference is dropped through pulse_put().
 */
struct pulse {
	/* Tracker initialised with pulse_active()/pulse_retire() as callbacks. */
	struct i915_active active;
	/*
	 * Initialised in pulse_create(); pulse_active() takes an extra
	 * reference and pulse_retire() drops one.
	 */
	struct kref kref;
};
41 
42 static int pulse_active(struct i915_active *active)
43 {
44 	kref_get(&container_of(active, struct pulse, active)->kref);
45 	return 0;
46 }
47 
48 static void pulse_free(struct kref *kref)
49 {
50 	kfree(container_of(kref, struct pulse, kref));
51 }
52 
53 static void pulse_put(struct pulse *p)
54 {
55 	kref_put(&p->kref, pulse_free);
56 }
57 
58 static void pulse_retire(struct i915_active *active)
59 {
60 	pulse_put(container_of(active, struct pulse, active));
61 }
62 
63 static struct pulse *pulse_create(void)
64 {
65 	struct pulse *p;
66 
67 	p = kmalloc(sizeof(*p), GFP_KERNEL);
68 	if (!p)
69 		return p;
70 
71 	kref_init(&p->kref);
72 	i915_active_init(&p->active, pulse_active, pulse_retire);
73 
74 	return p;
75 }
76 
77 static void pulse_unlock_wait(struct pulse *p)
78 {
79 	i915_active_unlock_wait(&p->active);
80 }
81 
82 static int __live_idle_pulse(struct intel_engine_cs *engine,
83 			     int (*fn)(struct intel_engine_cs *cs))
84 {
85 	struct pulse *p;
86 	int err;
87 
88 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
89 
90 	p = pulse_create();
91 	if (!p)
92 		return -ENOMEM;
93 
94 	err = i915_active_acquire(&p->active);
95 	if (err)
96 		goto out;
97 
98 	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
99 	if (err) {
100 		i915_active_release(&p->active);
101 		goto out;
102 	}
103 
104 	i915_active_acquire_barrier(&p->active);
105 	i915_active_release(&p->active);
106 
107 	GEM_BUG_ON(i915_active_is_idle(&p->active));
108 	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
109 
110 	err = fn(engine);
111 	if (err)
112 		goto out;
113 
114 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
115 
116 	if (engine_sync_barrier(engine)) {
117 		struct drm_printer m = drm_err_printer("pulse");
118 
119 		pr_err("%s: no heartbeat pulse?\n", engine->name);
120 		intel_engine_dump(engine, &m, "%s", engine->name);
121 
122 		err = -ETIME;
123 		goto out;
124 	}
125 
126 	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
127 
128 	pulse_unlock_wait(p); /* synchronize with the retirement callback */
129 
130 	if (!i915_active_is_idle(&p->active)) {
131 		struct drm_printer m = drm_err_printer("pulse");
132 
133 		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
134 		       engine->name);
135 		i915_active_print(&p->active, &m);
136 
137 		err = -EINVAL;
138 		goto out;
139 	}
140 
141 out:
142 	pulse_put(p);
143 	return err;
144 }
145 
146 static int live_idle_flush(void *arg)
147 {
148 	struct intel_gt *gt = arg;
149 	struct intel_engine_cs *engine;
150 	enum intel_engine_id id;
151 	int err = 0;
152 
153 	/* Check that we can flush the idle barriers */
154 
155 	for_each_engine(engine, gt, id) {
156 		st_engine_heartbeat_disable(engine);
157 		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
158 		st_engine_heartbeat_enable(engine);
159 		if (err)
160 			break;
161 	}
162 
163 	return err;
164 }
165 
166 static int live_idle_pulse(void *arg)
167 {
168 	struct intel_gt *gt = arg;
169 	struct intel_engine_cs *engine;
170 	enum intel_engine_id id;
171 	int err = 0;
172 
173 	/* Check that heartbeat pulses flush the idle barriers */
174 
175 	for_each_engine(engine, gt, id) {
176 		st_engine_heartbeat_disable(engine);
177 		err = __live_idle_pulse(engine, intel_engine_pulse);
178 		st_engine_heartbeat_enable(engine);
179 		if (err && err != -ENODEV)
180 			break;
181 
182 		err = 0;
183 	}
184 
185 	return err;
186 }
187 
/*
 * Three-way comparison of two u32 values, for use as a sort() callback.
 *
 * Returns a negative value if *_a < *_b, zero if they are equal, and a
 * positive value if *_a > *_b.
 *
 * The comparison is done explicitly rather than by subtraction: the
 * difference of two u32 values converted to int has the wrong sign whenever
 * the operands are more than INT_MAX apart.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	return (*a > *b) - (*a < *b);
}
194 
195 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
196 {
197 	struct intel_context *ce;
198 	struct i915_request *rq;
199 	ktime_t t0, t1;
200 	u32 times[5];
201 	int err;
202 	int i;
203 
204 	ce = intel_context_create(engine);
205 	if (IS_ERR(ce))
206 		return PTR_ERR(ce);
207 
208 	intel_engine_pm_get(engine);
209 
210 	err = intel_engine_set_heartbeat(engine, 1);
211 	if (err)
212 		goto err_pm;
213 
214 	for (i = 0; i < ARRAY_SIZE(times); i++) {
215 		/* Manufacture a tick */
216 		do {
217 			while (READ_ONCE(engine->heartbeat.systole))
218 				flush_delayed_work(&engine->heartbeat.work);
219 
220 			engine->serial++; /* quick, pretend we are not idle! */
221 			flush_delayed_work(&engine->heartbeat.work);
222 			if (!delayed_work_pending(&engine->heartbeat.work)) {
223 				pr_err("%s: heartbeat did not start\n",
224 				       engine->name);
225 				err = -EINVAL;
226 				goto err_pm;
227 			}
228 
229 			rcu_read_lock();
230 			rq = READ_ONCE(engine->heartbeat.systole);
231 			if (rq)
232 				rq = i915_request_get_rcu(rq);
233 			rcu_read_unlock();
234 		} while (!rq);
235 
236 		t0 = ktime_get();
237 		while (rq == READ_ONCE(engine->heartbeat.systole))
238 			yield(); /* work is on the local cpu! */
239 		t1 = ktime_get();
240 
241 		i915_request_put(rq);
242 		times[i] = ktime_us_delta(t1, t0);
243 	}
244 
245 	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
246 
247 	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
248 		engine->name,
249 		times[ARRAY_SIZE(times) / 2],
250 		times[0],
251 		times[ARRAY_SIZE(times) - 1]);
252 
253 	/* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
254 	if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
255 		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
256 		       engine->name,
257 		       times[ARRAY_SIZE(times) / 2],
258 		       jiffies_to_usecs(6));
259 		err = -EINVAL;
260 	}
261 
262 	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
263 err_pm:
264 	intel_engine_pm_put(engine);
265 	intel_context_put(ce);
266 	return err;
267 }
268 
269 static int live_heartbeat_fast(void *arg)
270 {
271 	struct intel_gt *gt = arg;
272 	struct intel_engine_cs *engine;
273 	enum intel_engine_id id;
274 	int err = 0;
275 
276 	/* Check that the heartbeat ticks at the desired rate. */
277 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
278 		return 0;
279 
280 	for_each_engine(engine, gt, id) {
281 		err = __live_heartbeat_fast(engine);
282 		if (err)
283 			break;
284 	}
285 
286 	return err;
287 }
288 
289 static int __live_heartbeat_off(struct intel_engine_cs *engine)
290 {
291 	int err;
292 
293 	intel_engine_pm_get(engine);
294 
295 	engine->serial++;
296 	flush_delayed_work(&engine->heartbeat.work);
297 	if (!delayed_work_pending(&engine->heartbeat.work)) {
298 		pr_err("%s: heartbeat not running\n",
299 		       engine->name);
300 		err = -EINVAL;
301 		goto err_pm;
302 	}
303 
304 	err = intel_engine_set_heartbeat(engine, 0);
305 	if (err)
306 		goto err_pm;
307 
308 	engine->serial++;
309 	flush_delayed_work(&engine->heartbeat.work);
310 	if (delayed_work_pending(&engine->heartbeat.work)) {
311 		pr_err("%s: heartbeat still running\n",
312 		       engine->name);
313 		err = -EINVAL;
314 		goto err_beat;
315 	}
316 
317 	if (READ_ONCE(engine->heartbeat.systole)) {
318 		pr_err("%s: heartbeat still allocated\n",
319 		       engine->name);
320 		err = -EINVAL;
321 		goto err_beat;
322 	}
323 
324 err_beat:
325 	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
326 err_pm:
327 	intel_engine_pm_put(engine);
328 	return err;
329 }
330 
331 static int live_heartbeat_off(void *arg)
332 {
333 	struct intel_gt *gt = arg;
334 	struct intel_engine_cs *engine;
335 	enum intel_engine_id id;
336 	int err = 0;
337 
338 	/* Check that we can turn off heartbeat and not interrupt VIP */
339 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
340 		return 0;
341 
342 	for_each_engine(engine, gt, id) {
343 		if (!intel_engine_has_preemption(engine))
344 			continue;
345 
346 		err = __live_heartbeat_off(engine);
347 		if (err)
348 			break;
349 	}
350 
351 	return err;
352 }
353 
354 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
355 {
356 	static const struct i915_subtest tests[] = {
357 		SUBTEST(live_idle_flush),
358 		SUBTEST(live_idle_pulse),
359 		SUBTEST(live_heartbeat_fast),
360 		SUBTEST(live_heartbeat_off),
361 	};
362 	int saved_hangcheck;
363 	int err;
364 
365 	if (intel_gt_is_wedged(&i915->gt))
366 		return 0;
367 
368 	saved_hangcheck = i915->params.enable_hangcheck;
369 	i915->params.enable_hangcheck = INT_MAX;
370 
371 	err = intel_gt_live_subtests(tests, &i915->gt);
372 
373 	i915->params.enable_hangcheck = saved_hangcheck;
374 	return err;
375 }
376 
/*
 * Selftest helper: zero the heartbeat interval property of @engine, take an
 * engine-pm reference and park the heartbeat.
 *
 * Pair with st_engine_heartbeat_enable(), which drops the pm reference and
 * restores the default interval.
 */
void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}
384 
/*
 * Selftest helper, counterpart of st_engine_heartbeat_disable(): drop the
 * engine-pm reference and reset the heartbeat interval property of @engine
 * to its default value.
 */
void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}
392