xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c (revision 7b73a9c8e26ce5769c41d4b787767c10fe7269db)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/sort.h>
8 
9 #include "i915_drv.h"
10 
11 #include "intel_gt_requests.h"
12 #include "i915_selftest.h"
13 
14 struct pulse {
15 	struct i915_active active;
16 	struct kref kref;
17 };
18 
19 static int pulse_active(struct i915_active *active)
20 {
21 	kref_get(&container_of(active, struct pulse, active)->kref);
22 	return 0;
23 }
24 
25 static void pulse_free(struct kref *kref)
26 {
27 	kfree(container_of(kref, struct pulse, kref));
28 }
29 
30 static void pulse_put(struct pulse *p)
31 {
32 	kref_put(&p->kref, pulse_free);
33 }
34 
35 static void pulse_retire(struct i915_active *active)
36 {
37 	pulse_put(container_of(active, struct pulse, active));
38 }
39 
40 static struct pulse *pulse_create(void)
41 {
42 	struct pulse *p;
43 
44 	p = kmalloc(sizeof(*p), GFP_KERNEL);
45 	if (!p)
46 		return p;
47 
48 	kref_init(&p->kref);
49 	i915_active_init(&p->active, pulse_active, pulse_retire);
50 
51 	return p;
52 }
53 
54 static void pulse_unlock_wait(struct pulse *p)
55 {
56 	mutex_lock(&p->active.mutex);
57 	mutex_unlock(&p->active.mutex);
58 	flush_work(&p->active.work);
59 }
60 
61 static int __live_idle_pulse(struct intel_engine_cs *engine,
62 			     int (*fn)(struct intel_engine_cs *cs))
63 {
64 	struct pulse *p;
65 	int err;
66 
67 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
68 
69 	p = pulse_create();
70 	if (!p)
71 		return -ENOMEM;
72 
73 	err = i915_active_acquire(&p->active);
74 	if (err)
75 		goto out;
76 
77 	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
78 	if (err) {
79 		i915_active_release(&p->active);
80 		goto out;
81 	}
82 
83 	i915_active_acquire_barrier(&p->active);
84 	i915_active_release(&p->active);
85 
86 	GEM_BUG_ON(i915_active_is_idle(&p->active));
87 	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
88 
89 	err = fn(engine);
90 	if (err)
91 		goto out;
92 
93 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
94 
95 	if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) {
96 		err = -ETIME;
97 		goto out;
98 	}
99 
100 	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
101 
102 	pulse_unlock_wait(p); /* synchronize with the retirement callback */
103 
104 	if (!i915_active_is_idle(&p->active)) {
105 		struct drm_printer m = drm_err_printer("pulse");
106 
107 		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
108 		       engine->name);
109 		i915_active_print(&p->active, &m);
110 
111 		err = -EINVAL;
112 		goto out;
113 	}
114 
115 out:
116 	pulse_put(p);
117 	return err;
118 }
119 
120 static int live_idle_flush(void *arg)
121 {
122 	struct intel_gt *gt = arg;
123 	struct intel_engine_cs *engine;
124 	enum intel_engine_id id;
125 	int err = 0;
126 
127 	/* Check that we can flush the idle barriers */
128 
129 	for_each_engine(engine, gt, id) {
130 		intel_engine_pm_get(engine);
131 		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
132 		intel_engine_pm_put(engine);
133 		if (err)
134 			break;
135 	}
136 
137 	return err;
138 }
139 
140 static int live_idle_pulse(void *arg)
141 {
142 	struct intel_gt *gt = arg;
143 	struct intel_engine_cs *engine;
144 	enum intel_engine_id id;
145 	int err = 0;
146 
147 	/* Check that heartbeat pulses flush the idle barriers */
148 
149 	for_each_engine(engine, gt, id) {
150 		intel_engine_pm_get(engine);
151 		err = __live_idle_pulse(engine, intel_engine_pulse);
152 		intel_engine_pm_put(engine);
153 		if (err && err != -ENODEV)
154 			break;
155 
156 		err = 0;
157 	}
158 
159 	return err;
160 }
161 
/*
 * sort() comparator for u32 values in ascending order.
 *
 * Returns a negative value if *_a < *_b, zero if they are equal and a
 * positive value if *_a > *_b.
 *
 * A plain subtraction is not used: the u32 difference converted to int has
 * the wrong sign whenever the operands differ by more than INT_MAX.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	if (*a < *b)
		return -1;

	return *a > *b;
}
168 
169 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
170 {
171 	struct intel_context *ce;
172 	struct i915_request *rq;
173 	ktime_t t0, t1;
174 	u32 times[5];
175 	int err;
176 	int i;
177 
178 	ce = intel_context_create(engine->kernel_context->gem_context,
179 				  engine);
180 	if (IS_ERR(ce))
181 		return PTR_ERR(ce);
182 
183 	intel_engine_pm_get(engine);
184 
185 	err = intel_engine_set_heartbeat(engine, 1);
186 	if (err)
187 		goto err_pm;
188 
189 	for (i = 0; i < ARRAY_SIZE(times); i++) {
190 		/* Manufacture a tick */
191 		do {
192 			while (READ_ONCE(engine->heartbeat.systole))
193 				flush_delayed_work(&engine->heartbeat.work);
194 
195 			engine->serial++; /* quick, pretend we are not idle! */
196 			flush_delayed_work(&engine->heartbeat.work);
197 			if (!delayed_work_pending(&engine->heartbeat.work)) {
198 				pr_err("%s: heartbeat did not start\n",
199 				       engine->name);
200 				err = -EINVAL;
201 				goto err_pm;
202 			}
203 
204 			rcu_read_lock();
205 			rq = READ_ONCE(engine->heartbeat.systole);
206 			if (rq)
207 				rq = i915_request_get_rcu(rq);
208 			rcu_read_unlock();
209 		} while (!rq);
210 
211 		t0 = ktime_get();
212 		while (rq == READ_ONCE(engine->heartbeat.systole))
213 			yield(); /* work is on the local cpu! */
214 		t1 = ktime_get();
215 
216 		i915_request_put(rq);
217 		times[i] = ktime_us_delta(t1, t0);
218 	}
219 
220 	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
221 
222 	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
223 		engine->name,
224 		times[ARRAY_SIZE(times) / 2],
225 		times[0],
226 		times[ARRAY_SIZE(times) - 1]);
227 
228 	/* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
229 	if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
230 		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
231 		       engine->name,
232 		       times[ARRAY_SIZE(times) / 2],
233 		       jiffies_to_usecs(6));
234 		err = -EINVAL;
235 	}
236 
237 	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
238 err_pm:
239 	intel_engine_pm_put(engine);
240 	intel_context_put(ce);
241 	return err;
242 }
243 
244 static int live_heartbeat_fast(void *arg)
245 {
246 	struct intel_gt *gt = arg;
247 	struct intel_engine_cs *engine;
248 	enum intel_engine_id id;
249 	int err = 0;
250 
251 	/* Check that the heartbeat ticks at the desired rate. */
252 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
253 		return 0;
254 
255 	for_each_engine(engine, gt, id) {
256 		err = __live_heartbeat_fast(engine);
257 		if (err)
258 			break;
259 	}
260 
261 	return err;
262 }
263 
264 static int __live_heartbeat_off(struct intel_engine_cs *engine)
265 {
266 	int err;
267 
268 	intel_engine_pm_get(engine);
269 
270 	engine->serial++;
271 	flush_delayed_work(&engine->heartbeat.work);
272 	if (!delayed_work_pending(&engine->heartbeat.work)) {
273 		pr_err("%s: heartbeat not running\n",
274 		       engine->name);
275 		err = -EINVAL;
276 		goto err_pm;
277 	}
278 
279 	err = intel_engine_set_heartbeat(engine, 0);
280 	if (err)
281 		goto err_pm;
282 
283 	engine->serial++;
284 	flush_delayed_work(&engine->heartbeat.work);
285 	if (delayed_work_pending(&engine->heartbeat.work)) {
286 		pr_err("%s: heartbeat still running\n",
287 		       engine->name);
288 		err = -EINVAL;
289 		goto err_beat;
290 	}
291 
292 	if (READ_ONCE(engine->heartbeat.systole)) {
293 		pr_err("%s: heartbeat still allocated\n",
294 		       engine->name);
295 		err = -EINVAL;
296 		goto err_beat;
297 	}
298 
299 err_beat:
300 	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
301 err_pm:
302 	intel_engine_pm_put(engine);
303 	return err;
304 }
305 
306 static int live_heartbeat_off(void *arg)
307 {
308 	struct intel_gt *gt = arg;
309 	struct intel_engine_cs *engine;
310 	enum intel_engine_id id;
311 	int err = 0;
312 
313 	/* Check that we can turn off heartbeat and not interrupt VIP */
314 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
315 		return 0;
316 
317 	for_each_engine(engine, gt, id) {
318 		if (!intel_engine_has_preemption(engine))
319 			continue;
320 
321 		err = __live_heartbeat_off(engine);
322 		if (err)
323 			break;
324 	}
325 
326 	return err;
327 }
328 
329 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
330 {
331 	static const struct i915_subtest tests[] = {
332 		SUBTEST(live_idle_flush),
333 		SUBTEST(live_idle_pulse),
334 		SUBTEST(live_heartbeat_fast),
335 		SUBTEST(live_heartbeat_off),
336 	};
337 	int saved_hangcheck;
338 	int err;
339 
340 	if (intel_gt_is_wedged(&i915->gt))
341 		return 0;
342 
343 	saved_hangcheck = i915_modparams.enable_hangcheck;
344 	i915_modparams.enable_hangcheck = INT_MAX;
345 
346 	err = intel_gt_live_subtests(tests, &i915->gt);
347 
348 	i915_modparams.enable_hangcheck = saved_hangcheck;
349 	return err;
350 }
351