xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_context.c (revision f97cee494dc92395a668445bcd24d34c89f4ff8c)
1 /*
2  * SPDX-License-Identifier: GPL-2.0
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6 
7 #include "i915_selftest.h"
8 #include "intel_engine_heartbeat.h"
9 #include "intel_engine_pm.h"
10 #include "intel_gt.h"
11 
12 #include "gem/selftests/mock_context.h"
13 #include "selftests/igt_flush_test.h"
14 #include "selftests/mock_drm.h"
15 
16 static int request_sync(struct i915_request *rq)
17 {
18 	struct intel_timeline *tl = i915_request_timeline(rq);
19 	long timeout;
20 	int err = 0;
21 
22 	intel_timeline_get(tl);
23 	i915_request_get(rq);
24 
25 	/* Opencode i915_request_add() so we can keep the timeline locked. */
26 	__i915_request_commit(rq);
27 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
28 	__i915_request_queue(rq, NULL);
29 
30 	timeout = i915_request_wait(rq, 0, HZ / 10);
31 	if (timeout < 0)
32 		err = timeout;
33 	else
34 		i915_request_retire_upto(rq);
35 
36 	lockdep_unpin_lock(&tl->mutex, rq->cookie);
37 	mutex_unlock(&tl->mutex);
38 
39 	i915_request_put(rq);
40 	intel_timeline_put(tl);
41 
42 	return err;
43 }
44 
45 static int context_sync(struct intel_context *ce)
46 {
47 	struct intel_timeline *tl = ce->timeline;
48 	int err = 0;
49 
50 	mutex_lock(&tl->mutex);
51 	do {
52 		struct i915_request *rq;
53 		long timeout;
54 
55 		if (list_empty(&tl->requests))
56 			break;
57 
58 		rq = list_last_entry(&tl->requests, typeof(*rq), link);
59 		i915_request_get(rq);
60 
61 		timeout = i915_request_wait(rq, 0, HZ / 10);
62 		if (timeout < 0)
63 			err = timeout;
64 		else
65 			i915_request_retire_upto(rq);
66 
67 		i915_request_put(rq);
68 	} while (!err);
69 	mutex_unlock(&tl->mutex);
70 
71 	return err;
72 }
73 
74 static int __live_context_size(struct intel_engine_cs *engine)
75 {
76 	struct intel_context *ce;
77 	struct i915_request *rq;
78 	void *vaddr;
79 	int err;
80 
81 	ce = intel_context_create(engine);
82 	if (IS_ERR(ce))
83 		return PTR_ERR(ce);
84 
85 	err = intel_context_pin(ce);
86 	if (err)
87 		goto err;
88 
89 	vaddr = i915_gem_object_pin_map(ce->state->obj,
90 					i915_coherent_map_type(engine->i915));
91 	if (IS_ERR(vaddr)) {
92 		err = PTR_ERR(vaddr);
93 		intel_context_unpin(ce);
94 		goto err;
95 	}
96 
97 	/*
98 	 * Note that execlists also applies a redzone which it checks on
99 	 * context unpin when debugging. We are using the same location
100 	 * and same poison value so that our checks overlap. Despite the
101 	 * redundancy, we want to keep this little selftest so that we
102 	 * get coverage of any and all submission backends, and we can
103 	 * always extend this test to ensure we trick the HW into a
104 	 * compromising position wrt to the various sections that need
105 	 * to be written into the context state.
106 	 *
107 	 * TLDR; this overlaps with the execlists redzone.
108 	 */
109 	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
110 	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
111 
112 	rq = intel_context_create_request(ce);
113 	intel_context_unpin(ce);
114 	if (IS_ERR(rq)) {
115 		err = PTR_ERR(rq);
116 		goto err_unpin;
117 	}
118 
119 	err = request_sync(rq);
120 	if (err)
121 		goto err_unpin;
122 
123 	/* Force the context switch */
124 	rq = intel_engine_create_kernel_request(engine);
125 	if (IS_ERR(rq)) {
126 		err = PTR_ERR(rq);
127 		goto err_unpin;
128 	}
129 	err = request_sync(rq);
130 	if (err)
131 		goto err_unpin;
132 
133 	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
134 		pr_err("%s context overwrote trailing red-zone!", engine->name);
135 		err = -EINVAL;
136 	}
137 
138 err_unpin:
139 	i915_gem_object_unpin_map(ce->state->obj);
140 err:
141 	intel_context_put(ce);
142 	return err;
143 }
144 
145 static int live_context_size(void *arg)
146 {
147 	struct intel_gt *gt = arg;
148 	struct intel_engine_cs *engine;
149 	enum intel_engine_id id;
150 	int err = 0;
151 
152 	/*
153 	 * Check that our context sizes are correct by seeing if the
154 	 * HW tries to write past the end of one.
155 	 */
156 
157 	for_each_engine(engine, gt, id) {
158 		struct file *saved;
159 
160 		if (!engine->context_size)
161 			continue;
162 
163 		intel_engine_pm_get(engine);
164 
165 		/*
166 		 * Hide the old default state -- we lie about the context size
167 		 * and get confused when the default state is smaller than
168 		 * expected. For our do nothing request, inheriting the
169 		 * active state is sufficient, we are only checking that we
170 		 * don't use more than we planned.
171 		 */
172 		saved = fetch_and_zero(&engine->default_state);
173 
174 		/* Overlaps with the execlists redzone */
175 		engine->context_size += I915_GTT_PAGE_SIZE;
176 
177 		err = __live_context_size(engine);
178 
179 		engine->context_size -= I915_GTT_PAGE_SIZE;
180 
181 		engine->default_state = saved;
182 
183 		intel_engine_pm_put(engine);
184 
185 		if (err)
186 			break;
187 	}
188 
189 	return err;
190 }
191 
192 static int __live_active_context(struct intel_engine_cs *engine)
193 {
194 	unsigned long saved_heartbeat;
195 	struct intel_context *ce;
196 	int pass;
197 	int err;
198 
199 	/*
200 	 * We keep active contexts alive until after a subsequent context
201 	 * switch as the final write from the context-save will be after
202 	 * we retire the final request. We track when we unpin the context,
203 	 * under the presumption that the final pin is from the last request,
204 	 * and instead of immediately unpinning the context, we add a task
205 	 * to unpin the context from the next idle-barrier.
206 	 *
207 	 * This test makes sure that the context is kept alive until a
208 	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
209 	 * with no more outstanding requests).
210 	 */
211 
212 	if (intel_engine_pm_is_awake(engine)) {
213 		pr_err("%s is awake before starting %s!\n",
214 		       engine->name, __func__);
215 		return -EINVAL;
216 	}
217 
218 	ce = intel_context_create(engine);
219 	if (IS_ERR(ce))
220 		return PTR_ERR(ce);
221 
222 	saved_heartbeat = engine->props.heartbeat_interval_ms;
223 	engine->props.heartbeat_interval_ms = 0;
224 
225 	for (pass = 0; pass <= 2; pass++) {
226 		struct i915_request *rq;
227 
228 		intel_engine_pm_get(engine);
229 
230 		rq = intel_context_create_request(ce);
231 		if (IS_ERR(rq)) {
232 			err = PTR_ERR(rq);
233 			goto out_engine;
234 		}
235 
236 		err = request_sync(rq);
237 		if (err)
238 			goto out_engine;
239 
240 		/* Context will be kept active until after an idle-barrier. */
241 		if (i915_active_is_idle(&ce->active)) {
242 			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
243 			       engine->name, pass);
244 			err = -EINVAL;
245 			goto out_engine;
246 		}
247 
248 		if (!intel_engine_pm_is_awake(engine)) {
249 			pr_err("%s is asleep before idle-barrier\n",
250 			       engine->name);
251 			err = -EINVAL;
252 			goto out_engine;
253 		}
254 
255 out_engine:
256 		intel_engine_pm_put(engine);
257 		if (err)
258 			goto err;
259 	}
260 
261 	/* Now make sure our idle-barriers are flushed */
262 	err = intel_engine_flush_barriers(engine);
263 	if (err)
264 		goto err;
265 
266 	/* Wait for the barrier and in the process wait for engine to park */
267 	err = context_sync(engine->kernel_context);
268 	if (err)
269 		goto err;
270 
271 	if (!i915_active_is_idle(&ce->active)) {
272 		pr_err("context is still active!");
273 		err = -EINVAL;
274 	}
275 
276 	intel_engine_pm_flush(engine);
277 
278 	if (intel_engine_pm_is_awake(engine)) {
279 		struct drm_printer p = drm_debug_printer(__func__);
280 
281 		intel_engine_dump(engine, &p,
282 				  "%s is still awake:%d after idle-barriers\n",
283 				  engine->name,
284 				  atomic_read(&engine->wakeref.count));
285 		GEM_TRACE_DUMP();
286 
287 		err = -EINVAL;
288 		goto err;
289 	}
290 
291 err:
292 	engine->props.heartbeat_interval_ms = saved_heartbeat;
293 	intel_context_put(ce);
294 	return err;
295 }
296 
297 static int live_active_context(void *arg)
298 {
299 	struct intel_gt *gt = arg;
300 	struct intel_engine_cs *engine;
301 	enum intel_engine_id id;
302 	int err = 0;
303 
304 	for_each_engine(engine, gt, id) {
305 		err = __live_active_context(engine);
306 		if (err)
307 			break;
308 
309 		err = igt_flush_test(gt->i915);
310 		if (err)
311 			break;
312 	}
313 
314 	return err;
315 }
316 
/*
 * Build a request on @ce, prepare it as a remote request against @remote
 * via intel_context_prepare_remote_request(), then submit and wait for it.
 * @remote is kept pinned for the duration of the call.
 *
 * Returns 0 on success or the error code of the first failing step.
 */
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int ret;

	ret = intel_context_pin(remote);
	if (ret)
		return ret;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto out_unpin;
	}

	ret = intel_context_prepare_remote_request(remote, rq);
	if (ret) {
		/* Preparation failed: add the request as-is and report ret. */
		i915_request_add(rq);
	} else {
		ret = request_sync(rq);
	}

out_unpin:
	intel_context_unpin(remote);
	return ret;
}
344 
345 static int __live_remote_context(struct intel_engine_cs *engine)
346 {
347 	struct intel_context *local, *remote;
348 	unsigned long saved_heartbeat;
349 	int pass;
350 	int err;
351 
352 	/*
353 	 * Check that our idle barriers do not interfere with normal
354 	 * activity tracking. In particular, check that operating
355 	 * on the context image remotely (intel_context_prepare_remote_request),
356 	 * which inserts foreign fences into intel_context.active, does not
357 	 * clobber the idle-barrier.
358 	 */
359 
360 	if (intel_engine_pm_is_awake(engine)) {
361 		pr_err("%s is awake before starting %s!\n",
362 		       engine->name, __func__);
363 		return -EINVAL;
364 	}
365 
366 	remote = intel_context_create(engine);
367 	if (IS_ERR(remote))
368 		return PTR_ERR(remote);
369 
370 	local = intel_context_create(engine);
371 	if (IS_ERR(local)) {
372 		err = PTR_ERR(local);
373 		goto err_remote;
374 	}
375 
376 	saved_heartbeat = engine->props.heartbeat_interval_ms;
377 	engine->props.heartbeat_interval_ms = 0;
378 	intel_engine_pm_get(engine);
379 
380 	for (pass = 0; pass <= 2; pass++) {
381 		err = __remote_sync(local, remote);
382 		if (err)
383 			break;
384 
385 		err = __remote_sync(engine->kernel_context, remote);
386 		if (err)
387 			break;
388 
389 		if (i915_active_is_idle(&remote->active)) {
390 			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
391 			       engine->name, pass);
392 			err = -EINVAL;
393 			break;
394 		}
395 	}
396 
397 	intel_engine_pm_put(engine);
398 	engine->props.heartbeat_interval_ms = saved_heartbeat;
399 
400 	intel_context_put(local);
401 err_remote:
402 	intel_context_put(remote);
403 	return err;
404 }
405 
406 static int live_remote_context(void *arg)
407 {
408 	struct intel_gt *gt = arg;
409 	struct intel_engine_cs *engine;
410 	enum intel_engine_id id;
411 	int err = 0;
412 
413 	for_each_engine(engine, gt, id) {
414 		err = __live_remote_context(engine);
415 		if (err)
416 			break;
417 
418 		err = igt_flush_test(gt->i915);
419 		if (err)
420 			break;
421 	}
422 
423 	return err;
424 }
425 
426 int intel_context_live_selftests(struct drm_i915_private *i915)
427 {
428 	static const struct i915_subtest tests[] = {
429 		SUBTEST(live_context_size),
430 		SUBTEST(live_active_context),
431 		SUBTEST(live_remote_context),
432 	};
433 	struct intel_gt *gt = &i915->gt;
434 
435 	if (intel_gt_is_wedged(gt))
436 		return 0;
437 
438 	return intel_gt_live_subtests(tests, gt);
439 }
440