/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

/*
 * We treat requests as fences. This is not to be confused with our
 * "fence registers" but pipeline synchronisation objects a la GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU, for example, we should not rewrite an object's PTEs whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_request to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_request is updated with i915_active_request_set() to
 * track the most recent fence request; typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_request completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_request.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */
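
/*
 * Illustrative sketch only: "struct foo", foo_retire() and the foo pointer
 * below are hypothetical names, not part of the driver. An owner embeds the
 * tracker, initialises it (optionally with a retire callback) and then points
 * it at the most recent request, typically from its move-to-active path:
 *
 *	struct foo {
 *		struct i915_active_request last_active;
 *	};
 *
 *	static void foo_retire(struct i915_active_request *active,
 *			       struct i915_request *rq)
 *	{
 *		... runs when the tracked request is retired (tracker idle) ...
 *	}
 *
 *	i915_active_request_init(&foo->last_active, NULL, foo_retire);
 *
 *	err = i915_active_request_set(&foo->last_active, rq);
 *	if (err)
 *		... propagate the error ...
 */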

void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request);

/**
 * i915_active_request_init - prepares the activity tracker for use
 * @active - the active tracker
 * @rq - initial request to track, can be NULL
 * @retire - a callback invoked when the tracker is retired (becomes idle),
 *           can be NULL
 *
 * i915_active_request_init() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When that last request is retired after completion (the
 * tracker becomes idle), the optional callback @retire is invoked.
 */
static inline void
i915_active_request_init(struct i915_active_request *active,
			 struct i915_request *rq,
			 i915_active_retire_fn retire)
{
	RCU_INIT_POINTER(active->request, rq);
	INIT_LIST_HEAD(&active->link);
	active->retire = retire ?: i915_active_retire_noop;
}

#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)

/**
 * __i915_active_request_set - updates the tracker to watch the current request
 * @active - the active tracker
 * @request - the request to watch
 *
 * __i915_active_request_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active reports busy. When that @request is
 * retired, the @active tracker is updated to report idle.
 */
static inline void
__i915_active_request_set(struct i915_active_request *active,
			  struct i915_request *request)
{
	list_move(&active->link, &request->active_list);
	rcu_assign_pointer(active->request, request);
}

int __must_check
i915_active_request_set(struct i915_active_request *active,
			struct i915_request *rq);

/**
 * i915_active_request_set_retire_fn - updates the retirement callback
 * @active - the active tracker
 * @fn - the routine called when the request is retired
 * @mutex - struct_mutex used to guard retirements
 *
 * i915_active_request_set_retire_fn() updates the function pointer that
 * is called when the final request associated with the @active tracker
 * is retired.
 */
static inline void
i915_active_request_set_retire_fn(struct i915_active_request *active,
				  i915_active_retire_fn fn,
				  struct mutex *mutex)
{
	lockdep_assert_held(mutex);
	active->retire = fn ?: i915_active_retire_noop;
}

static inline struct i915_request *
__i915_active_request_peek(const struct i915_active_request *active)
{
	/*
	 * Inside the error capture (running with the driver in an unknown
	 * state), we want to bend the rules slightly (a lot).
	 *
	 * Work is in progress to make it safer; in the meantime this keeps
	 * the known issue from spamming the logs.
	 */
	return rcu_dereference_protected(active->request, 1);
}

/**
 * i915_active_request_raw - return the active request
 * @active - the active tracker
 *
 * i915_active_request_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_raw(const struct i915_active_request *active,
			struct mutex *mutex)
{
	return rcu_dereference_protected(active->request,
					 lockdep_is_held(mutex));
}

/**
 * i915_active_request_peek - report the active request being monitored
 * @active - the active tracker
 *
 * i915_active_request_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_peek(const struct i915_active_request *active,
			 struct mutex *mutex)
{
	struct i915_request *request;

	request = i915_active_request_raw(active, mutex);
	if (!request || i915_request_completed(request))
		return NULL;

	return request;
}

/**
 * i915_active_request_get - return a reference to the active request
 * @active - the active tracker
 *
 * i915_active_request_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_get(const struct i915_active_request *active,
			struct mutex *mutex)
{
	return i915_request_get(i915_active_request_peek(active, mutex));
}

/**
 * __i915_active_request_get_rcu - return a reference to the active request
 * @active - the active tracker
 *
 * __i915_active_request_get_rcu() returns a reference to the active request,
 * or NULL if the active tracker is idle. The caller must hold the RCU read
 * lock, but the returned pointer is safe to use outside of RCU.
 */
static inline struct i915_request *
__i915_active_request_get_rcu(const struct i915_active_request *active)
{
	/*
	 * Performing a lockless retrieval of the active request is super
	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
	 * slab of request objects will not be freed whilst we hold the
	 * RCU read lock. It does not guarantee that the request itself
	 * will not be freed and then *reused*. Viz,
	 *
	 * Thread A			Thread B
	 *
	 * rq = active.request
	 *				retire(rq) -> free(rq);
	 *				(rq is now first on the slab freelist)
	 *				active.request = NULL
	 *
	 *				rq = new submission on a new object
	 * ref(rq)
	 *
	 * To prevent the request from being reused whilst the caller
	 * uses it, we take a reference like normal. Whilst acquiring
	 * the reference we check that it is not in a destroyed state
	 * (refcnt == 0). That prevents the request being reallocated
	 * whilst the caller holds on to it. To check that the request
	 * was not reallocated as we acquired the reference we have to
	 * check that our request remains the active request across
	 * the lookup, in the same manner as a seqlock. The visibility
	 * of the pointer versus the reference counting is controlled
	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
	 *
	 * In the middle of all that, we inspect whether the request is
	 * complete. Retiring is lazy so the request may be completed long
	 * before the active tracker is updated. Querying whether the
	 * request is complete is far cheaper (as it involves no locked
	 * instructions setting cachelines to exclusive) than acquiring
	 * the reference, so we do it first. The RCU read lock ensures the
	 * pointer dereference is valid, but does not ensure that the
	 * seqno nor HWS is the right one! However, if the request was
	 * reallocated, that means the active tracker's request was complete.
	 * If the new request is also complete, then both are and we can
	 * just report the active tracker is idle. If the new request is
	 * incomplete, then we acquire a reference on it and check that
	 * it remained the active request.
	 *
	 * It is then imperative that we do not zero the request on
	 * reallocation, so that we can chase the dangling pointers!
	 * See i915_request_alloc().
	 */
	do {
		struct i915_request *request;

		request = rcu_dereference(active->request);
		if (!request || i915_request_completed(request))
			return NULL;

		/*
		 * An especially silly compiler could decide to recompute the
		 * result of i915_request_completed, more specifically
		 * re-emit the load for request->fence.seqno. A race would catch
		 * a later seqno value, which could flip the result from true to
		 * false. Which means part of the instructions below might not
		 * be executed, while later on instructions are executed. Due to
		 * barriers within the refcounting the inconsistency can't reach
		 * past the call to i915_request_get_rcu, but not executing
		 * that while still executing i915_request_put() creates
		 * havoc enough.  Prevent this with a compiler barrier.
		 */
		barrier();

		request = i915_request_get_rcu(request);

		/*
		 * What stops the following rcu_access_pointer() from occurring
		 * before the above i915_request_get_rcu()? If we were
		 * to read the value before pausing to get the reference to
		 * the request, we may not notice a change in the active
		 * tracker.
		 *
		 * The rcu_access_pointer() is a mere compiler barrier, which
		 * means both the CPU and compiler are free to perform the
		 * memory read without constraint. The compiler only has to
		 * ensure that any operations after the rcu_access_pointer()
		 * occur afterwards in program order. This means the read may
		 * be performed earlier by an out-of-order CPU, or adventurous
		 * compiler.
		 *
		 * The atomic operation at the heart of
		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
		 * atomic_inc_not_zero() which is only a full memory barrier
		 * when successful. That is, if i915_request_get_rcu()
		 * returns the request (and so with the reference counted
		 * incremented) then the following read for rcu_access_pointer()
		 * must occur after the atomic operation and so confirm
		 * that this request is the one currently being tracked.
		 *
		 * The corresponding write barrier is part of
		 * rcu_assign_pointer().
		 */
		if (!request || request == rcu_access_pointer(active->request))
			return rcu_pointer_handoff(request);

		i915_request_put(request);
	} while (1);
}

/**
 * i915_active_request_get_unlocked - return a reference to the active request
 * @active - the active tracker
 *
 * i915_active_request_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_request_put().
 */
static inline struct i915_request *
i915_active_request_get_unlocked(const struct i915_active_request *active)
{
	struct i915_request *request;

	rcu_read_lock();
	request = __i915_active_request_get_rcu(active);
	rcu_read_unlock();

	return request;
}
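
/*
 * Illustrative sketch only ("foo" and its last_active member are hypothetical,
 * not part of the driver): the unlocked getter can be used to sample the
 * tracker without holding struct_mutex; any reference it returns must be
 * dropped with i915_request_put() when the caller is done with it.
 *
 *	struct i915_request *rq;
 *
 *	rq = i915_active_request_get_unlocked(&foo->last_active);
 *	if (rq) {
 *		... inspect or wait on rq without struct_mutex ...
 *		i915_request_put(rq);
 *	}
 */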

/**
 * i915_active_request_isset - report whether the active tracker is assigned
 * @active - the active tracker
 *
 * i915_active_request_isset() returns true if the active tracker is currently
 * assigned to a request. Due to the lazy retiring, that request may be idle
 * and this may report stale information.
 */
static inline bool
i915_active_request_isset(const struct i915_active_request *active)
{
	return rcu_access_pointer(active->request);
}

/**
 * i915_active_request_retire - waits until the request is retired
 * @active - the active request on which to wait
 * @mutex - struct_mutex used to guard retirements
 *
 * i915_active_request_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 */
static inline int __must_check
i915_active_request_retire(struct i915_active_request *active,
			   struct mutex *mutex)
{
	struct i915_request *request;
	long ret;

	request = i915_active_request_raw(active, mutex);
	if (!request)
		return 0;

	ret = i915_request_wait(request,
				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
				MAX_SCHEDULE_TIMEOUT);
	if (ret < 0)
		return ret;

	list_del_init(&active->link);
	RCU_INIT_POINTER(active->request, NULL);

	active->retire(active, request);

	return 0;
}
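
/*
 * Illustrative sketch only ("foo", its last_active member and the i915
 * pointer are hypothetical context, not part of this header): a caller
 * holding struct_mutex can flush the tracker before tearing down the
 * resource it guards, e.g.
 *
 *	err = i915_active_request_retire(&foo->last_active,
 *					 &i915->drm.struct_mutex);
 *	if (err)
 *		return err;
 *
 *	... the retire callback has run and foo->last_active is now idle ...
 */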

/*
 * GPU activity tracking
 *
 * Each set of commands submitted to the GPU comprises a single request that
 * signals a fence upon completion. struct i915_request combines the
 * command submission, scheduling and fence signaling roles. If we want to see
 * if a particular task is complete, we need to grab the fence (struct
 * i915_request) for that task and check or wait for it to be signaled. More
 * often though we want to track the status of a bunch of tasks, for example
 * to wait for the GPU to finish accessing some memory across a variety of
 * different command pipelines from different clients. We could choose to
 * track every single request associated with the task, but knowing that
 * each request belongs to an ordered timeline (later requests within a
 * timeline must wait for earlier requests), we need only track the
 * latest request in each timeline to determine the overall status of the
 * task.
 *
 * struct i915_active provides this tracking across timelines. It builds a
 * composite shared-fence, and is updated as new work is submitted to the task,
 * forming a snapshot of the current status. It should be embedded into the
 * different resources that need to track their associated GPU activity to
 * provide a callback when that GPU activity has ceased, or otherwise to
 * provide a serialisation point either for request submission or for CPU
 * synchronisation.
 */
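
/*
 * Illustrative sketch only ("foo", its active member, foo_retire() and the
 * rq/i915 variables are hypothetical, not part of this header): a resource
 * embeds a struct i915_active, takes a reference for each new request that
 * uses it, and can later wait for all of that tracked activity to complete.
 *
 *	static void foo_retire(struct i915_active *ref)
 *	{
 *		... runs once all tracked requests have been retired ...
 *	}
 *
 *	i915_active_init(i915, &foo->active, foo_retire);
 *
 *	i915_active_acquire(&foo->active);
 *	err = i915_active_ref(&foo->active, rq->fence.context, rq);
 *	i915_active_release(&foo->active);
 *
 *	... later, to serialise the CPU with the tracked GPU activity ...
 *	err = i915_active_wait(&foo->active);
 *
 *	i915_active_fini(&foo->active);
 */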

void i915_active_init(struct drm_i915_private *i915,
		      struct i915_active *ref,
		      void (*retire)(struct i915_active *ref));

int i915_active_ref(struct i915_active *ref,
		    u64 timeline,
		    struct i915_request *rq);

int i915_active_wait(struct i915_active *ref);

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref);
int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active);

bool i915_active_acquire(struct i915_active *ref);

static inline void i915_active_cancel(struct i915_active *ref)
{
	GEM_BUG_ON(ref->count != 1);
	ref->count = 0;
}

void i915_active_release(struct i915_active *ref);

static inline bool
i915_active_is_idle(const struct i915_active *ref)
{
	return !ref->count;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
#else
static inline void i915_active_fini(struct i915_active *ref) { }
#endif

int i915_global_active_init(void);
void i915_global_active_exit(void);

#endif /* _I915_ACTIVE_H_ */