/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

/*
 * We treat requests as fences. This is not to be confused with our
 * "fence registers" but pipeline synchronisation objects a la GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU, for example, we should not rewrite an object's PTEs whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_request to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_request is updated with i915_active_request_set() to
 * track the most recent fence request; typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_request completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_request.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */

void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request);

/**
 * i915_active_request_init - prepares the activity tracker for use
 * @active: the active tracker
 * @rq: initial request to track, can be NULL
 * @retire: an optional callback invoked when the tracker is retired
 *	    (becomes idle), can be NULL
 *
 * i915_active_request_init() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the last request becomes idle (it is retired
 * after completion), the optional callback @retire is invoked.
 */
static inline void
i915_active_request_init(struct i915_active_request *active,
			 struct i915_request *rq,
			 i915_active_retire_fn retire)
{
	RCU_INIT_POINTER(active->request, rq);
	INIT_LIST_HEAD(&active->link);
	active->retire = retire ?: i915_active_retire_noop;
}

#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
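
/*
 * A minimal usage sketch (illustration only, not part of this header): a
 * hypothetical owner embeds an i915_active_request and supplies a retirement
 * callback. All names below are invented for the example.
 *
 *	struct hypothetical_owner {
 *		struct i915_active_request last_write;
 *	};
 *
 *	static void hypothetical_owner_retire(struct i915_active_request *active,
 *					      struct i915_request *rq)
 *	{
 *		struct hypothetical_owner *owner =
 *			container_of(active, typeof(*owner), last_write);
 *
 *		... the tracker is now idle (active->request == NULL), so
 *		... the owner may release whatever it tied to this activity
 *	}
 *
 *	i915_active_request_init(&owner->last_write, NULL,
 *				 hypothetical_owner_retire);
 *
 * INIT_ACTIVE_REQUEST(&owner->last_write) is shorthand for the same call with
 * no initial request and the default no-op retirement callback.
 */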

/**
 * __i915_active_request_set - updates the tracker to watch the current request
 * @active: the active tracker
 * @request: the request to watch
 *
 * __i915_active_request_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active tracker reports busy. When that @request
 * is retired, the @active tracker is updated to report idle.
 */
static inline void
__i915_active_request_set(struct i915_active_request *active,
			  struct i915_request *request)
{
	list_move(&active->link, &request->active_list);
	rcu_assign_pointer(active->request, request);
}

int __must_check
i915_active_request_set(struct i915_active_request *active,
			struct i915_request *rq);

/**
 * i915_active_request_set_retire_fn - updates the retirement callback
 * @active: the active tracker
 * @fn: the routine called when the request is retired
 * @mutex: struct_mutex used to guard retirements
 *
 * i915_active_request_set_retire_fn() updates the function pointer that
 * is called when the final request associated with the @active tracker
 * is retired.
 */
static inline void
i915_active_request_set_retire_fn(struct i915_active_request *active,
				  i915_active_retire_fn fn,
				  struct mutex *mutex)
{
	lockdep_assert_held(mutex);
	active->retire = fn ?: i915_active_retire_noop;
}

/**
 * i915_active_request_raw - return the active request
 * @active: the active tracker
 * @mutex: struct_mutex used to guard the request pointer
 *
 * i915_active_request_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_raw(const struct i915_active_request *active,
			struct mutex *mutex)
{
	return rcu_dereference_protected(active->request,
					 lockdep_is_held(mutex));
}

/**
 * i915_active_request_peek - report the active request being monitored
 * @active: the active tracker
 * @mutex: struct_mutex used to guard the request pointer
 *
 * i915_active_request_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_peek(const struct i915_active_request *active,
			 struct mutex *mutex)
{
	struct i915_request *request;

	request = i915_active_request_raw(active, mutex);
	if (!request || i915_request_completed(request))
		return NULL;

	return request;
}

/**
 * i915_active_request_get - return a reference to the active request
 * @active: the active tracker
 * @mutex: struct_mutex used to guard the request pointer
 *
 * i915_active_request_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_get(const struct i915_active_request *active,
			struct mutex *mutex)
{
	return i915_request_get(i915_active_request_peek(active, mutex));
}
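
/*
 * A minimal sketch of locked retrieval (illustration only; the owner and the
 * i915 pointer below are hypothetical, the lock is the usual struct_mutex):
 *
 *	struct i915_request *rq;
 *
 *	mutex_lock(&i915->drm.struct_mutex);
 *	rq = i915_active_request_get(&owner->last_write,
 *				     &i915->drm.struct_mutex);
 *	mutex_unlock(&i915->drm.struct_mutex);
 *
 *	if (rq) {
 *		... the reference keeps rq valid after unlocking, e.g. to
 *		... wait on it with i915_request_wait()
 *		i915_request_put(rq);
 *	}
 */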

/**
 * __i915_active_request_get_rcu - return a reference to the active request
 * @active: the active tracker
 *
 * __i915_active_request_get_rcu() returns a reference to the active request,
 * or NULL if the active tracker is idle. The caller must hold the RCU read
 * lock, but the returned pointer is safe to use outside of RCU.
 */
static inline struct i915_request *
__i915_active_request_get_rcu(const struct i915_active_request *active)
{
	/*
	 * Performing a lockless retrieval of the active request is super
	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
	 * slab of request objects will not be freed whilst we hold the
	 * RCU read lock. It does not guarantee that the request itself
	 * will not be freed and then *reused*. Viz,
	 *
	 *	Thread A			Thread B
	 *
	 *	rq = active.request
	 *					retire(rq) -> free(rq);
	 *					(rq is now first on the slab freelist)
	 *					active.request = NULL
	 *
	 *					rq = new submission on a new object
	 *	ref(rq)
	 *
	 * To prevent the request from being reused whilst the caller
	 * uses it, we take a reference like normal. Whilst acquiring
	 * the reference we check that it is not in a destroyed state
	 * (refcnt == 0). That prevents the request being reallocated
	 * whilst the caller holds on to it. To check that the request
	 * was not reallocated as we acquired the reference we have to
	 * check that our request remains the active request across
	 * the lookup, in the same manner as a seqlock. The visibility
	 * of the pointer versus the reference counting is controlled
	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
	 *
	 * In the middle of all that, we inspect whether the request is
	 * complete. Retiring is lazy so the request may be completed long
	 * before the active tracker is updated. Querying whether the
	 * request is complete is far cheaper (as it involves no locked
	 * instructions setting cachelines to exclusive) than acquiring
	 * the reference, so we do it first. The RCU read lock ensures the
	 * pointer dereference is valid, but does not ensure that the
	 * seqno nor HWS is the right one! However, if the request was
	 * reallocated, that means the active tracker's request was complete.
	 * If the new request is also complete, then both are and we can
	 * just report the active tracker is idle. If the new request is
	 * incomplete, then we acquire a reference on it and check that
	 * it remained the active request.
	 *
	 * It is then imperative that we do not zero the request on
	 * reallocation, so that we can chase the dangling pointers!
	 * See i915_request_alloc().
	 */
	do {
		struct i915_request *request;

		request = rcu_dereference(active->request);
		if (!request || i915_request_completed(request))
			return NULL;

		/*
		 * An especially silly compiler could decide to recompute the
		 * result of i915_request_completed, more specifically
		 * re-emit the load for request->fence.seqno. A race would catch
		 * a later seqno value, which could flip the result from true to
		 * false. Which means part of the instructions below might not
		 * be executed, while later on instructions are executed. Due to
		 * barriers within the refcounting the inconsistency can't reach
		 * past the call to i915_request_get_rcu, but not executing
		 * that while still executing i915_request_put() creates
		 * havoc enough. Prevent this with a compiler barrier.
		 */
		barrier();

		request = i915_request_get_rcu(request);

		/*
		 * What stops the following rcu_access_pointer() from occurring
		 * before the above i915_request_get_rcu()? If we were
		 * to read the value before pausing to get the reference to
		 * the request, we may not notice a change in the active
		 * tracker.
		 *
		 * The rcu_access_pointer() is a mere compiler barrier, which
		 * means both the CPU and compiler are free to perform the
		 * memory read without constraint. The compiler only has to
		 * ensure that any operations after the rcu_access_pointer()
		 * occur afterwards in program order. This means the read may
		 * be performed earlier by an out-of-order CPU, or adventurous
		 * compiler.
		 *
		 * The atomic operation at the heart of
		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
		 * atomic_inc_not_zero() which is only a full memory barrier
		 * when successful. That is, if i915_request_get_rcu()
		 * returns the request (and so with the reference counted
		 * incremented) then the following read for rcu_access_pointer()
		 * must occur after the atomic operation and so confirm
		 * that this request is the one currently being tracked.
		 *
		 * The corresponding write barrier is part of
		 * rcu_assign_pointer().
		 */
		if (!request || request == rcu_access_pointer(active->request))
			return rcu_pointer_handoff(request);

		i915_request_put(request);
	} while (1);
}

/**
 * i915_active_request_get_unlocked - return a reference to the active request
 * @active: the active tracker
 *
 * i915_active_request_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_request_put().
 */
static inline struct i915_request *
i915_active_request_get_unlocked(const struct i915_active_request *active)
{
	struct i915_request *request;

	rcu_read_lock();
	request = __i915_active_request_get_rcu(active);
	rcu_read_unlock();

	return request;
}
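
/*
 * A lockless sketch (illustration only, reusing the hypothetical owner from
 * above): the same retrieval without holding struct_mutex, relying on the
 * RCU dance in __i915_active_request_get_rcu().
 *
 *	struct i915_request *rq;
 *
 *	rq = i915_active_request_get_unlocked(&owner->last_write);
 *	if (rq) {
 *		... inspect or wait on rq without struct_mutex ...
 *		i915_request_put(rq);
 *	}
 *
 * Note that the request may be retired from the tracker at any point after
 * the lookup; only the reference keeps it alive, not its "active" status.
 */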

/**
 * i915_active_request_isset - report whether the active tracker is assigned
 * @active: the active tracker
 *
 * i915_active_request_isset() returns true if the active tracker is currently
 * assigned to a request. Due to the lazy retiring, that request may be idle
 * and this may report stale information.
 */
static inline bool
i915_active_request_isset(const struct i915_active_request *active)
{
	return rcu_access_pointer(active->request);
}

/**
 * i915_active_request_retire - waits until the request is retired
 * @active: the active request on which to wait
 * @mutex: struct_mutex used to guard retirements
 *
 * i915_active_request_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 */
static inline int __must_check
i915_active_request_retire(struct i915_active_request *active,
			   struct mutex *mutex)
{
	struct i915_request *request;
	long ret;

	request = i915_active_request_raw(active, mutex);
	if (!request)
		return 0;

	ret = i915_request_wait(request,
				I915_WAIT_INTERRUPTIBLE,
				MAX_SCHEDULE_TIMEOUT);
	if (ret < 0)
		return ret;

	list_del_init(&active->link);
	RCU_INIT_POINTER(active->request, NULL);

	active->retire(active, request);

	return 0;
}

/*
 * GPU activity tracking
 *
 * Each set of commands submitted to the GPU comprises a single request that
 * signals a fence upon completion. struct i915_request combines the
 * command submission, scheduling and fence signaling roles. If we want to see
 * if a particular task is complete, we need to grab the fence (struct
 * i915_request) for that task and check or wait for it to be signaled. More
 * often though we want to track the status of a bunch of tasks, for example
 * to wait for the GPU to finish accessing some memory across a variety of
 * different command pipelines from different clients. We could choose to
 * track every single request associated with the task, but knowing that
 * each request belongs to an ordered timeline (later requests within a
 * timeline must wait for earlier requests), we need only track the
 * latest request in each timeline to determine the overall status of the
 * task.
 *
 * struct i915_active provides this tracking across timelines. It builds a
 * composite shared-fence, and is updated as new work is submitted to the task,
 * forming a snapshot of the current status. It should be embedded into the
 * different resources that need to track their associated GPU activity to
 * provide a callback when that GPU activity has ceased, or otherwise to
 * provide a serialisation point either for request submission or for CPU
 * synchronisation.
 */

void i915_active_init(struct drm_i915_private *i915,
		      struct i915_active *ref,
		      void (*retire)(struct i915_active *ref));

int i915_active_ref(struct i915_active *ref,
		    u64 timeline,
		    struct i915_request *rq);

int i915_active_wait(struct i915_active *ref);

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref);
int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active);

bool i915_active_acquire(struct i915_active *ref);

static inline void i915_active_cancel(struct i915_active *ref)
{
	GEM_BUG_ON(ref->count != 1);
	ref->count = 0;
}

void i915_active_release(struct i915_active *ref);

static inline bool
i915_active_is_idle(const struct i915_active *ref)
{
	return !ref->count;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
#else
static inline void i915_active_fini(struct i915_active *ref) { }
#endif

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine);
void i915_active_acquire_barrier(struct i915_active *ref);
void i915_request_add_barriers(struct i915_request *rq);
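
/*
 * A rough lifecycle sketch for struct i915_active (illustration only; error
 * handling is elided and the owner names are invented, so treat this as an
 * assumption about typical usage rather than a prescribed sequence):
 *
 *	i915_active_init(i915, &obj->active, hypothetical_obj_retire);
 *
 *	... for each request that uses the resource ...
 *	if (i915_active_acquire(&obj->active))
 *		... first activation, e.g. pin backing storage ...
 *	err = i915_active_ref(&obj->active, rq->fence.context, rq);
 *	i915_active_release(&obj->active);
 *
 *	... to flush outstanding GPU activity from the CPU ...
 *	err = i915_active_wait(&obj->active);
 *
 *	... or to order new work after the tracked activity ...
 *	err = i915_request_await_active(new_rq, &obj->active);
 *
 *	... once idle (the retire callback has run) ...
 *	i915_active_fini(&obj->active);
 */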

#endif /* _I915_ACTIVE_H_ */