/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

/*
 * We treat requests as fences. This is not to be confused with our
 * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU, for example, we should not rewrite an object's PTE whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_request to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_request is updated with i915_active_request_set() to
 * track the most recent fence request, typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_request completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_request.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
42 */ 43 44 void i915_active_retire_noop(struct i915_active_request *active, 45 struct i915_request *request); 46 47 /** 48 * i915_active_request_init - prepares the activity tracker for use 49 * @active - the active tracker 50 * @rq - initial request to track, can be NULL 51 * @func - a callback when then the tracker is retired (becomes idle), 52 * can be NULL 53 * 54 * i915_active_request_init() prepares the embedded @active struct for use as 55 * an activity tracker, that is for tracking the last known active request 56 * associated with it. When the last request becomes idle, when it is retired 57 * after completion, the optional callback @func is invoked. 58 */ 59 static inline void 60 i915_active_request_init(struct i915_active_request *active, 61 struct i915_request *rq, 62 i915_active_retire_fn retire) 63 { 64 RCU_INIT_POINTER(active->request, rq); 65 INIT_LIST_HEAD(&active->link); 66 active->retire = retire ?: i915_active_retire_noop; 67 } 68 69 #define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL) 70 71 /** 72 * i915_active_request_set - updates the tracker to watch the current request 73 * @active - the active tracker 74 * @request - the request to watch 75 * 76 * __i915_active_request_set() watches the given @request for completion. Whilst 77 * that @request is busy, the @active reports busy. When that @request is 78 * retired, the @active tracker is updated to report idle. 
79 */ 80 static inline void 81 __i915_active_request_set(struct i915_active_request *active, 82 struct i915_request *request) 83 { 84 list_move(&active->link, &request->active_list); 85 rcu_assign_pointer(active->request, request); 86 } 87 88 int __must_check 89 i915_active_request_set(struct i915_active_request *active, 90 struct i915_request *rq); 91 92 /** 93 * i915_active_request_set_retire_fn - updates the retirement callback 94 * @active - the active tracker 95 * @fn - the routine called when the request is retired 96 * @mutex - struct_mutex used to guard retirements 97 * 98 * i915_active_request_set_retire_fn() updates the function pointer that 99 * is called when the final request associated with the @active tracker 100 * is retired. 101 */ 102 static inline void 103 i915_active_request_set_retire_fn(struct i915_active_request *active, 104 i915_active_retire_fn fn, 105 struct mutex *mutex) 106 { 107 lockdep_assert_held(mutex); 108 active->retire = fn ?: i915_active_retire_noop; 109 } 110 111 static inline struct i915_request * 112 __i915_active_request_peek(const struct i915_active_request *active) 113 { 114 /* 115 * Inside the error capture (running with the driver in an unknown 116 * state), we want to bend the rules slightly (a lot). 117 * 118 * Work is in progress to make it safer, in the meantime this keeps 119 * the known issue from spamming the logs. 120 */ 121 return rcu_dereference_protected(active->request, 1); 122 } 123 124 /** 125 * i915_active_request_raw - return the active request 126 * @active - the active tracker 127 * 128 * i915_active_request_raw() returns the current request being tracked, or NULL. 129 * It does not obtain a reference on the request for the caller, so the caller 130 * must hold struct_mutex. 
131 */ 132 static inline struct i915_request * 133 i915_active_request_raw(const struct i915_active_request *active, 134 struct mutex *mutex) 135 { 136 return rcu_dereference_protected(active->request, 137 lockdep_is_held(mutex)); 138 } 139 140 /** 141 * i915_active_request_peek - report the active request being monitored 142 * @active - the active tracker 143 * 144 * i915_active_request_peek() returns the current request being tracked if 145 * still active, or NULL. It does not obtain a reference on the request 146 * for the caller, so the caller must hold struct_mutex. 147 */ 148 static inline struct i915_request * 149 i915_active_request_peek(const struct i915_active_request *active, 150 struct mutex *mutex) 151 { 152 struct i915_request *request; 153 154 request = i915_active_request_raw(active, mutex); 155 if (!request || i915_request_completed(request)) 156 return NULL; 157 158 return request; 159 } 160 161 /** 162 * i915_active_request_get - return a reference to the active request 163 * @active - the active tracker 164 * 165 * i915_active_request_get() returns a reference to the active request, or NULL 166 * if the active tracker is idle. The caller must hold struct_mutex. 167 */ 168 static inline struct i915_request * 169 i915_active_request_get(const struct i915_active_request *active, 170 struct mutex *mutex) 171 { 172 return i915_request_get(i915_active_request_peek(active, mutex)); 173 } 174 175 /** 176 * __i915_active_request_get_rcu - return a reference to the active request 177 * @active - the active tracker 178 * 179 * __i915_active_request_get() returns a reference to the active request, 180 * or NULL if the active tracker is idle. The caller must hold the RCU read 181 * lock, but the returned pointer is safe to use outside of RCU. 182 */ 183 static inline struct i915_request * 184 __i915_active_request_get_rcu(const struct i915_active_request *active) 185 { 186 /* 187 * Performing a lockless retrieval of the active request is super 188 * tricky. 
SLAB_TYPESAFE_BY_RCU merely guarantees that the backing 189 * slab of request objects will not be freed whilst we hold the 190 * RCU read lock. It does not guarantee that the request itself 191 * will not be freed and then *reused*. Viz, 192 * 193 * Thread A Thread B 194 * 195 * rq = active.request 196 * retire(rq) -> free(rq); 197 * (rq is now first on the slab freelist) 198 * active.request = NULL 199 * 200 * rq = new submission on a new object 201 * ref(rq) 202 * 203 * To prevent the request from being reused whilst the caller 204 * uses it, we take a reference like normal. Whilst acquiring 205 * the reference we check that it is not in a destroyed state 206 * (refcnt == 0). That prevents the request being reallocated 207 * whilst the caller holds on to it. To check that the request 208 * was not reallocated as we acquired the reference we have to 209 * check that our request remains the active request across 210 * the lookup, in the same manner as a seqlock. The visibility 211 * of the pointer versus the reference counting is controlled 212 * by using RCU barriers (rcu_dereference and rcu_assign_pointer). 213 * 214 * In the middle of all that, we inspect whether the request is 215 * complete. Retiring is lazy so the request may be completed long 216 * before the active tracker is updated. Querying whether the 217 * request is complete is far cheaper (as it involves no locked 218 * instructions setting cachelines to exclusive) than acquiring 219 * the reference, so we do it first. The RCU read lock ensures the 220 * pointer dereference is valid, but does not ensure that the 221 * seqno nor HWS is the right one! However, if the request was 222 * reallocated, that means the active tracker's request was complete. 223 * If the new request is also complete, then both are and we can 224 * just report the active tracker is idle. If the new request is 225 * incomplete, then we acquire a reference on it and check that 226 * it remained the active request. 
227 * 228 * It is then imperative that we do not zero the request on 229 * reallocation, so that we can chase the dangling pointers! 230 * See i915_request_alloc(). 231 */ 232 do { 233 struct i915_request *request; 234 235 request = rcu_dereference(active->request); 236 if (!request || i915_request_completed(request)) 237 return NULL; 238 239 /* 240 * An especially silly compiler could decide to recompute the 241 * result of i915_request_completed, more specifically 242 * re-emit the load for request->fence.seqno. A race would catch 243 * a later seqno value, which could flip the result from true to 244 * false. Which means part of the instructions below might not 245 * be executed, while later on instructions are executed. Due to 246 * barriers within the refcounting the inconsistency can't reach 247 * past the call to i915_request_get_rcu, but not executing 248 * that while still executing i915_request_put() creates 249 * havoc enough. Prevent this with a compiler barrier. 250 */ 251 barrier(); 252 253 request = i915_request_get_rcu(request); 254 255 /* 256 * What stops the following rcu_access_pointer() from occurring 257 * before the above i915_request_get_rcu()? If we were 258 * to read the value before pausing to get the reference to 259 * the request, we may not notice a change in the active 260 * tracker. 261 * 262 * The rcu_access_pointer() is a mere compiler barrier, which 263 * means both the CPU and compiler are free to perform the 264 * memory read without constraint. The compiler only has to 265 * ensure that any operations after the rcu_access_pointer() 266 * occur afterwards in program order. This means the read may 267 * be performed earlier by an out-of-order CPU, or adventurous 268 * compiler. 269 * 270 * The atomic operation at the heart of 271 * i915_request_get_rcu(), see dma_fence_get_rcu(), is 272 * atomic_inc_not_zero() which is only a full memory barrier 273 * when successful. 
That is, if i915_request_get_rcu() 274 * returns the request (and so with the reference counted 275 * incremented) then the following read for rcu_access_pointer() 276 * must occur after the atomic operation and so confirm 277 * that this request is the one currently being tracked. 278 * 279 * The corresponding write barrier is part of 280 * rcu_assign_pointer(). 281 */ 282 if (!request || request == rcu_access_pointer(active->request)) 283 return rcu_pointer_handoff(request); 284 285 i915_request_put(request); 286 } while (1); 287 } 288 289 /** 290 * i915_active_request_get_unlocked - return a reference to the active request 291 * @active - the active tracker 292 * 293 * i915_active_request_get_unlocked() returns a reference to the active request, 294 * or NULL if the active tracker is idle. The reference is obtained under RCU, 295 * so no locking is required by the caller. 296 * 297 * The reference should be freed with i915_request_put(). 298 */ 299 static inline struct i915_request * 300 i915_active_request_get_unlocked(const struct i915_active_request *active) 301 { 302 struct i915_request *request; 303 304 rcu_read_lock(); 305 request = __i915_active_request_get_rcu(active); 306 rcu_read_unlock(); 307 308 return request; 309 } 310 311 /** 312 * i915_active_request_isset - report whether the active tracker is assigned 313 * @active - the active tracker 314 * 315 * i915_active_request_isset() returns true if the active tracker is currently 316 * assigned to a request. Due to the lazy retiring, that request may be idle 317 * and this may report stale information. 
318 */ 319 static inline bool 320 i915_active_request_isset(const struct i915_active_request *active) 321 { 322 return rcu_access_pointer(active->request); 323 } 324 325 /** 326 * i915_active_request_retire - waits until the request is retired 327 * @active - the active request on which to wait 328 * 329 * i915_active_request_retire() waits until the request is completed, 330 * and then ensures that at least the retirement handler for this 331 * @active tracker is called before returning. If the @active 332 * tracker is idle, the function returns immediately. 333 */ 334 static inline int __must_check 335 i915_active_request_retire(struct i915_active_request *active, 336 struct mutex *mutex) 337 { 338 struct i915_request *request; 339 long ret; 340 341 request = i915_active_request_raw(active, mutex); 342 if (!request) 343 return 0; 344 345 ret = i915_request_wait(request, 346 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, 347 MAX_SCHEDULE_TIMEOUT); 348 if (ret < 0) 349 return ret; 350 351 list_del_init(&active->link); 352 RCU_INIT_POINTER(active->request, NULL); 353 354 active->retire(active, request); 355 356 return 0; 357 } 358 359 /* 360 * GPU activity tracking 361 * 362 * Each set of commands submitted to the GPU compromises a single request that 363 * signals a fence upon completion. struct i915_request combines the 364 * command submission, scheduling and fence signaling roles. If we want to see 365 * if a particular task is complete, we need to grab the fence (struct 366 * i915_request) for that task and check or wait for it to be signaled. More 367 * often though we want to track the status of a bunch of tasks, for example 368 * to wait for the GPU to finish accessing some memory across a variety of 369 * different command pipelines from different clients. 
We could choose to 370 * track every single request associated with the task, but knowing that 371 * each request belongs to an ordered timeline (later requests within a 372 * timeline must wait for earlier requests), we need only track the 373 * latest request in each timeline to determine the overall status of the 374 * task. 375 * 376 * struct i915_active provides this tracking across timelines. It builds a 377 * composite shared-fence, and is updated as new work is submitted to the task, 378 * forming a snapshot of the current status. It should be embedded into the 379 * different resources that need to track their associated GPU activity to 380 * provide a callback when that GPU activity has ceased, or otherwise to 381 * provide a serialisation point either for request submission or for CPU 382 * synchronisation. 383 */ 384 385 void i915_active_init(struct drm_i915_private *i915, 386 struct i915_active *ref, 387 void (*retire)(struct i915_active *ref)); 388 389 int i915_active_ref(struct i915_active *ref, 390 u64 timeline, 391 struct i915_request *rq); 392 393 int i915_active_wait(struct i915_active *ref); 394 395 int i915_request_await_active(struct i915_request *rq, 396 struct i915_active *ref); 397 int i915_request_await_active_request(struct i915_request *rq, 398 struct i915_active_request *active); 399 400 bool i915_active_acquire(struct i915_active *ref); 401 402 static inline void i915_active_cancel(struct i915_active *ref) 403 { 404 GEM_BUG_ON(ref->count != 1); 405 ref->count = 0; 406 } 407 408 void i915_active_release(struct i915_active *ref); 409 410 static inline bool 411 i915_active_is_idle(const struct i915_active *ref) 412 { 413 return !ref->count; 414 } 415 416 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 417 void i915_active_fini(struct i915_active *ref); 418 #else 419 static inline void i915_active_fini(struct i915_active *ref) { } 420 #endif 421 422 int i915_global_active_init(void); 423 void i915_global_active_exit(void); 424 425 #endif /* 
_I915_ACTIVE_H_ */ 426