/*
 * Copyright © 2008-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#ifndef I915_REQUEST_H
#define I915_REQUEST_H

#include <linux/dma-fence.h>

#include "i915_gem.h"
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_sw_fence.h"

#include <uapi/drm/i915_drm.h>

struct drm_file;
struct drm_i915_gem_object;
struct i915_request;
struct i915_timeline;
struct i915_timeline_cacheline;

struct i915_capture_list {
	struct i915_capture_list *next;
	struct i915_vma *vma;
};

enum {
	/*
	 * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
	 *
	 * Set by __i915_request_submit() on handing over to HW, and cleared
	 * by __i915_request_unsubmit() if we preempt this request.
	 *
	 * Finally cleared for consistency on retiring the request, when
	 * we know the HW is no longer running this request.
	 *
	 * See i915_request_is_active()
	 */
	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,

	/*
	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
	 *
	 * Internal bookkeeping used by the breadcrumb code to track when
	 * a request is on the various signal_list.
	 */
	I915_FENCE_FLAG_SIGNAL,
};

/**
 * Request queue structure.
 *
 * The request queue allows us to note sequence numbers that have been emitted
 * and may be associated with active buffers to be retired.
 *
 * By keeping this list, we can avoid having to do questionable sequence
 * number comparisons on buffer last_read|write_seqno. It also allows an
 * emission time to be associated with the request for tracking how far ahead
 * of the GPU the submission is.
 *
 * When modifying this structure be very aware that we perform a lockless
 * RCU lookup of it that may race against reallocation of the struct
 * from the slab freelist. We intentionally do not zero the structure on
 * allocation so that the lookup can use the dangling pointers (and is
 * cognisant that those pointers may be wrong). Instead, everything that
 * needs to be initialised must be done so explicitly.
 *
 * The requests are reference counted.
 */
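/*
 * A minimal sketch of such a lockless lookup (illustrative only; @slot here
 * is a hypothetical RCU-protected pointer to a request, while
 * i915_request_get_rcu() and i915_request_put() are declared later in this
 * header):
 *
 *	rcu_read_lock();
 *	rq = READ_ONCE(*slot);
 *	if (rq)
 *		rq = i915_request_get_rcu(rq);
 *	rcu_read_unlock();
 *	if (rq && READ_ONCE(*slot) != rq) {
 *		i915_request_put(rq);	// the slab slot was recycled under us
 *		rq = NULL;		// caller should retry the lookup
 *	}
 *
 * dma_fence_get_rcu() only acquires a reference if the refcount has not yet
 * dropped to zero, and re-checking the slot afterwards catches the case
 * where the struct was freed and reallocated for a different request in
 * between.
 */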
struct i915_request {
	struct dma_fence fence;
	spinlock_t lock;

	/** The i915 device on which this request was generated */
	struct drm_i915_private *i915;

	/**
	 * Context and ring buffer related to this request
	 * Contexts are refcounted, so when this request is associated with a
	 * context, we must increment the context's refcount, to guarantee that
	 * it persists while any request is linked to it. Requests themselves
	 * are also refcounted, so the request will only be freed when the last
	 * reference to it is dismissed, and the code in
	 * i915_request_free() will then decrement the refcount on the
	 * context.
	 */
	struct i915_gem_context *gem_context;
	struct intel_engine_cs *engine;
	struct intel_context *hw_context;
	struct intel_ring *ring;
	struct i915_timeline *timeline;
	struct list_head signal_link;

	/*
	 * The rcu epoch of when this request was allocated. Used to judiciously
	 * apply backpressure on future allocations to ensure that under
	 * mempressure there are sufficient RCU ticks for us to reclaim our
	 * RCU protected slabs.
	 */
	unsigned long rcustate;

	/*
	 * Fences for the various phases in the request's lifetime.
	 *
	 * The submit fence is used to await upon all of the request's
	 * dependencies. When it is signaled, the request is ready to run.
	 * It is used by the driver to then queue the request for execution.
	 */
	struct i915_sw_fence submit;
	union {
		wait_queue_entry_t submitq;
		struct i915_sw_dma_fence_cb dmaq;
	};
	struct list_head execute_cb;

	/*
	 * A list of everyone we wait upon, and everyone who waits upon us.
	 * Even though we will not be submitted to the hardware before the
	 * submit fence is signaled (it waits for all external events as well
	 * as our own requests), the scheduler still needs to know the
	 * dependency tree for the lifetime of the request (from execbuf
	 * to retirement), i.e. bidirectional dependency information for the
	 * request not tied to individual fences.
	 */
	struct i915_sched_node sched;
	struct i915_dependency dep;

	/*
	 * A convenience pointer to the current breadcrumb value stored in
	 * the HW status page (or our timeline's local equivalent). The full
	 * path would be rq->hw_context->ring->timeline->hwsp_seqno.
	 */
	const u32 *hwsp_seqno;

	/*
	 * If we need to access the timeline's seqno for this request in
	 * another request, we need to keep a read reference to this associated
	 * cacheline, so that we do not free and recycle it before the foreign
	 * observers have completed. Hence, we keep a pointer to the cacheline
	 * inside the timeline's HWSP vma, but it is only valid while this
	 * request has not completed, and is guarded by the timeline mutex.
	 */
	struct i915_timeline_cacheline *hwsp_cacheline;

	/** Position in the ring of the start of the request */
	u32 head;

	/** Position in the ring of the start of the user packets */
	u32 infix;

	/**
	 * Position in the ring of the start of the postfix.
	 * This is required to calculate the maximum available ring space
	 * without overwriting the postfix.
	 */
	u32 postfix;

	/** Position in the ring of the end of the whole request */
	u32 tail;

	/** Position in the ring of the end of any workarounds after the tail */
	u32 wa_tail;

	/** Preallocated space in the ring for emitting the request */
	u32 reserved_space;

	/** Batch buffer related to this request if any (used for
	 * error state dump only).
	 */
	struct i915_vma *batch;
	/**
	 * Additional buffers requested by userspace to be captured upon
	 * a GPU hang. The vma/obj on this list are protected by their
	 * active reference - all objects on this list must also be
	 * on the active_list (of their final request).
	 */
	struct i915_capture_list *capture_list;
	struct list_head active_list;

	/** Time at which this request was emitted, in jiffies. */
	unsigned long emitted_jiffies;

	bool waitboost;

	/** engine->request_list entry for this request */
	struct list_head link;

	/** ring->request_list entry for this request */
	struct list_head ring_link;

	struct drm_i915_file_private *file_priv;
	/** file_priv list entry for this request */
	struct list_head client_link;

	I915_SELFTEST_DECLARE(struct {
		struct list_head link;
		unsigned long delay;
	} mock;)
};
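/*
 * A sketch of the typical request lifecycle using the helpers declared
 * below (illustrative only; error handling and the command emission step,
 * which uses engine/ring helpers outside this header, are elided):
 *
 *	struct i915_request *rq;
 *	int err;
 *
 *	rq = i915_request_alloc(engine, ctx);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	err = i915_request_await_dma_fence(rq, fence);
 *	if (err)
 *		goto out_add;	// an allocated request must still be added
 *
 *	... emit commands into rq->ring ...
 *
 * out_add:
 *	i915_request_add(rq);
 *
 * Once i915_request_add() is called, the request belongs to the scheduler
 * and may be submitted, preempted and resubmitted; any further use by the
 * caller should hold a reference taken via i915_request_get().
 */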
#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)

extern const struct dma_fence_ops i915_fence_ops;

static inline bool dma_fence_is_i915(const struct dma_fence *fence)
{
	return fence->ops == &i915_fence_ops;
}

struct i915_request * __must_check
i915_request_alloc(struct intel_engine_cs *engine,
		   struct i915_gem_context *ctx);
void i915_request_retire_upto(struct i915_request *rq);

static inline struct i915_request *
to_request(struct dma_fence *fence)
{
	/* We assume that NULL fence/request are interoperable */
	BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0);
	GEM_BUG_ON(fence && !dma_fence_is_i915(fence));
	return container_of(fence, struct i915_request, fence);
}

static inline struct i915_request *
i915_request_get(struct i915_request *rq)
{
	return to_request(dma_fence_get(&rq->fence));
}

static inline struct i915_request *
i915_request_get_rcu(struct i915_request *rq)
{
	return to_request(dma_fence_get_rcu(&rq->fence));
}

static inline void
i915_request_put(struct i915_request *rq)
{
	dma_fence_put(&rq->fence);
}

int i915_request_await_object(struct i915_request *to,
			      struct drm_i915_gem_object *obj,
			      bool write);
int i915_request_await_dma_fence(struct i915_request *rq,
				 struct dma_fence *fence);

void i915_request_add(struct i915_request *rq);

void __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request);

void i915_request_skip(struct i915_request *request, int error);

void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);

/* Note: part of the intel_breadcrumbs family */
bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);

long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
	__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE	BIT(0)
#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
#define I915_WAIT_FOR_IDLE_BOOST	BIT(4)
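/*
 * A usage sketch for i915_request_wait() (illustrative only): wait
 * interruptibly for up to one second. The return value is the remaining
 * jiffies if the request completed, or a negative error code otherwise
 * (e.g. -ERESTARTSYS if a signal arrived, or -ETIME if the timeout
 * expired first):
 *
 *	long ret;
 *
 *	ret = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ);
 *	if (ret < 0)
 *		return ret;	// timed out or interrupted
 *	// the request has completed
 *
 * Pass MAX_SCHEDULE_TIMEOUT as @timeout for an unbounded wait.
 */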
static inline bool i915_request_signaled(const struct i915_request *rq)
{
	/* The request may live longer than its HWSP, so check flags first! */
	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
}

static inline bool i915_request_is_active(const struct i915_request *rq)
{
	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
}

/**
 * Returns true if seq1 is later than or equal to seq2, using a signed
 * comparison of the difference so that the result remains correct across
 * u32 wraparound.
 */
static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}
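/*
 * A worked example of the wrap-safe comparison above (illustrative):
 * with seq1 = 0x00000002 and seq2 = 0xfffffffe, the u32 difference is
 * 0x00000004, which is positive as an s32, so seq1 is correctly judged
 * to have passed seq2 even though it is numerically smaller. The reverse
 * comparison, (s32)(0xfffffffe - 0x00000002) = (s32)0xfffffffc, is
 * negative, as expected.
 */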
static inline u32 __hwsp_seqno(const struct i915_request *rq)
{
	return READ_ONCE(*rq->hwsp_seqno);
}

/**
 * hwsp_seqno - the current breadcrumb value in the HW status page
 * @rq: the request, to chase the relevant HW status page
 *
 * The emphasis in naming here is that hwsp_seqno() is not a property of the
 * request, but an indication of the current HW state (associated with this
 * request). Its value will change as the GPU executes more requests.
 *
 * Returns the current breadcrumb value in the associated HW status page (or
 * the local timeline's equivalent) for this request. The request itself
 * has the associated breadcrumb value of rq->fence.seqno; when the HW
 * status page holds that breadcrumb, or a later one, this request is
 * complete.
 */
static inline u32 hwsp_seqno(const struct i915_request *rq)
{
	u32 seqno;

	rcu_read_lock(); /* the HWSP may be freed at runtime */
	seqno = __hwsp_seqno(rq);
	rcu_read_unlock();

	return seqno;
}

static inline bool __i915_request_has_started(const struct i915_request *rq)
{
	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
}

/**
 * i915_request_started - check if the request has begun being executed
 * @rq: the request
 *
 * If the timeline is not using initial breadcrumbs, a request is
 * considered started if the previous request on its timeline (i.e.
 * context) has been signaled.
 *
 * If the timeline is using semaphores, it will also be emitting an
 * "initial breadcrumb" after the semaphores are complete and just before
 * it began executing the user payload. A request can therefore be active
 * on the HW and not yet started as it is still busywaiting on its
 * dependencies (via HW semaphores).
 *
 * If the request has started, its dependencies will have been signaled
 * (either by fences or by semaphores) and it will have begun processing
 * the user payload.
 *
 * However, even if a request has started, it may have been preempted and
 * so no longer active, or it may have already completed.
 *
 * See also i915_request_is_active().
 *
 * Returns true if the request has begun executing the user payload, or
 * has completed.
 */
static inline bool i915_request_started(const struct i915_request *rq)
{
	if (i915_request_signaled(rq))
		return true;

	/* Remember: started but may have since been preempted! */
	return __i915_request_has_started(rq);
}

/**
 * i915_request_is_running - check if the request may actually be executing
 * @rq: the request
 *
 * Returns true if the request is currently submitted to hardware and has
 * passed its start point (i.e. the context is set up and it is not
 * busywaiting on its dependencies). Note that it may no longer be running
 * by the time the function returns!
 */
static inline bool i915_request_is_running(const struct i915_request *rq)
{
	if (!i915_request_is_active(rq))
		return false;

	return __i915_request_has_started(rq);
}

static inline bool i915_request_completed(const struct i915_request *rq)
{
	if (i915_request_signaled(rq))
		return true;

	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
}

static inline void i915_request_mark_complete(struct i915_request *rq)
{
	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
}

void i915_retire_requests(struct drm_i915_private *i915);

#endif /* I915_REQUEST_H */