/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prefetch.h>
#include <linux/dma-fence-array.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>

#include "i915_drv.h"

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	/*
	 * The timeline struct (as part of the ppgtt underneath a context)
	 * may be freed when the request is no longer in use by the GPU.
	 * We could extend the life of a context to beyond that of all
	 * fences, possibly keeping the hw resource around indefinitely,
	 * or we just give them a false name. Since
	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
	 * lie seems justifiable.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

	return to_request(fence)->timeline->common->name;
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	if (i915_fence_signaled(fence))
		return false;

	intel_engine_enable_signaling(to_request(fence), true);
	return !i915_fence_signaled(fence);
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_request_wait(to_request(fence), interruptible, timeout);
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct i915_request *rq = to_request(fence);

	/*
	 * The request is put onto a RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
	i915_sw_fence_fini(&rq->submit);

	kmem_cache_free(rq->i915->requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};
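/*
 * These ops back the dma_fence embedded in every i915_request (see the
 * dma_fence_init() call in i915_request_alloc() below). An illustrative
 * sketch, not part of the driver, of how an external waiter reaches them
 * once it holds a reference to a request:
 *
 *	struct dma_fence *fence = &rq->fence;
 *
 *	dma_fence_wait(fence, true);	<- ends up in i915_fence_wait()
 */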
static inline void
i915_request_remove_from_client(struct i915_request *request)
{
	struct drm_i915_file_private *file_priv;

	file_priv = request->file_priv;
	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_link);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static struct i915_dependency *
i915_dependency_alloc(struct drm_i915_private *i915)
{
	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct drm_i915_private *i915,
		     struct i915_dependency *dep)
{
	kmem_cache_free(i915->dependencies, dep);
}

static void
__i915_priotree_add_dependency(struct i915_priotree *pt,
			       struct i915_priotree *signal,
			       struct i915_dependency *dep,
			       unsigned long flags)
{
	INIT_LIST_HEAD(&dep->dfs_link);
	list_add(&dep->wait_link, &signal->waiters_list);
	list_add(&dep->signal_link, &pt->signalers_list);
	dep->signaler = signal;
	dep->flags = flags;
}

static int
i915_priotree_add_dependency(struct drm_i915_private *i915,
			     struct i915_priotree *pt,
			     struct i915_priotree *signal)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc(i915);
	if (!dep)
		return -ENOMEM;

	__i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC);
	return 0;
}

static void
i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
{
	struct i915_dependency *dep, *next;

	GEM_BUG_ON(!list_empty(&pt->link));

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) {
		GEM_BUG_ON(!i915_priotree_signaled(dep->signaler));
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != pt);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}
}

static void
i915_priotree_init(struct i915_priotree *pt)
{
	INIT_LIST_HEAD(&pt->signalers_list);
	INIT_LIST_HEAD(&pt->waiters_list);
	INIT_LIST_HEAD(&pt->link);
	pt->priority = I915_PRIORITY_INVALID;
}
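/*
 * Rewind every engine timeline to @seqno. This is only legal once the GPU
 * is idle: it is used when the driver explicitly sets the global seqno via
 * i915_gem_set_global_seqno() or when reserve_engine() detects that the
 * seqno would otherwise wrap.
 */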
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	/* Carefully retire all requests without writing to the rings */
	ret = i915_gem_wait_for_idle(i915,
				     I915_WAIT_INTERRUPTIBLE |
				     I915_WAIT_LOCKED);
	if (ret)
		return ret;

	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
	for_each_engine(engine, i915, id) {
		struct i915_gem_timeline *timeline;
		struct intel_timeline *tl = engine->timeline;

		if (!i915_seqno_passed(seqno, tl->seqno)) {
			/* Flush any waiters before we reuse the seqno */
			intel_engine_disarm_breadcrumbs(engine);
			GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
		}

		/* Check we are idle before we fiddle with hw state! */
		GEM_BUG_ON(!intel_engine_is_idle(engine));
		GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));

		/* Finally reset hw state */
		intel_engine_init_global_seqno(engine, seqno);
		tl->seqno = seqno;

		list_for_each_entry(timeline, &i915->gt.timelines, link)
			memset(timeline->engine[id].global_sync, 0,
			       sizeof(timeline->engine[id].global_sync));
	}

	return 0;
}

int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *i915 = to_i915(dev);

	lockdep_assert_held(&i915->drm.struct_mutex);

	if (seqno == 0)
		return -EINVAL;

	/* HWS page needs to be set less than what we will inject to ring */
	return reset_all_global_seqno(i915, seqno - 1);
}

static void mark_busy(struct drm_i915_private *i915)
{
	if (i915->gt.awake)
		return;

	GEM_BUG_ON(!i915->gt.active_requests);

	intel_runtime_pm_get_noresume(i915);

	/*
	 * It seems that the DMC likes to transition between the DC states a lot
	 * when there are no connected displays (no active power domains) during
	 * command submission.
	 *
	 * This activity has a negative impact on the performance of the chip,
	 * with huge latencies observed in the interrupt handler and elsewhere.
	 *
	 * Work around it by grabbing a GT IRQ power domain whilst there is any
	 * GT activity, preventing any DC state transitions.
	 */
	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);

	i915->gt.awake = true;
	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
		i915->gt.epoch = 1;

	intel_enable_gt_powersave(i915);
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

	i915_queue_hangcheck(i915);

	queue_delayed_work(i915->wq,
			   &i915->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}
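/*
 * Reserve a slot for a new request on @engine: account for the seqno we
 * will consume (resetting all timelines first if it would wrap) and wake
 * the GT on the transition from idle to busy. Paired with
 * unreserve_engine() on retirement or on a failed request allocation.
 */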
static int reserve_engine(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	u32 active = ++engine->timeline->inflight_seqnos;
	u32 seqno = engine->timeline->seqno;
	int ret;

	/* Reservation is fine until we need to wrap around */
	if (unlikely(add_overflows(seqno, active))) {
		ret = reset_all_global_seqno(i915, 0);
		if (ret) {
			engine->timeline->inflight_seqnos--;
			return ret;
		}
	}

	if (!i915->gt.active_requests++)
		mark_busy(i915);

	return 0;
}

static void unreserve_engine(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (!--i915->gt.active_requests) {
		/* Cancel the mark_busy() from our reserve_engine() */
		GEM_BUG_ON(!i915->gt.awake);
		mod_delayed_work(i915->wq,
				 &i915->gt.idle_work,
				 msecs_to_jiffies(100));
	}

	GEM_BUG_ON(!engine->timeline->inflight_seqnos);
	engine->timeline->inflight_seqnos--;
}

void i915_gem_retire_noop(struct i915_gem_active *active,
			  struct i915_request *request)
{
	/* Space left intentionally blank */
}

static void advance_ring(struct i915_request *request)
{
	unsigned int tail;

	/*
	 * We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	if (list_is_last(&request->ring_link, &request->ring->request_list)) {
		/*
		 * We may race here with execlists resubmitting this request
		 * as we retire it. The resubmission will move the ring->tail
		 * forwards (to request->wa_tail). We either read the
		 * current value that was written to hw, or the value that
		 * is just about to be. Either works, if we miss the last two
		 * noops - they are safe to be replayed on a reset.
		 */
		tail = READ_ONCE(request->ring->tail);
	} else {
		tail = request->postfix;
	}
	list_del(&request->ring_link);

	request->ring->head = tail;
}

static void free_capture_list(struct i915_request *request)
{
	struct i915_capture_list *capture;

	capture = request->capture_list;
	while (capture) {
		struct i915_capture_list *next = capture->next;

		kfree(capture);
		capture = next;
	}
}
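/*
 * Retire a single completed request: drop it from the engine and ring
 * tracking lists, run the retirement callbacks of everything that was
 * active on it, release its context pin and finally signal its fence and
 * drop the reference held for the lists.
 */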
static void i915_request_retire(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct i915_gem_active *active, *next;

	lockdep_assert_held(&request->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
	GEM_BUG_ON(!i915_request_completed(request));
	GEM_BUG_ON(!request->i915->gt.active_requests);

	trace_i915_request_retire(request);

	spin_lock_irq(&engine->timeline->lock);
	list_del_init(&request->link);
	spin_unlock_irq(&engine->timeline->lock);

	unreserve_engine(request->engine);
	advance_ring(request);

	free_capture_list(request);

	/*
	 * Walk through the active list, calling retire on each. This allows
	 * objects to track their GPU activity and mark themselves as idle
	 * when their *last* active request is completed (updating state
	 * tracking lists for eviction, active references for GEM, etc).
	 *
	 * As the ->retire() may free the node, we decouple it first and
	 * pass along the auxiliary information (to avoid dereferencing
	 * the node after the callback).
	 */
	list_for_each_entry_safe(active, next, &request->active_list, link) {
		/*
		 * In microbenchmarks or focusing upon time inside the kernel,
		 * we may spend an inordinate amount of time simply handling
		 * the retirement of requests and processing their callbacks.
		 * Of which, this loop itself is particularly hot due to the
		 * cache misses when jumping around the list of i915_gem_active.
		 * So we try to keep this loop as streamlined as possible and
		 * also prefetch the next i915_gem_active to try and hide
		 * the likely cache miss.
		 */
		prefetchw(next);

		INIT_LIST_HEAD(&active->link);
		RCU_INIT_POINTER(active->request, NULL);

		active->retire(active, request);
	}

	i915_request_remove_from_client(request);

	/* Retirement decays the ban score as it is a sign of ctx progress */
	atomic_dec_if_positive(&request->ctx->ban_score);

	/*
	 * The backing object for the context is done after switching to the
	 * *next* context. Therefore we cannot retire the previous context until
	 * the next context has already started running. However, since we
	 * cannot take the required locks at i915_request_submit() we
	 * defer the unpinning of the active context to now, retirement of
	 * the subsequent request.
	 */
	if (engine->last_retired_context)
		engine->context_unpin(engine, engine->last_retired_context);
	engine->last_retired_context = request->ctx;

	spin_lock_irq(&request->lock);
	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
		dma_fence_signal_locked(&request->fence);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		intel_engine_cancel_signaling(request);
	if (request->waitboost) {
		GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters));
		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
	}
	spin_unlock_irq(&request->lock);

	i915_priotree_fini(request->i915, &request->priotree);
	i915_request_put(request);
}
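/*
 * Retire @rq along with every completed request that precedes it on the
 * same engine timeline, so that retirement is always performed in order.
 */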
void i915_request_retire_upto(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct i915_request *tmp;

	lockdep_assert_held(&rq->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_request_completed(rq));

	if (list_empty(&rq->link))
		return;

	do {
		tmp = list_first_entry(&engine->timeline->requests,
				       typeof(*tmp), link);

		i915_request_retire(tmp);
	} while (tmp != rq);
}

static u32 timeline_get_seqno(struct intel_timeline *tl)
{
	return ++tl->seqno;
}
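/*
 * __i915_request_submit: hand a request to the hardware backend. The
 * request is assigned its global seqno, its breadcrumb is written into the
 * ring and it is moved from the per-context timeline onto the engine
 * timeline. Called with the engine timeline lock held and irqs disabled.
 */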
void __i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_timeline *timeline;
	u32 seqno;

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->timeline->lock);

	/* Transfer from per-context onto the global per-engine timeline */
	timeline = engine->timeline;
	GEM_BUG_ON(timeline == request->timeline);
	GEM_BUG_ON(request->global_seqno);

	seqno = timeline_get_seqno(timeline);
	GEM_BUG_ON(!seqno);
	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
	request->global_seqno = seqno;
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		intel_engine_enable_signaling(request, false);
	spin_unlock(&request->lock);

	engine->emit_breadcrumb(request,
				request->ring->vaddr + request->postfix);

	spin_lock(&request->timeline->lock);
	list_move_tail(&request->link, &timeline->requests);
	spin_unlock(&request->timeline->lock);

	trace_i915_request_execute(request);

	wake_up_all(&request->execute);
}

void i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->timeline->lock, flags);

	__i915_request_submit(request);

	spin_unlock_irqrestore(&engine->timeline->lock, flags);
}

void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_timeline *timeline;

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->timeline->lock);

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */
	GEM_BUG_ON(!request->global_seqno);
	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
				     request->global_seqno));
	engine->timeline->seqno--;

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
	request->global_seqno = 0;
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		intel_engine_cancel_signaling(request);
	spin_unlock(&request->lock);

	/* Transfer back from the global per-engine timeline to per-context */
	timeline = request->timeline;
	GEM_BUG_ON(timeline == engine->timeline);

	spin_lock(&timeline->lock);
	list_move(&request->link, &timeline->requests);
	spin_unlock(&timeline->lock);

	/*
	 * We don't need to wake_up any waiters on request->execute, they
	 * will get woken by any other event or us re-adding this request
	 * to the engine timeline (__i915_request_submit()). The waiters
	 * should be quite adept at finding that the request now has a
	 * different global_seqno to the one they went to sleep on.
	 */
}

void i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->timeline->lock, flags);

	__i915_request_unsubmit(request);

	spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
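/*
 * submit_notify is the i915_sw_fence callback installed on rq->submit in
 * i915_request_alloc(): FENCE_COMPLETE pushes the request to the engine
 * backend, FENCE_FREE drops the reference taken for the fence chain.
 */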
static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		trace_i915_request_submit(request);
		/*
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged(). We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
		 */
		rcu_read_lock();
		request->engine->submit_request(request);
		rcu_read_unlock();
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_request_alloc - allocate a request structure
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
struct i915_request *
i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_request *rq;
	struct intel_ring *ring;
	int ret;

	lockdep_assert_held(&i915->drm.struct_mutex);

	/*
	 * Preempt contexts are reserved for exclusive use to inject a
	 * preemption context switch. They are never to be used for any trivial
	 * request!
	 */
	GEM_BUG_ON(ctx == i915->preempt_context);

	/*
	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
	 * EIO if the GPU is already wedged.
	 */
	if (i915_terminally_wedged(&i915->gpu_error))
		return ERR_PTR(-EIO);

	/*
	 * Pinning the contexts may generate requests in order to acquire
	 * GGTT space, so do this first before we reserve a seqno for
	 * ourselves.
	 */
	ring = engine->context_pin(engine, ctx);
	if (IS_ERR(ring))
		return ERR_CAST(ring);
	GEM_BUG_ON(!ring);

	ret = reserve_engine(engine);
	if (ret)
		goto err_unpin;

	ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST);
	if (ret)
		goto err_unreserve;

	/* Move the oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry_or_null(&engine->timeline->requests,
				      typeof(*rq), link);
	if (rq && i915_request_completed(rq))
		i915_request_retire(rq);

	/*
	 * Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is the request we are writing to here, may be in the process
	 * of being read by __i915_gem_active_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->global_seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to matches the active
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be, and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	rq = kmem_cache_alloc(i915->requests,
			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
		/* Ratelimit ourselves to prevent oom from malicious clients */
		ret = i915_gem_wait_for_idle(i915,
					     I915_WAIT_LOCKED |
					     I915_WAIT_INTERRUPTIBLE);
		if (ret)
			goto err_unreserve;

		/*
		 * We've forced the client to stall and catch up with whatever
		 * backlog there might have been. As we are assuming that we
		 * caused the mempressure, now is an opportune time to
		 * recover as much memory from the request pool as is possible.
		 * Having already penalized the client to stall, we spend
		 * a little extra time to re-optimise page allocation.
		 */
		kmem_cache_shrink(i915->requests);
		rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */

		rq = kmem_cache_alloc(i915->requests, GFP_KERNEL);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
		}
	}

	rq->timeline = i915_gem_context_lookup_timeline(ctx, engine);
	GEM_BUG_ON(rq->timeline == engine->timeline);

	spin_lock_init(&rq->lock);
	dma_fence_init(&rq->fence,
		       &i915_fence_ops,
		       &rq->lock,
		       rq->timeline->fence_context,
		       timeline_get_seqno(rq->timeline));

	/* We bump the ref for the fence chain */
	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
	init_waitqueue_head(&rq->execute);

	i915_priotree_init(&rq->priotree);

	INIT_LIST_HEAD(&rq->active_list);
	rq->i915 = i915;
	rq->engine = engine;
	rq->ctx = ctx;
	rq->ring = ring;

	/* No zalloc, must clear what we need by hand */
	rq->global_seqno = 0;
	rq->signaling.wait.seqno = 0;
	rq->file_priv = NULL;
	rq->batch = NULL;
	rq->capture_list = NULL;
	rq->waitboost = false;

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_request_add() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);

	/*
	 * Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	rq->head = rq->ring->emit;

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
	if (ret)
		goto err_unwind;

	ret = engine->request_alloc(rq);
	if (ret)
		goto err_unwind;

	/* Check that we didn't interrupt ourselves with a new request */
	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
	return rq;

err_unwind:
	rq->ring->emit = rq->head;

	/* Make sure we didn't add ourselves to external state before freeing */
	GEM_BUG_ON(!list_empty(&rq->active_list));
	GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list));

	kmem_cache_free(i915->requests, rq);
err_unreserve:
	unreserve_engine(engine);
err_unpin:
	engine->context_unpin(engine, ctx);
	return ERR_PTR(ret);
}
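/*
 * Illustrative sketch (not part of the driver) of the request lifecycle
 * built from the functions in this file, with struct_mutex held and error
 * handling elided:
 *
 *	rq = i915_request_alloc(engine, ctx);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	cs = intel_ring_begin(rq, 4);		(reserve dwords in rq->ring)
 *	... write commands into cs ...
 *
 *	__i915_request_add(rq, true);		(seal and queue for execution)
 *	i915_request_wait(rq, I915_WAIT_LOCKED | I915_WAIT_INTERRUPTIBLE,
 *			  MAX_SCHEDULE_TIMEOUT);
 */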
static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);
	GEM_BUG_ON(to->timeline == from->timeline);

	if (i915_request_completed(from))
		return 0;

	if (to->engine->schedule) {
		ret = i915_priotree_add_dependency(to->i915,
						   &to->priotree,
						   &from->priotree);
		if (ret < 0)
			return ret;
	}

	if (to->engine == from->engine) {
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
						       &from->submit,
						       I915_FENCE_GFP);
		return ret < 0 ? ret : 0;
	}

	if (to->engine->semaphore.sync_to) {
		u32 seqno;

		GEM_BUG_ON(!from->engine->semaphore.signal);

		seqno = i915_request_global_seqno(from);
		if (!seqno)
			goto await_dma_fence;

		if (seqno <= to->timeline->global_sync[from->engine->id])
			return 0;

		trace_i915_gem_ring_sync_to(to, from);
		ret = to->engine->semaphore.sync_to(to, from);
		if (ret)
			return ret;

		to->timeline->global_sync[from->engine->id] = seqno;
		return 0;
	}

await_dma_fence:
	ret = i915_sw_fence_await_dma_fence(&to->submit,
					    &from->fence, 0,
					    I915_FENCE_GFP);
	return ret < 0 ? ret : 0;
}
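/*
 * Queue an asynchronous wait for @fence before @rq is submitted: decompose
 * fence-arrays into their children, skip fences that are already signaled
 * or belong to our own timeline, and remember the latest fence seen per
 * timeline so that repeated waits can be squashed.
 */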
int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	/*
	 * Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * Requests on the same timeline are explicitly ordered, along
		 * with their dependencies, by i915_request_add() which ensures
		 * that requests are submitted in-order through each ring.
		 */
		if (fence->context == rq->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context != rq->i915->mm.unordered_timeline &&
		    intel_timeline_sync_is_later(rq->timeline, fence))
			continue;

		if (dma_fence_is_i915(fence))
			ret = i915_request_await_request(rq, to_request(fence));
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    I915_FENCE_TIMEOUT,
							    I915_FENCE_GFP);
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context != rq->i915->mm.unordered_timeline)
			intel_timeline_sync_set(rq->timeline, fence);
	} while (--nchild);

	return 0;
}

/**
 * i915_request_await_object - set this request to (async) wait upon a bo
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the wait is on behalf of a writer
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
{
	struct dma_fence *excl;
	int ret = 0;

	if (write) {
		struct dma_fence **shared;
		unsigned int count, i;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			ret = i915_request_await_dma_fence(to, shared[i]);
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		if (ret == 0)
			ret = i915_request_await_dma_fence(to, excl);

		dma_fence_put(excl);
	}

	return ret;
}
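/*
 * Illustrative use of i915_request_await_object() (not part of the
 * driver): before emitting commands from rq that will write to obj, a
 * caller would typically do
 *
 *	err = i915_request_await_object(rq, obj, true);
 *
 * so that the request is not submitted until the prior readers and writer
 * of the object have completed.
 */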
/*
 * NB: This function is not allowed to fail. Doing so would mean the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
void __i915_request_add(struct i915_request *request, bool flush_caches)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_ring *ring = request->ring;
	struct intel_timeline *timeline = request->timeline;
	struct i915_request *prev;
	u32 *cs;
	int err;

	lockdep_assert_held(&request->i915->drm.struct_mutex);
	trace_i915_request_add(request);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
	 * our i915_request_alloc() and called __i915_request_add() before
	 * us, the timeline will hold its seqno which is later than ours.
	 */
	GEM_BUG_ON(timeline->seqno != request->fence.seqno);

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	request->reserved_space = 0;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
		err = engine->emit_flush(request, EMIT_FLUSH);

		/* Not allowed to fail! */
		WARN(err, "engine->emit_flush() failed: %d!\n", err);
	}

	/*
	 * Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
	GEM_BUG_ON(IS_ERR(cs));
	request->postfix = intel_ring_offset(request, cs);

	/*
	 * Seal the request and mark it as pending execution. Note that
	 * we may inspect this state, without holding any locks, during
	 * hangcheck. Hence we apply the barrier to ensure that we do not
	 * see a more recent value in the hws than we are tracking.
	 */

	prev = i915_gem_active_raw(&timeline->last_request,
				   &request->i915->drm.struct_mutex);
	if (prev && !i915_request_completed(prev)) {
		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
					     &request->submitq);
		if (engine->schedule)
			__i915_priotree_add_dependency(&request->priotree,
						       &prev->priotree,
						       &request->dep,
						       0);
	}

	spin_lock_irq(&timeline->lock);
	list_add_tail(&request->link, &timeline->requests);
	spin_unlock_irq(&timeline->lock);

	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
	i915_gem_active_set(&timeline->last_request, request);

	list_add_tail(&request->ring_link, &ring->request_list);
	request->emitted_jiffies = jiffies;

	/*
	 * Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
	rcu_read_lock();
	if (engine->schedule)
		engine->schedule(request, request->ctx->priority);
	rcu_read_unlock();

	local_bh_disable();
	i915_sw_fence_commit(&request->submit);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */

	/*
	 * In typical scenarios, we do not expect the previous request on
	 * the timeline to be still tracked by timeline->last_request if it
	 * has been completed. If the completed request is still here, that
	 * implies that request retirement is a long way behind submission,
	 * suggesting that we haven't been retiring frequently enough from
	 * the combination of retire-before-alloc, waiters and the background
	 * retirement worker. So if the last request on this timeline was
	 * already completed, do a catch up pass, flushing the retirement queue
	 * up to this client. Since we have now moved the heaviest operations
	 * during retirement onto secondary workers, such as freeing objects
	 * or contexts, retiring a bunch of requests is mostly list management
	 * (and cache misses), and so we should not be overly penalizing this
	 * client by performing excess work, though we may still be performing
	 * work on behalf of others -- but instead we should benefit from
	 * improved resource management. (Well, that's the theory at least.)
	 */
	if (prev && i915_request_completed(prev))
		i915_request_retire_upto(prev);
}
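/*
 * Helpers for the short optimistic busy-wait performed by
 * i915_request_wait() before it falls back to an interrupt-driven sleep,
 * see __i915_spin_request() below.
 */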
static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}
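/*
 * Busywait for up to @timeout_us microseconds for @seqno to be signaled,
 * returning true only if the request completed while we were spinning.
 * Bail out early on a pending signal, a CPU switch, an unrelated
 * interrupt from the engine or if the scheduler wants the CPU back.
 */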
static bool __i915_spin_request(const struct i915_request *rq,
				u32 seqno, int state, unsigned long timeout_us)
{
	struct intel_engine_cs *engine = rq->engine;
	unsigned int irq, cpu;

	GEM_BUG_ON(!seqno);

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the engine and so we can
	 * tell if the request has started. If the request hasn't started yet,
	 * it is a fair assumption that it will not complete within our
	 * relatively short timeout.
	 */
	if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
		return false;

	/*
	 * When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	irq = atomic_read(&engine->irq_count);
	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
			return seqno == i915_request_global_seqno(rq);

		/*
		 * Seqnos are meant to be ordered *before* the interrupt. If
		 * we see an interrupt without a corresponding seqno advance,
		 * assume we won't see one in the near future but require
		 * the engine->seqno_barrier() to fixup coherency.
		 */
		if (atomic_read(&engine->irq_count) != irq)
			break;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}

static bool __i915_wait_request_check_and_reset(struct i915_request *request)
{
	if (likely(!i915_reset_handoff(&request->i915->gpu_error)))
		return false;

	__set_current_state(TASK_RUNNING);
	i915_reset(request->i915, 0);
	return true;
}
/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
 * in via the flags, and vice versa if the struct_mutex is not held, the caller
 * must not specify that the wait is locked.
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
	DEFINE_WAIT_FUNC(reset, default_wake_function);
	DEFINE_WAIT_FUNC(exec, default_wake_function);
	struct intel_wait wait;

	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	if (i915_request_completed(rq))
		return timeout;

	if (!timeout)
		return -ETIME;

	trace_i915_request_wait_begin(rq, flags);

	add_wait_queue(&rq->execute, &exec);
	if (flags & I915_WAIT_LOCKED)
		add_wait_queue(errq, &reset);

	intel_wait_init(&wait, rq);

restart:
	do {
		set_current_state(state);
		if (intel_wait_update_request(&wait, rq))
			break;

		if (flags & I915_WAIT_LOCKED &&
		    __i915_wait_request_check_and_reset(rq))
			continue;

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			goto complete;
		}

		if (!timeout) {
			timeout = -ETIME;
			goto complete;
		}

		timeout = io_schedule_timeout(timeout);
	} while (1);

	GEM_BUG_ON(!intel_wait_has_seqno(&wait));
	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));

	/* Optimistic short spin before touching IRQs */
	if (__i915_spin_request(rq, wait.seqno, state, 5))
		goto complete;

	set_current_state(state);
	if (intel_engine_add_wait(rq->engine, &wait))
		/*
		 * In order to check that we haven't missed the interrupt
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

	if (flags & I915_WAIT_LOCKED)
		__i915_wait_request_check_and_reset(rq);

	for (;;) {
		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);

		if (intel_wait_complete(&wait) &&
		    intel_wait_check_request(&wait, rq))
			break;

		set_current_state(state);

wakeup:
		/*
		 * Carefully check if the request is complete, giving time
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
		if (__i915_request_irq_complete(rq))
			break;

		/*
		 * If the GPU is hung, and we hold the lock, reset the GPU
		 * and then check for completion. On a full reset, the engine's
		 * HW seqno will be advanced past us and we are complete.
		 * If we do a partial reset, we have to wait for the GPU to
		 * resume and update the breadcrumb.
		 *
		 * If we don't hold the mutex, we can just wait for the worker
		 * to come along and update the breadcrumb (either directly
		 * itself, or indirectly by recovering the GPU).
		 */
		if (flags & I915_WAIT_LOCKED &&
		    __i915_wait_request_check_and_reset(rq))
			continue;

		/* Only spin if we know the GPU is processing this request */
		if (__i915_spin_request(rq, wait.seqno, state, 2))
			break;

		if (!intel_wait_check_request(&wait, rq)) {
			intel_engine_remove_wait(rq->engine, &wait);
			goto restart;
		}
	}

	intel_engine_remove_wait(rq->engine, &wait);
complete:
	__set_current_state(TASK_RUNNING);
	if (flags & I915_WAIT_LOCKED)
		remove_wait_queue(errq, &reset);
	remove_wait_queue(&rq->execute, &exec);
	trace_i915_request_wait_end(rq);

	return timeout;
}
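/*
 * Retire, in submission order, every request on the engine timeline whose
 * global seqno has already been passed by the hardware.
 */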
static void engine_retire_requests(struct intel_engine_cs *engine)
{
	struct i915_request *request, *next;
	u32 seqno = intel_engine_get_seqno(engine);
	LIST_HEAD(retire);

	spin_lock_irq(&engine->timeline->lock);
	list_for_each_entry_safe(request, next,
				 &engine->timeline->requests, link) {
		if (!i915_seqno_passed(seqno, request->global_seqno))
			break;

		list_move_tail(&request->link, &retire);
	}
	spin_unlock_irq(&engine->timeline->lock);

	list_for_each_entry_safe(request, next, &retire, link)
		i915_request_retire(request);
}

void i915_retire_requests(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	lockdep_assert_held(&i915->drm.struct_mutex);

	if (!i915->gt.active_requests)
		return;

	for_each_engine(engine, i915, id)
		engine_retire_requests(engine);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif