1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include <linux/debugobjects.h> 8 9 #include "gt/intel_context.h" 10 #include "gt/intel_engine_heartbeat.h" 11 #include "gt/intel_engine_pm.h" 12 #include "gt/intel_ring.h" 13 14 #include "i915_drv.h" 15 #include "i915_active.h" 16 #include "i915_globals.h" 17 18 /* 19 * Active refs memory management 20 * 21 * To be more economical with memory, we reap all the i915_active trees as 22 * they idle (when we know the active requests are inactive) and allocate the 23 * nodes from a local slab cache to hopefully reduce the fragmentation. 24 */ 25 static struct i915_global_active { 26 struct i915_global base; 27 struct kmem_cache *slab_cache; 28 } global; 29 30 struct active_node { 31 struct i915_active_fence base; 32 struct i915_active *ref; 33 struct rb_node node; 34 u64 timeline; 35 }; 36 37 static inline struct active_node * 38 node_from_active(struct i915_active_fence *active) 39 { 40 return container_of(active, struct active_node, base); 41 } 42 43 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) 44 45 static inline bool is_barrier(const struct i915_active_fence *active) 46 { 47 return IS_ERR(rcu_access_pointer(active->fence)); 48 } 49 50 static inline struct llist_node *barrier_to_ll(struct active_node *node) 51 { 52 GEM_BUG_ON(!is_barrier(&node->base)); 53 return (struct llist_node *)&node->base.cb.node; 54 } 55 56 static inline struct intel_engine_cs * 57 __barrier_to_engine(struct active_node *node) 58 { 59 return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev); 60 } 61 62 static inline struct intel_engine_cs * 63 barrier_to_engine(struct active_node *node) 64 { 65 GEM_BUG_ON(!is_barrier(&node->base)); 66 return __barrier_to_engine(node); 67 } 68 69 static inline struct active_node *barrier_from_ll(struct llist_node *x) 70 { 71 return container_of((struct list_head *)x, 72 struct active_node, base.cb.node); 73 } 74 75 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) 76 77 static void *active_debug_hint(void *addr) 78 { 79 struct i915_active *ref = addr; 80 81 return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; 82 } 83 84 static struct debug_obj_descr active_debug_desc = { 85 .name = "i915_active", 86 .debug_hint = active_debug_hint, 87 }; 88 89 static void debug_active_init(struct i915_active *ref) 90 { 91 debug_object_init(ref, &active_debug_desc); 92 } 93 94 static void debug_active_activate(struct i915_active *ref) 95 { 96 lockdep_assert_held(&ref->tree_lock); 97 if (!atomic_read(&ref->count)) /* before the first inc */ 98 debug_object_activate(ref, &active_debug_desc); 99 } 100 101 static void debug_active_deactivate(struct i915_active *ref) 102 { 103 lockdep_assert_held(&ref->tree_lock); 104 if (!atomic_read(&ref->count)) /* after the last dec */ 105 debug_object_deactivate(ref, &active_debug_desc); 106 } 107 108 static void debug_active_fini(struct i915_active *ref) 109 { 110 debug_object_free(ref, &active_debug_desc); 111 } 112 113 static void debug_active_assert(struct i915_active *ref) 114 { 115 debug_object_assert_init(ref, &active_debug_desc); 116 } 117 118 #else 119 120 static inline void debug_active_init(struct i915_active *ref) { } 121 static inline void debug_active_activate(struct i915_active *ref) { } 122 static inline void debug_active_deactivate(struct i915_active *ref) { } 123 static inline void debug_active_fini(struct i915_active *ref) { } 124 static inline void debug_active_assert(struct i915_active *ref) { } 125 126 #endif 127 128 static void 129 __active_retire(struct i915_active *ref) 130 { 131 struct active_node *it, *n; 132 struct rb_root root; 133 unsigned long flags; 134 135 GEM_BUG_ON(i915_active_is_idle(ref)); 136 137 /* return the unused nodes to our slabcache -- flushing the allocator */ 138 if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) 139 return; 140 141 GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); 142 debug_active_deactivate(ref); 143 144 root = ref->tree; 145 ref->tree = RB_ROOT; 146 ref->cache = NULL; 147 148 spin_unlock_irqrestore(&ref->tree_lock, flags); 149 150 /* After the final retire, the entire struct may be freed */ 151 if (ref->retire) 152 ref->retire(ref); 153 154 /* ... except if you wait on it, you must manage your own references! */ 155 wake_up_var(ref); 156 157 rbtree_postorder_for_each_entry_safe(it, n, &root, node) { 158 GEM_BUG_ON(i915_active_fence_isset(&it->base)); 159 kmem_cache_free(global.slab_cache, it); 160 } 161 } 162 163 static void 164 active_work(struct work_struct *wrk) 165 { 166 struct i915_active *ref = container_of(wrk, typeof(*ref), work); 167 168 GEM_BUG_ON(!atomic_read(&ref->count)); 169 if (atomic_add_unless(&ref->count, -1, 1)) 170 return; 171 172 __active_retire(ref); 173 } 174 175 static void 176 active_retire(struct i915_active *ref) 177 { 178 GEM_BUG_ON(!atomic_read(&ref->count)); 179 if (atomic_add_unless(&ref->count, -1, 1)) 180 return; 181 182 if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { 183 queue_work(system_unbound_wq, &ref->work); 184 return; 185 } 186 187 __active_retire(ref); 188 } 189 190 static inline struct dma_fence ** 191 __active_fence_slot(struct i915_active_fence *active) 192 { 193 return (struct dma_fence ** __force)&active->fence; 194 } 195 196 static inline bool 197 active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) 198 { 199 struct i915_active_fence *active = 200 container_of(cb, typeof(*active), cb); 201 202 return cmpxchg(__active_fence_slot(active), fence, NULL) == fence; 203 } 204 205 static void 206 node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) 207 { 208 if (active_fence_cb(fence, cb)) 209 active_retire(container_of(cb, struct active_node, base.cb)->ref); 210 } 211 212 static void 213 excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) 214 { 215 if (active_fence_cb(fence, cb)) 216 active_retire(container_of(cb, struct i915_active, excl.cb)); 217 } 218 219 static struct i915_active_fence * 220 active_instance(struct i915_active *ref, struct intel_timeline *tl) 221 { 222 struct active_node *node, *prealloc; 223 struct rb_node **p, *parent; 224 u64 idx = tl->fence_context; 225 226 /* 227 * We track the most recently used timeline to skip a rbtree search 228 * for the common case, under typical loads we never need the rbtree 229 * at all. We can reuse the last slot if it is empty, that is 230 * after the previous activity has been retired, or if it matches the 231 * current timeline. 232 */ 233 node = READ_ONCE(ref->cache); 234 if (node && node->timeline == idx) 235 return &node->base; 236 237 /* Preallocate a replacement, just in case */ 238 prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); 239 if (!prealloc) 240 return NULL; 241 242 spin_lock_irq(&ref->tree_lock); 243 GEM_BUG_ON(i915_active_is_idle(ref)); 244 245 parent = NULL; 246 p = &ref->tree.rb_node; 247 while (*p) { 248 parent = *p; 249 250 node = rb_entry(parent, struct active_node, node); 251 if (node->timeline == idx) { 252 kmem_cache_free(global.slab_cache, prealloc); 253 goto out; 254 } 255 256 if (node->timeline < idx) 257 p = &parent->rb_right; 258 else 259 p = &parent->rb_left; 260 } 261 262 node = prealloc; 263 __i915_active_fence_init(&node->base, NULL, node_retire); 264 node->ref = ref; 265 node->timeline = idx; 266 267 rb_link_node(&node->node, parent, p); 268 rb_insert_color(&node->node, &ref->tree); 269 270 out: 271 ref->cache = node; 272 spin_unlock_irq(&ref->tree_lock); 273 274 BUILD_BUG_ON(offsetof(typeof(*node), base)); 275 return &node->base; 276 } 277 278 void __i915_active_init(struct i915_active *ref, 279 int (*active)(struct i915_active *ref), 280 void (*retire)(struct i915_active *ref), 281 struct lock_class_key *mkey, 282 struct lock_class_key *wkey) 283 { 284 unsigned long bits; 285 286 debug_active_init(ref); 287 288 ref->flags = 0; 289 ref->active = active; 290 ref->retire = ptr_unpack_bits(retire, &bits, 2); 291 if (bits & I915_ACTIVE_MAY_SLEEP) 292 ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; 293 294 spin_lock_init(&ref->tree_lock); 295 ref->tree = RB_ROOT; 296 ref->cache = NULL; 297 298 init_llist_head(&ref->preallocated_barriers); 299 atomic_set(&ref->count, 0); 300 __mutex_init(&ref->mutex, "i915_active", mkey); 301 __i915_active_fence_init(&ref->excl, NULL, excl_retire); 302 INIT_WORK(&ref->work, active_work); 303 #if IS_ENABLED(CONFIG_LOCKDEP) 304 lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0); 305 #endif 306 } 307 308 static bool ____active_del_barrier(struct i915_active *ref, 309 struct active_node *node, 310 struct intel_engine_cs *engine) 311 312 { 313 struct llist_node *head = NULL, *tail = NULL; 314 struct llist_node *pos, *next; 315 316 GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); 317 318 /* 319 * Rebuild the llist excluding our node. We may perform this 320 * outside of the kernel_context timeline mutex and so someone 321 * else may be manipulating the engine->barrier_tasks, in 322 * which case either we or they will be upset :) 323 * 324 * A second __active_del_barrier() will report failure to claim 325 * the active_node and the caller will just shrug and know not to 326 * claim ownership of its node. 327 * 328 * A concurrent i915_request_add_active_barriers() will miss adding 329 * any of the tasks, but we will try again on the next -- and since 330 * we are actively using the barrier, we know that there will be 331 * at least another opportunity when we idle. 332 */ 333 llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { 334 if (node == barrier_from_ll(pos)) { 335 node = NULL; 336 continue; 337 } 338 339 pos->next = head; 340 head = pos; 341 if (!tail) 342 tail = pos; 343 } 344 if (head) 345 llist_add_batch(head, tail, &engine->barrier_tasks); 346 347 return !node; 348 } 349 350 static bool 351 __active_del_barrier(struct i915_active *ref, struct active_node *node) 352 { 353 return ____active_del_barrier(ref, node, barrier_to_engine(node)); 354 } 355 356 int i915_active_ref(struct i915_active *ref, 357 struct intel_timeline *tl, 358 struct dma_fence *fence) 359 { 360 struct i915_active_fence *active; 361 int err; 362 363 lockdep_assert_held(&tl->mutex); 364 365 /* Prevent reaping in case we malloc/wait while building the tree */ 366 err = i915_active_acquire(ref); 367 if (err) 368 return err; 369 370 active = active_instance(ref, tl); 371 if (!active) { 372 err = -ENOMEM; 373 goto out; 374 } 375 376 if (is_barrier(active)) { /* proto-node used by our idle barrier */ 377 /* 378 * This request is on the kernel_context timeline, and so 379 * we can use it to substitute for the pending idle-barrer 380 * request that we want to emit on the kernel_context. 381 */ 382 __active_del_barrier(ref, node_from_active(active)); 383 RCU_INIT_POINTER(active->fence, NULL); 384 atomic_dec(&ref->count); 385 } 386 if (!__i915_active_fence_set(active, fence)) 387 atomic_inc(&ref->count); 388 389 out: 390 i915_active_release(ref); 391 return err; 392 } 393 394 struct dma_fence * 395 i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) 396 { 397 struct dma_fence *prev; 398 399 /* We expect the caller to manage the exclusive timeline ordering */ 400 GEM_BUG_ON(i915_active_is_idle(ref)); 401 402 rcu_read_lock(); 403 prev = __i915_active_fence_set(&ref->excl, f); 404 if (prev) 405 prev = dma_fence_get_rcu(prev); 406 else 407 atomic_inc(&ref->count); 408 rcu_read_unlock(); 409 410 return prev; 411 } 412 413 bool i915_active_acquire_if_busy(struct i915_active *ref) 414 { 415 debug_active_assert(ref); 416 return atomic_add_unless(&ref->count, 1, 0); 417 } 418 419 int i915_active_acquire(struct i915_active *ref) 420 { 421 int err; 422 423 if (i915_active_acquire_if_busy(ref)) 424 return 0; 425 426 err = mutex_lock_interruptible(&ref->mutex); 427 if (err) 428 return err; 429 430 if (likely(!i915_active_acquire_if_busy(ref))) { 431 if (ref->active) 432 err = ref->active(ref); 433 if (!err) { 434 spin_lock_irq(&ref->tree_lock); /* __active_retire() */ 435 debug_active_activate(ref); 436 atomic_inc(&ref->count); 437 spin_unlock_irq(&ref->tree_lock); 438 } 439 } 440 441 mutex_unlock(&ref->mutex); 442 443 return err; 444 } 445 446 void i915_active_release(struct i915_active *ref) 447 { 448 debug_active_assert(ref); 449 active_retire(ref); 450 } 451 452 static void enable_signaling(struct i915_active_fence *active) 453 { 454 struct dma_fence *fence; 455 456 if (unlikely(is_barrier(active))) 457 return; 458 459 fence = i915_active_fence_get(active); 460 if (!fence) 461 return; 462 463 dma_fence_enable_sw_signaling(fence); 464 dma_fence_put(fence); 465 } 466 467 static int flush_barrier(struct active_node *it) 468 { 469 struct intel_engine_cs *engine; 470 471 if (likely(!is_barrier(&it->base))) 472 return 0; 473 474 engine = __barrier_to_engine(it); 475 smp_rmb(); /* serialise with add_active_barriers */ 476 if (!is_barrier(&it->base)) 477 return 0; 478 479 return intel_engine_flush_barriers(engine); 480 } 481 482 static int flush_lazy_signals(struct i915_active *ref) 483 { 484 struct active_node *it, *n; 485 int err = 0; 486 487 enable_signaling(&ref->excl); 488 rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { 489 err = flush_barrier(it); /* unconnected idle barrier? */ 490 if (err) 491 break; 492 493 enable_signaling(&it->base); 494 } 495 496 return err; 497 } 498 499 int i915_active_wait(struct i915_active *ref) 500 { 501 int err; 502 503 might_sleep(); 504 505 if (!i915_active_acquire_if_busy(ref)) 506 return 0; 507 508 /* Any fence added after the wait begins will not be auto-signaled */ 509 err = flush_lazy_signals(ref); 510 i915_active_release(ref); 511 if (err) 512 return err; 513 514 if (wait_var_event_interruptible(ref, i915_active_is_idle(ref))) 515 return -EINTR; 516 517 flush_work(&ref->work); 518 return 0; 519 } 520 521 static int __await_active(struct i915_active_fence *active, 522 int (*fn)(void *arg, struct dma_fence *fence), 523 void *arg) 524 { 525 struct dma_fence *fence; 526 527 if (is_barrier(active)) /* XXX flush the barrier? */ 528 return 0; 529 530 fence = i915_active_fence_get(active); 531 if (fence) { 532 int err; 533 534 err = fn(arg, fence); 535 dma_fence_put(fence); 536 if (err < 0) 537 return err; 538 } 539 540 return 0; 541 } 542 543 static int await_active(struct i915_active *ref, 544 unsigned int flags, 545 int (*fn)(void *arg, struct dma_fence *fence), 546 void *arg) 547 { 548 int err = 0; 549 550 /* We must always wait for the exclusive fence! */ 551 if (rcu_access_pointer(ref->excl.fence)) { 552 err = __await_active(&ref->excl, fn, arg); 553 if (err) 554 return err; 555 } 556 557 if (flags & I915_ACTIVE_AWAIT_ALL && i915_active_acquire_if_busy(ref)) { 558 struct active_node *it, *n; 559 560 rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { 561 err = __await_active(&it->base, fn, arg); 562 if (err) 563 break; 564 } 565 i915_active_release(ref); 566 if (err) 567 return err; 568 } 569 570 return 0; 571 } 572 573 static int rq_await_fence(void *arg, struct dma_fence *fence) 574 { 575 return i915_request_await_dma_fence(arg, fence); 576 } 577 578 int i915_request_await_active(struct i915_request *rq, 579 struct i915_active *ref, 580 unsigned int flags) 581 { 582 return await_active(ref, flags, rq_await_fence, rq); 583 } 584 585 static int sw_await_fence(void *arg, struct dma_fence *fence) 586 { 587 return i915_sw_fence_await_dma_fence(arg, fence, 0, 588 GFP_NOWAIT | __GFP_NOWARN); 589 } 590 591 int i915_sw_fence_await_active(struct i915_sw_fence *fence, 592 struct i915_active *ref, 593 unsigned int flags) 594 { 595 return await_active(ref, flags, sw_await_fence, fence); 596 } 597 598 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 599 void i915_active_fini(struct i915_active *ref) 600 { 601 debug_active_fini(ref); 602 GEM_BUG_ON(atomic_read(&ref->count)); 603 GEM_BUG_ON(work_pending(&ref->work)); 604 GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); 605 mutex_destroy(&ref->mutex); 606 } 607 #endif 608 609 static inline bool is_idle_barrier(struct active_node *node, u64 idx) 610 { 611 return node->timeline == idx && !i915_active_fence_isset(&node->base); 612 } 613 614 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) 615 { 616 struct rb_node *prev, *p; 617 618 if (RB_EMPTY_ROOT(&ref->tree)) 619 return NULL; 620 621 spin_lock_irq(&ref->tree_lock); 622 GEM_BUG_ON(i915_active_is_idle(ref)); 623 624 /* 625 * Try to reuse any existing barrier nodes already allocated for this 626 * i915_active, due to overlapping active phases there is likely a 627 * node kept alive (as we reuse before parking). We prefer to reuse 628 * completely idle barriers (less hassle in manipulating the llists), 629 * but otherwise any will do. 630 */ 631 if (ref->cache && is_idle_barrier(ref->cache, idx)) { 632 p = &ref->cache->node; 633 goto match; 634 } 635 636 prev = NULL; 637 p = ref->tree.rb_node; 638 while (p) { 639 struct active_node *node = 640 rb_entry(p, struct active_node, node); 641 642 if (is_idle_barrier(node, idx)) 643 goto match; 644 645 prev = p; 646 if (node->timeline < idx) 647 p = p->rb_right; 648 else 649 p = p->rb_left; 650 } 651 652 /* 653 * No quick match, but we did find the leftmost rb_node for the 654 * kernel_context. Walk the rb_tree in-order to see if there were 655 * any idle-barriers on this timeline that we missed, or just use 656 * the first pending barrier. 657 */ 658 for (p = prev; p; p = rb_next(p)) { 659 struct active_node *node = 660 rb_entry(p, struct active_node, node); 661 struct intel_engine_cs *engine; 662 663 if (node->timeline > idx) 664 break; 665 666 if (node->timeline < idx) 667 continue; 668 669 if (is_idle_barrier(node, idx)) 670 goto match; 671 672 /* 673 * The list of pending barriers is protected by the 674 * kernel_context timeline, which notably we do not hold 675 * here. i915_request_add_active_barriers() may consume 676 * the barrier before we claim it, so we have to check 677 * for success. 678 */ 679 engine = __barrier_to_engine(node); 680 smp_rmb(); /* serialise with add_active_barriers */ 681 if (is_barrier(&node->base) && 682 ____active_del_barrier(ref, node, engine)) 683 goto match; 684 } 685 686 spin_unlock_irq(&ref->tree_lock); 687 688 return NULL; 689 690 match: 691 rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ 692 if (p == &ref->cache->node) 693 ref->cache = NULL; 694 spin_unlock_irq(&ref->tree_lock); 695 696 return rb_entry(p, struct active_node, node); 697 } 698 699 int i915_active_acquire_preallocate_barrier(struct i915_active *ref, 700 struct intel_engine_cs *engine) 701 { 702 intel_engine_mask_t tmp, mask = engine->mask; 703 struct llist_node *first = NULL, *last = NULL; 704 struct intel_gt *gt = engine->gt; 705 int err; 706 707 GEM_BUG_ON(i915_active_is_idle(ref)); 708 709 /* Wait until the previous preallocation is completed */ 710 while (!llist_empty(&ref->preallocated_barriers)) 711 cond_resched(); 712 713 /* 714 * Preallocate a node for each physical engine supporting the target 715 * engine (remember virtual engines have more than one sibling). 716 * We can then use the preallocated nodes in 717 * i915_active_acquire_barrier() 718 */ 719 GEM_BUG_ON(!mask); 720 for_each_engine_masked(engine, gt, mask, tmp) { 721 u64 idx = engine->kernel_context->timeline->fence_context; 722 struct llist_node *prev = first; 723 struct active_node *node; 724 725 node = reuse_idle_barrier(ref, idx); 726 if (!node) { 727 node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); 728 if (!node) { 729 err = ENOMEM; 730 goto unwind; 731 } 732 733 RCU_INIT_POINTER(node->base.fence, NULL); 734 node->base.cb.func = node_retire; 735 node->timeline = idx; 736 node->ref = ref; 737 } 738 739 if (!i915_active_fence_isset(&node->base)) { 740 /* 741 * Mark this as being *our* unconnected proto-node. 742 * 743 * Since this node is not in any list, and we have 744 * decoupled it from the rbtree, we can reuse the 745 * request to indicate this is an idle-barrier node 746 * and then we can use the rb_node and list pointers 747 * for our tracking of the pending barrier. 748 */ 749 RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); 750 node->base.cb.node.prev = (void *)engine; 751 atomic_inc(&ref->count); 752 } 753 GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); 754 755 GEM_BUG_ON(barrier_to_engine(node) != engine); 756 first = barrier_to_ll(node); 757 first->next = prev; 758 if (!last) 759 last = first; 760 intel_engine_pm_get(engine); 761 } 762 763 GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); 764 llist_add_batch(first, last, &ref->preallocated_barriers); 765 766 return 0; 767 768 unwind: 769 while (first) { 770 struct active_node *node = barrier_from_ll(first); 771 772 first = first->next; 773 774 atomic_dec(&ref->count); 775 intel_engine_pm_put(barrier_to_engine(node)); 776 777 kmem_cache_free(global.slab_cache, node); 778 } 779 return err; 780 } 781 782 void i915_active_acquire_barrier(struct i915_active *ref) 783 { 784 struct llist_node *pos, *next; 785 unsigned long flags; 786 787 GEM_BUG_ON(i915_active_is_idle(ref)); 788 789 /* 790 * Transfer the list of preallocated barriers into the 791 * i915_active rbtree, but only as proto-nodes. They will be 792 * populated by i915_request_add_active_barriers() to point to the 793 * request that will eventually release them. 794 */ 795 llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { 796 struct active_node *node = barrier_from_ll(pos); 797 struct intel_engine_cs *engine = barrier_to_engine(node); 798 struct rb_node **p, *parent; 799 800 spin_lock_irqsave_nested(&ref->tree_lock, flags, 801 SINGLE_DEPTH_NESTING); 802 parent = NULL; 803 p = &ref->tree.rb_node; 804 while (*p) { 805 struct active_node *it; 806 807 parent = *p; 808 809 it = rb_entry(parent, struct active_node, node); 810 if (it->timeline < node->timeline) 811 p = &parent->rb_right; 812 else 813 p = &parent->rb_left; 814 } 815 rb_link_node(&node->node, parent, p); 816 rb_insert_color(&node->node, &ref->tree); 817 spin_unlock_irqrestore(&ref->tree_lock, flags); 818 819 GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 820 llist_add(barrier_to_ll(node), &engine->barrier_tasks); 821 intel_engine_pm_put(engine); 822 } 823 } 824 825 static struct dma_fence **ll_to_fence_slot(struct llist_node *node) 826 { 827 return __active_fence_slot(&barrier_from_ll(node)->base); 828 } 829 830 void i915_request_add_active_barriers(struct i915_request *rq) 831 { 832 struct intel_engine_cs *engine = rq->engine; 833 struct llist_node *node, *next; 834 unsigned long flags; 835 836 GEM_BUG_ON(!intel_context_is_barrier(rq->context)); 837 GEM_BUG_ON(intel_engine_is_virtual(engine)); 838 GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); 839 840 node = llist_del_all(&engine->barrier_tasks); 841 if (!node) 842 return; 843 /* 844 * Attach the list of proto-fences to the in-flight request such 845 * that the parent i915_active will be released when this request 846 * is retired. 847 */ 848 spin_lock_irqsave(&rq->lock, flags); 849 llist_for_each_safe(node, next, node) { 850 /* serialise with reuse_idle_barrier */ 851 smp_store_mb(*ll_to_fence_slot(node), &rq->fence); 852 list_add_tail((struct list_head *)node, &rq->fence.cb_list); 853 } 854 spin_unlock_irqrestore(&rq->lock, flags); 855 } 856 857 /* 858 * __i915_active_fence_set: Update the last active fence along its timeline 859 * @active: the active tracker 860 * @fence: the new fence (under construction) 861 * 862 * Records the new @fence as the last active fence along its timeline in 863 * this active tracker, moving the tracking callbacks from the previous 864 * fence onto this one. Returns the previous fence (if not already completed), 865 * which the caller must ensure is executed before the new fence. To ensure 866 * that the order of fences within the timeline of the i915_active_fence is 867 * understood, it should be locked by the caller. 868 */ 869 struct dma_fence * 870 __i915_active_fence_set(struct i915_active_fence *active, 871 struct dma_fence *fence) 872 { 873 struct dma_fence *prev; 874 unsigned long flags; 875 876 if (fence == rcu_access_pointer(active->fence)) 877 return fence; 878 879 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); 880 881 /* 882 * Consider that we have two threads arriving (A and B), with 883 * C already resident as the active->fence. 884 * 885 * A does the xchg first, and so it sees C or NULL depending 886 * on the timing of the interrupt handler. If it is NULL, the 887 * previous fence must have been signaled and we know that 888 * we are first on the timeline. If it is still present, 889 * we acquire the lock on that fence and serialise with the interrupt 890 * handler, in the process removing it from any future interrupt 891 * callback. A will then wait on C before executing (if present). 892 * 893 * As B is second, it sees A as the previous fence and so waits for 894 * it to complete its transition and takes over the occupancy for 895 * itself -- remembering that it needs to wait on A before executing. 896 * 897 * Note the strong ordering of the timeline also provides consistent 898 * nesting rules for the fence->lock; the inner lock is always the 899 * older lock. 900 */ 901 spin_lock_irqsave(fence->lock, flags); 902 prev = xchg(__active_fence_slot(active), fence); 903 if (prev) { 904 GEM_BUG_ON(prev == fence); 905 spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); 906 __list_del_entry(&active->cb.node); 907 spin_unlock(prev->lock); /* serialise with prev->cb_list */ 908 } 909 list_add_tail(&active->cb.node, &fence->cb_list); 910 spin_unlock_irqrestore(fence->lock, flags); 911 912 return prev; 913 } 914 915 int i915_active_fence_set(struct i915_active_fence *active, 916 struct i915_request *rq) 917 { 918 struct dma_fence *fence; 919 int err = 0; 920 921 /* Must maintain timeline ordering wrt previous active requests */ 922 rcu_read_lock(); 923 fence = __i915_active_fence_set(active, &rq->fence); 924 if (fence) /* but the previous fence may not belong to that timeline! */ 925 fence = dma_fence_get_rcu(fence); 926 rcu_read_unlock(); 927 if (fence) { 928 err = i915_request_await_dma_fence(rq, fence); 929 dma_fence_put(fence); 930 } 931 932 return err; 933 } 934 935 void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) 936 { 937 active_fence_cb(fence, cb); 938 } 939 940 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 941 #include "selftests/i915_active.c" 942 #endif 943 944 static void i915_global_active_shrink(void) 945 { 946 kmem_cache_shrink(global.slab_cache); 947 } 948 949 static void i915_global_active_exit(void) 950 { 951 kmem_cache_destroy(global.slab_cache); 952 } 953 954 static struct i915_global_active global = { { 955 .shrink = i915_global_active_shrink, 956 .exit = i915_global_active_exit, 957 } }; 958 959 int __init i915_global_active_init(void) 960 { 961 global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); 962 if (!global.slab_cache) 963 return -ENOMEM; 964 965 i915_global_register(&global.base); 966 return 0; 967 } 968