/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif
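
/*
 * Illustrative sketch only (not part of the driver; "obj", "my_active" and
 * "my_retire" are hypothetical placeholders): the usual lifecycle of an
 * i915_active as consumed by the functions below.
 *
 *	i915_active_init(&obj->active, my_active, my_retire);
 *
 *	err = i915_active_acquire(&obj->active); // first user, calls my_active()
 *	if (err)
 *		return err;
 *
 *	err = i915_active_ref(&obj->active, tl, &rq->fence);
 *	i915_active_release(&obj->active); // once idle, my_retire() is called
 *
 * When the last tracked fence signals and the acquire count drops to zero,
 * __active_retire() below returns the rbtree nodes to the slab cache
 * described above.
 */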

static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	root = ref->tree;
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}

static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * in the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	spin_unlock_irq(&ref->tree_lock);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}
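
/*
 * Note on the initialisation below: the retire callback is passed in with
 * flag bits packed into its low pointer bits (ptr_unpack_bits()); if
 * I915_ACTIVE_MAY_SLEEP is set, the tracker is marked
 * I915_ACTIVE_RETIRE_SLEEPS and the final retirement is punted to
 * system_unbound_wq (see active_retire() above) rather than being run
 * directly from the potentially atomic fence-callback context.
 */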

void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	spin_lock_init(&ref->tree_lock);
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", mkey);
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}
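
/*
 * Barrier bookkeeping, as used by the helpers below: a barrier is a
 * proto-node whose fence slot holds ERR_PTR(-EAGAIN) rather than a real
 * fence (see is_barrier() and i915_active_acquire_preallocate_barrier()),
 * and whose otherwise idle cb.node.prev pointer is borrowed to record the
 * target engine (__barrier_to_engine()). Such nodes sit on
 * engine->barrier_tasks until they are attached to the next kernel-context
 * request by i915_request_add_active_barriers().
 */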

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	struct dma_fence *prev;

	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	rcu_read_lock();
	prev = __i915_active_fence_set(&ref->excl, f);
	if (prev)
		prev = dma_fence_get_rcu(prev);
	else
		atomic_inc(&ref->count);
	rcu_read_unlock();

	return prev;
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		if (ref->active)
			err = ref->active(ref);
		if (!err) {
			spin_lock_irq(&ref->tree_lock); /* __active_retire() */
			debug_active_activate(ref);
			atomic_inc(&ref->count);
			spin_unlock_irq(&ref->tree_lock);
		}
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	if (unlikely(is_barrier(active)))
		return;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

static int flush_barrier(struct active_node *it)
{
	struct intel_engine_cs *engine;

	if (likely(!is_barrier(&it->base)))
		return 0;

	engine = __barrier_to_engine(it);
	smp_rmb(); /* serialise with add_active_barriers */
	if (!is_barrier(&it->base))
		return 0;

	return intel_engine_flush_barriers(engine);
}
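
/*
 * The fences tracked above are only lazily signaled: before we can wait on
 * (or export) them we must ask for software signaling so that completion is
 * actually reported. flush_lazy_signals() below walks the exclusive slot and
 * the whole rbtree doing exactly that, flushing any unconnected idle
 * barriers it trips over along the way.
 */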

static int
flush_lazy_signals(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = flush_barrier(it); /* unconnected idle barrier? */
		if (err)
			break;

		enable_signaling(&it->base);
	}

	return err;
}

int __i915_active_wait(struct i915_active *ref, int state)
{
	int err;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Any fence added after the wait begins will not be auto-signaled */
	err = flush_lazy_signals(ref);
	i915_active_release(ref);
	if (err)
		return err;

	if (!i915_active_is_idle(ref) &&
	    ___wait_var_event(ref, i915_active_is_idle(ref),
			      state, 0, 0, schedule()))
		return -EINTR;

	flush_work(&ref->work);
	return 0;
}

static int __await_active(struct i915_active_fence *active,
			  int (*fn)(void *arg, struct dma_fence *fence),
			  void *arg)
{
	struct dma_fence *fence;

	if (is_barrier(active)) /* XXX flush the barrier? */
		return 0;

	fence = i915_active_fence_get(active);
	if (fence) {
		int err;

		err = fn(arg, fence);
		dma_fence_put(fence);
		if (err < 0)
			return err;
	}

	return 0;
}

struct wait_barrier {
	struct wait_queue_entry base;
	struct i915_active *ref;
};

static int
barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
{
	struct wait_barrier *wb = container_of(wq, typeof(*wb), base);

	if (i915_active_is_idle(wb->ref)) {
		list_del(&wq->entry);
		i915_sw_fence_complete(wq->private);
		kfree(wq);
	}

	return 0;
}

static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
{
	struct wait_barrier *wb;

	wb = kmalloc(sizeof(*wb), GFP_KERNEL);
	if (unlikely(!wb))
		return -ENOMEM;

	GEM_BUG_ON(i915_active_is_idle(ref));
	if (!i915_sw_fence_await(fence)) {
		kfree(wb);
		return -EINVAL;
	}

	wb->base.flags = 0;
	wb->base.func = barrier_wake;
	wb->base.private = fence;
	wb->ref = ref;

	add_wait_queue(__var_waitqueue(ref), &wb->base);
	return 0;
}

static int await_active(struct i915_active *ref,
			unsigned int flags,
			int (*fn)(void *arg, struct dma_fence *fence),
			void *arg, struct i915_sw_fence *barrier)
{
	int err = 0;

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	if (flags & I915_ACTIVE_AWAIT_EXCL &&
	    rcu_access_pointer(ref->excl.fence)) {
		err = __await_active(&ref->excl, fn, arg);
		if (err)
			goto out;
	}

	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
		struct active_node *it, *n;

		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
			err = __await_active(&it->base, fn, arg);
			if (err)
				goto out;
		}
	}

	if (flags & I915_ACTIVE_AWAIT_BARRIER) {
		err = flush_lazy_signals(ref);
		if (err)
			goto out;

		err = __await_barrier(ref, barrier);
		if (err)
			goto out;
	}

out:
	i915_active_release(ref);
	return err;
}

static int rq_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_request_await_dma_fence(arg, fence);
}

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags)
{
	return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
}
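
/*
 * Illustrative sketch only ("rq" and "obj" are placeholders): a request that
 * must run after all work tracked by an i915_active, including the exclusive
 * slot, would typically use
 *
 *	err = i915_request_await_active(rq, &obj->active,
 *					I915_ACTIVE_AWAIT_EXCL |
 *					I915_ACTIVE_AWAIT_ACTIVE);
 *
 * Adding I915_ACTIVE_AWAIT_BARRIER also waits upon the idle barriers, by
 * hooking an extra waiter onto the request's submit fence (see
 * __await_barrier() above).
 */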

static int sw_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_sw_fence_await_dma_fence(arg, fence, 0,
					     GFP_NOWAIT | __GFP_NOWARN);
}

int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags)
{
	return await_active(ref, flags, sw_await_fence, fence, fence);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
}
#endif

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}
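
/*
 * An idle barrier is a node on an engine's kernel_context timeline whose
 * fence slot is currently empty. reuse_idle_barrier() below recycles such a
 * node (or, failing that, steals a still-pending barrier) from the rbtree so
 * that re-arming the barriers before each parking cycle does not have to hit
 * the slab allocator every time.
 */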

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active; due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	spin_unlock_irq(&ref->tree_lock);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;
	int err;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier().
	 */
	GEM_BUG_ON(!mask);
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			atomic_inc(&ref->count);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}
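
/*
 * Illustrative sketch only (call placement is up to the caller): barriers
 * are installed in two phases, so that the fallible allocation above is
 * separated from the unfailing publication below,
 *
 *	err = i915_active_acquire_preallocate_barrier(ref, engine);
 *	if (err)
 *		return err;
 *	... // past the caller's point of no return
 *	i915_active_acquire_barrier(ref);
 *
 * and the proto-nodes are finally consumed by
 * i915_request_add_active_barriers() on the engine's next kernel-context
 * request.
 */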

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put_delay(engine, 1);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}
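
/*
 * Illustrative sketch only (simplified; i915_active_fence_set() below also
 * takes an RCU reference on the previous fence): replacing the last fence on
 * a timeline while preserving execution order is a two step dance,
 *
 *	prev = __i915_active_fence_set(active, &rq->fence);
 *	if (prev)
 *		i915_request_await_dma_fence(rq, prev);
 *
 * with the caller holding whatever lock serialises new fences on that
 * timeline (cf. the lockdep_assert_held(&tl->mutex) in i915_active_ref()).
 */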

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * understood, it should be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	if (fence == rcu_access_pointer(active->fence))
		return fence;

	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	/*
	 * Consider that we have two threads arriving (A and B), with
	 * C already resident as the active->fence.
	 *
	 * A does the xchg first, and so it sees C or NULL depending
	 * on the timing of the interrupt handler. If it is NULL, the
	 * previous fence must have been signaled and we know that
	 * we are first on the timeline. If it is still present,
	 * we acquire the lock on that fence and serialise with the interrupt
	 * handler, in the process removing it from any future interrupt
	 * callback. A will then wait on C before executing (if present).
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 *
	 * Note the strong ordering of the timeline also provides consistent
	 * nesting rules for the fence->lock; the inner lock is always the
	 * older lock.
	 */
	spin_lock_irqsave(fence->lock, flags);
	prev = xchg(__active_fence_slot(active), fence);
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
	}
	list_add_tail(&active->cb.node, &fence->cb_list);
	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

struct auto_active {
	struct i915_active base;
	struct kref ref;
};

struct i915_active *i915_active_get(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_get(&aa->ref);
	return &aa->base;
}

static void auto_release(struct kref *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), ref);

	i915_active_fini(&aa->base);
	kfree(aa);
}

void i915_active_put(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_put(&aa->ref, auto_release);
}

static int auto_active(struct i915_active *ref)
{
	i915_active_get(ref);
	return 0;
}

static void auto_retire(struct i915_active *ref)
{
	i915_active_put(ref);
}

struct i915_active *i915_active_create(void)
{
	struct auto_active *aa;

	aa = kmalloc(sizeof(*aa), GFP_KERNEL);
	if (!aa)
		return NULL;

	kref_init(&aa->ref);
	i915_active_init(&aa->base, auto_active, auto_retire);

	return &aa->base;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}