/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#define BKL(ref) (&(ref)->i915->drm.struct_mutex)

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
        struct i915_global base;
        struct kmem_cache *slab_cache;
} global;

struct active_node {
        struct i915_active_request base;
        struct i915_active *ref;
        struct rb_node node;
        u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_request *active)
{
        return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_request *active)
{
        return IS_ERR(rcu_access_pointer(active->request));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
        GEM_BUG_ON(!is_barrier(&node->base));
        return (struct llist_node *)&node->base.link;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
        return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
        GEM_BUG_ON(!is_barrier(&node->base));
        return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
        return container_of((struct list_head *)x,
                            struct active_node, base.link);
}
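
/*
 * Illustrative sketch (not compiled): a preallocated idle-barrier reuses the
 * fields of its i915_active_request to encode its state before any real
 * request exists. Roughly, for a node bound to an engine:
 *
 *      RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));  <- is_barrier()
 *      node->base.link.prev = (void *)engine;   <- read back by barrier_to_engine()
 *      llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
 *
 * i915_request_add_active_barriers() later substitutes the real request for
 * the ERR_PTR, after which the node behaves like any other active_node.
 */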

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
        struct i915_active *ref = addr;

        return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
        .name = "i915_active",
        .debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
        debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
        debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
        debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
        debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
        debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

static void
__active_retire(struct i915_active *ref)
{
        struct active_node *it, *n;
        struct rb_root root;
        bool retire = false;

        lockdep_assert_held(&ref->mutex);

        /* return the unused nodes to our slab cache -- flushing the allocator */
        if (atomic_dec_and_test(&ref->count)) {
                debug_active_deactivate(ref);
                root = ref->tree;
                ref->tree = RB_ROOT;
                ref->cache = NULL;
                retire = true;
        }

        mutex_unlock(&ref->mutex);
        if (!retire)
                return;

        rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
                GEM_BUG_ON(i915_active_request_isset(&it->base));
                kmem_cache_free(global.slab_cache, it);
        }

        /* After the final retire, the entire struct may be freed */
        if (ref->retire)
                ref->retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
        GEM_BUG_ON(!atomic_read(&ref->count));
        if (atomic_add_unless(&ref->count, -1, 1))
                return;

        /* One active may be flushed from inside the acquire of another */
        mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
        __active_retire(ref);
}

static void
node_retire(struct i915_active_request *base, struct i915_request *rq)
{
        active_retire(node_from_active(base)->ref);
}

static struct i915_active_request *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
        struct active_node *node, *prealloc;
        struct rb_node **p, *parent;
        u64 idx = tl->fence_context;

        /*
         * We track the most recently used timeline to skip an rbtree search
         * for the common case; under typical loads we never need the rbtree
         * at all. We can reuse the last slot if it is empty, that is,
         * after the previous activity has been retired, or if it matches the
         * current timeline.
         */
        node = READ_ONCE(ref->cache);
        if (node && node->timeline == idx)
                return &node->base;

        /* Preallocate a replacement, just in case */
        prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
        if (!prealloc)
                return NULL;

        mutex_lock(&ref->mutex);
        GEM_BUG_ON(i915_active_is_idle(ref));

        parent = NULL;
        p = &ref->tree.rb_node;
        while (*p) {
                parent = *p;

                node = rb_entry(parent, struct active_node, node);
                if (node->timeline == idx) {
                        kmem_cache_free(global.slab_cache, prealloc);
                        goto out;
                }

                if (node->timeline < idx)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }

        node = prealloc;
        i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
        node->ref = ref;
        node->timeline = idx;

        rb_link_node(&node->node, parent, p);
        rb_insert_color(&node->node, &ref->tree);

out:
        ref->cache = node;
        mutex_unlock(&ref->mutex);

        BUILD_BUG_ON(offsetof(typeof(*node), base));
        return &node->base;
}

void __i915_active_init(struct drm_i915_private *i915,
                        struct i915_active *ref,
                        int (*active)(struct i915_active *ref),
                        void (*retire)(struct i915_active *ref),
                        struct lock_class_key *key)
{
        debug_active_init(ref);

        ref->i915 = i915;
        ref->flags = 0;
        ref->active = active;
        ref->retire = retire;
        ref->tree = RB_ROOT;
        ref->cache = NULL;
        init_llist_head(&ref->preallocated_barriers);
        atomic_set(&ref->count, 0);
        __mutex_init(&ref->mutex, "i915_active", key);
}
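
/*
 * Minimal caller sketch (illustrative only, not part of the driver): an
 * owner embeds an i915_active, initialises it with an optional retire
 * callback, and is told when the last tracked request has been retired.
 * "fubar" and its fields are hypothetical names for the embedding object.
 *
 *      static void fubar_retire(struct i915_active *ref)
 *      {
 *              struct fubar *f = container_of(ref, struct fubar, active);
 *
 *              (last activity retired; f may now be recycled or freed)
 *      }
 *
 *      i915_active_init(i915, &f->active, NULL, fubar_retire);
 *
 * where i915_active_init() is the header's convenience wrapper around
 * __i915_active_init() that supplies the lockdep class key.
 */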

static bool ____active_del_barrier(struct i915_active *ref,
                                   struct active_node *node,
                                   struct intel_engine_cs *engine)
{
        struct llist_node *head = NULL, *tail = NULL;
        struct llist_node *pos, *next;

        GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

        /*
         * Rebuild the llist excluding our node. We may perform this
         * outside of the kernel_context timeline mutex and so someone
         * else may be manipulating the engine->barrier_tasks, in
         * which case either we or they will be upset :)
         *
         * A second __active_del_barrier() will report failure to claim
         * the active_node and the caller will just shrug and know not to
         * claim ownership of its node.
         *
         * A concurrent i915_request_add_active_barriers() will miss adding
         * any of the tasks, but we will try again on the next -- and since
         * we are actively using the barrier, we know that there will be
         * at least another opportunity when we idle.
         */
        llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
                if (node == barrier_from_ll(pos)) {
                        node = NULL;
                        continue;
                }

                pos->next = head;
                head = pos;
                if (!tail)
                        tail = pos;
        }
        if (head)
                llist_add_batch(head, tail, &engine->barrier_tasks);

        return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
        return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

int i915_active_ref(struct i915_active *ref,
                    struct intel_timeline *tl,
                    struct i915_request *rq)
{
        struct i915_active_request *active;
        int err;

        lockdep_assert_held(&tl->mutex);

        /* Prevent reaping in case we malloc/wait while building the tree */
        err = i915_active_acquire(ref);
        if (err)
                return err;

        active = active_instance(ref, tl);
        if (!active) {
                err = -ENOMEM;
                goto out;
        }

        if (is_barrier(active)) { /* proto-node used by our idle barrier */
                /*
                 * This request is on the kernel_context timeline, and so
                 * we can use it to substitute for the pending idle-barrier
                 * request that we want to emit on the kernel_context.
                 */
                __active_del_barrier(ref, node_from_active(active));
                RCU_INIT_POINTER(active->request, NULL);
                INIT_LIST_HEAD(&active->link);
        } else {
                if (!i915_active_request_isset(active))
                        atomic_inc(&ref->count);
        }
        GEM_BUG_ON(!atomic_read(&ref->count));
        __i915_active_request_set(active, rq);

out:
        i915_active_release(ref);
        return err;
}
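
/*
 * Typical tracking pattern (illustrative sketch, not part of the driver):
 * while constructing a request, and with the request's timeline mutex held,
 * the owner marks itself busy on that timeline:
 *
 *      err = i915_active_ref(&obj->active, rq->timeline, rq);
 *      if (err)
 *              return err;
 *
 * "obj" is a hypothetical owner embedding an i915_active; in the driver the
 * callers are objects such as the vma and context trackers. The internal
 * acquire/release pair keeps the tree from being reaped while the node is
 * looked up and, if needed, allocated.
 */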

int i915_active_acquire(struct i915_active *ref)
{
        int err;

        debug_active_assert(ref);
        if (atomic_add_unless(&ref->count, 1, 0))
                return 0;

        err = mutex_lock_interruptible(&ref->mutex);
        if (err)
                return err;

        if (!atomic_read(&ref->count) && ref->active)
                err = ref->active(ref);
        if (!err) {
                debug_active_activate(ref);
                atomic_inc(&ref->count);
        }

        mutex_unlock(&ref->mutex);

        return err;
}

void i915_active_release(struct i915_active *ref)
{
        debug_active_assert(ref);
        active_retire(ref);
}

static void __active_ungrab(struct i915_active *ref)
{
        clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
}

bool i915_active_trygrab(struct i915_active *ref)
{
        debug_active_assert(ref);

        if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
                return false;

        if (!atomic_add_unless(&ref->count, 1, 0)) {
                __active_ungrab(ref);
                return false;
        }

        return true;
}

void i915_active_ungrab(struct i915_active *ref)
{
        GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));

        active_retire(ref);
        __active_ungrab(ref);
}

int i915_active_wait(struct i915_active *ref)
{
        struct active_node *it, *n;
        int err;

        might_sleep();
        might_lock(&ref->mutex);

        if (i915_active_is_idle(ref))
                return 0;

        err = mutex_lock_interruptible(&ref->mutex);
        if (err)
                return err;

        if (!atomic_add_unless(&ref->count, 1, 0)) {
                mutex_unlock(&ref->mutex);
                return 0;
        }

        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                if (is_barrier(&it->base)) { /* unconnected idle-barrier */
                        err = -EBUSY;
                        break;
                }

                err = i915_active_request_retire(&it->base, BKL(ref));
                if (err)
                        break;
        }

        __active_retire(ref);
        if (err)
                return err;

        if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
                return -EINTR;

        if (!i915_active_is_idle(ref))
                return -EBUSY;

        return 0;
}
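
/*
 * Teardown sketch (illustrative only): before freeing the structure that
 * embeds the i915_active, the owner flushes outstanding activity and then
 * (under CONFIG_DRM_I915_DEBUG_GEM) asserts that it has idled. "obj" is
 * again a hypothetical owner:
 *
 *      err = i915_active_wait(&obj->active);
 *      if (err)
 *              return err;
 *
 *      i915_active_fini(&obj->active);
 *      kfree(obj);
 *
 * Note that i915_active_wait() may return -EBUSY if an unconnected
 * idle-barrier is still pending on the tree.
 */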

int i915_request_await_active_request(struct i915_request *rq,
                                      struct i915_active_request *active)
{
        struct i915_request *barrier =
                i915_active_request_raw(active, &rq->i915->drm.struct_mutex);

        return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
        struct active_node *it, *n;
        int err;

        if (RB_EMPTY_ROOT(&ref->tree))
                return 0;

        /* await allocates and so we need to avoid hitting the shrinker */
        err = i915_active_acquire(ref);
        if (err)
                return err;

        mutex_lock(&ref->mutex);
        rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
                err = i915_request_await_active_request(rq, &it->base);
                if (err)
                        break;
        }
        mutex_unlock(&ref->mutex);

        i915_active_release(ref);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
        debug_active_fini(ref);
        GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
        GEM_BUG_ON(atomic_read(&ref->count));
        mutex_destroy(&ref->mutex);
}
#endif
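
/*
 * Dependency sketch (illustrative only): a new request that must not begin
 * until all activity tracked by @ref has completed awaits the whole tree
 * before it is submitted:
 *
 *      err = i915_request_await_active(rq, &obj->active);
 *      if (err)
 *              return err;
 *
 * Each node contributes at most one dma-fence wait; a node whose request has
 * already been retired contributes nothing, as i915_active_request_raw()
 * then returns NULL.
 */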

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
        return node->timeline == idx && !i915_active_request_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
        struct rb_node *prev, *p;

        if (RB_EMPTY_ROOT(&ref->tree))
                return NULL;

        mutex_lock(&ref->mutex);
        GEM_BUG_ON(i915_active_is_idle(ref));

        /*
         * Try to reuse any existing barrier nodes already allocated for this
         * i915_active; due to overlapping active phases there is likely a
         * node kept alive (as we reuse before parking). We prefer to reuse
         * completely idle barriers (less hassle in manipulating the llists),
         * but otherwise any will do.
         */
        if (ref->cache && is_idle_barrier(ref->cache, idx)) {
                p = &ref->cache->node;
                goto match;
        }

        prev = NULL;
        p = ref->tree.rb_node;
        while (p) {
                struct active_node *node =
                        rb_entry(p, struct active_node, node);

                if (is_idle_barrier(node, idx))
                        goto match;

                prev = p;
                if (node->timeline < idx)
                        p = p->rb_right;
                else
                        p = p->rb_left;
        }

        /*
         * No quick match, but we did find the leftmost rb_node for the
         * kernel_context. Walk the rb_tree in-order to see if there were
         * any idle-barriers on this timeline that we missed, or just use
         * the first pending barrier.
         */
        for (p = prev; p; p = rb_next(p)) {
                struct active_node *node =
                        rb_entry(p, struct active_node, node);
                struct intel_engine_cs *engine;

                if (node->timeline > idx)
                        break;

                if (node->timeline < idx)
                        continue;

                if (is_idle_barrier(node, idx))
                        goto match;

                /*
                 * The list of pending barriers is protected by the
                 * kernel_context timeline, which notably we do not hold
                 * here. i915_request_add_active_barriers() may consume
                 * the barrier before we claim it, so we have to check
                 * for success.
                 */
                engine = __barrier_to_engine(node);
                smp_rmb(); /* serialise with add_active_barriers */
                if (is_barrier(&node->base) &&
                    ____active_del_barrier(ref, node, engine))
                        goto match;
        }

        mutex_unlock(&ref->mutex);

        return NULL;

match:
        rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
        if (p == &ref->cache->node)
                ref->cache = NULL;
        mutex_unlock(&ref->mutex);

        return rb_entry(p, struct active_node, node);
}

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
                                            struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        intel_engine_mask_t tmp, mask = engine->mask;
        struct llist_node *pos, *next;
        int err;

        GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));

        /*
         * Preallocate a node for each physical engine supporting the target
         * engine (remember virtual engines have more than one sibling).
         * We can then use the preallocated nodes in
         * i915_active_acquire_barrier().
         */
        for_each_engine_masked(engine, i915, mask, tmp) {
                u64 idx = engine->kernel_context->timeline->fence_context;
                struct active_node *node;

                node = reuse_idle_barrier(ref, idx);
                if (!node) {
                        node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
                        if (!node) {
                                err = -ENOMEM;
                                goto unwind;
                        }

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
                        node->base.lock =
                                &engine->kernel_context->timeline->mutex;
#endif
                        RCU_INIT_POINTER(node->base.request, NULL);
                        node->base.retire = node_retire;
                        node->timeline = idx;
                        node->ref = ref;
                }

                if (!i915_active_request_isset(&node->base)) {
                        /*
                         * Mark this as being *our* unconnected proto-node.
                         *
                         * Since this node is not in any list, and we have
                         * decoupled it from the rbtree, we can reuse the
                         * request to indicate this is an idle-barrier node
                         * and then we can use the rb_node and list pointers
                         * for our tracking of the pending barrier.
                         */
                        RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
                        node->base.link.prev = (void *)engine;
                        atomic_inc(&ref->count);
                }

                GEM_BUG_ON(barrier_to_engine(node) != engine);
                llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
                intel_engine_pm_get(engine);
        }

        return 0;

unwind:
        llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
                struct active_node *node = barrier_from_ll(pos);

                atomic_dec(&ref->count);
                intel_engine_pm_put(barrier_to_engine(node));

                kmem_cache_free(global.slab_cache, node);
        }
        return err;
}
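
/*
 * Barrier flow sketch (illustrative, simplified): when preparing to idle, the
 * owner of an i915_active injects an idle barrier for the engine so that it
 * is not released until the engine's kernel_context has switched away:
 *
 *      err = i915_active_acquire_preallocate_barrier(ref, engine);
 *      if (err)
 *              return err;
 *
 *      i915_active_acquire_barrier(ref);   (proto-nodes enter the rbtree)
 *
 * Later, when the next kernel_context request is built,
 * i915_request_add_active_barriers(rq) attaches the pending barriers to that
 * request so that their release rides on its retirement.
 */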

void i915_active_acquire_barrier(struct i915_active *ref)
{
        struct llist_node *pos, *next;

        GEM_BUG_ON(i915_active_is_idle(ref));

        /*
         * Transfer the list of preallocated barriers into the
         * i915_active rbtree, but only as proto-nodes. They will be
         * populated by i915_request_add_active_barriers() to point to the
         * request that will eventually release them.
         */
        mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
        llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
                struct active_node *node = barrier_from_ll(pos);
                struct intel_engine_cs *engine = barrier_to_engine(node);
                struct rb_node **p, *parent;

                parent = NULL;
                p = &ref->tree.rb_node;
                while (*p) {
                        struct active_node *it;

                        parent = *p;

                        it = rb_entry(parent, struct active_node, node);
                        if (it->timeline < node->timeline)
                                p = &parent->rb_right;
                        else
                                p = &parent->rb_left;
                }
                rb_link_node(&node->node, parent, p);
                rb_insert_color(&node->node, &ref->tree);

                llist_add(barrier_to_ll(node), &engine->barrier_tasks);
                intel_engine_pm_put(engine);
        }
        mutex_unlock(&ref->mutex);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
        struct intel_engine_cs *engine = rq->engine;
        struct llist_node *node, *next;

        GEM_BUG_ON(intel_engine_is_virtual(engine));
        GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline);

        /*
         * Attach the list of proto-fences to the in-flight request such
         * that the parent i915_active will be released when this request
         * is retired.
         */
        llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
                RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
                smp_wmb(); /* serialise with reuse_idle_barrier */
                list_add_tail((struct list_head *)node, &rq->active_list);
        }
}

int i915_active_request_set(struct i915_active_request *active,
                            struct i915_request *rq)
{
        int err;

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
        lockdep_assert_held(active->lock);
#endif

        /* Must maintain ordering wrt previous active requests */
        err = i915_request_await_active_request(rq, active);
        if (err)
                return err;

        __i915_active_request_set(active, rq);
        return 0;
}

void i915_active_retire_noop(struct i915_active_request *active,
                             struct i915_request *request)
{
        /* Space left intentionally blank */
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
        kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
        kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
        .shrink = i915_global_active_shrink,
        .exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
        global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
        if (!global.slab_cache)
                return -ENOMEM;

        i915_global_register(&global.base);
        return 0;
}