/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#define BKL(ref) (&(ref)->i915->drm.struct_mutex)

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

struct active_node {
	struct i915_active_request base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static void
__active_park(struct i915_active *ref)
{
	struct active_node *it, *n;

	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		GEM_BUG_ON(i915_active_request_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
	ref->tree = RB_ROOT;
}

static void
__active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!ref->count);
	if (--ref->count)
		return;

	/* return the unused nodes to our slabcache */
	__active_park(ref);

	ref->retire(ref);
}

static void
node_retire(struct i915_active_request *base, struct i915_request *rq)
{
	__active_retire(container_of(base, struct active_node, base)->ref);
}

static void
last_retire(struct i915_active_request *base, struct i915_request *rq)
{
	__active_retire(container_of(base, struct i915_active, last));
}

static struct i915_active_request *
active_instance(struct i915_active *ref, u64 idx)
{
	struct active_node *node;
	struct rb_node **p, *parent;
	struct i915_request *old;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 *
	 * Note that we allow the timeline to be active simultaneously in
	 * the rbtree and the last cache. We do this to avoid having
	 * to search and replace the rbtree element for a new timeline, with
	 * the cost being that we must be aware that the ref may be retired
	 * twice for the same timeline (as the older rbtree element will be
	 * retired before the new request added to last).
	 */
	old = i915_active_request_raw(&ref->last, BKL(ref));
	if (!old || old->fence.context == idx)
		goto out;

	/* Move the currently active fence into the rbtree */
	idx = old->fence.context;

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx)
			goto replace;

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);

	/* kmem_cache_alloc may retire ref->last (thanks, shrinker)! */
	if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
		if (node)
			kmem_cache_free(global.slab_cache, node);
		goto out;
	}

	if (unlikely(!node))
		return ERR_PTR(-ENOMEM);

	i915_active_request_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

replace:
	/*
	 * Overwrite the previous active slot in the rbtree with last,
	 * leaving last zeroed. If the previous slot is still active,
	 * we must be careful as we now only expect to receive one retire
	 * callback, not two, and so must undo the active counting for the
	 * overwritten slot.
	 */
	if (i915_active_request_isset(&node->base)) {
		/* Retire ourselves from the old rq->active_list */
		__list_del_entry(&node->base.link);
		ref->count--;
		GEM_BUG_ON(!ref->count);
	}
	GEM_BUG_ON(list_empty(&ref->last.link));
	list_replace_init(&ref->last.link, &node->base.link);
	node->base.request = fetch_and_zero(&ref->last.request);

out:
	return &ref->last;
}

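/*
 * Illustrative usage sketch (a hedged example, not code called anywhere in
 * this file): the caller, the request "rq" and the callback name
 * "example_retire" are placeholders. Given a struct i915_active ref embedded
 * in some object:
 *
 *	static void example_retire(struct i915_active *ref)
 *	{
 *		... every request passed to i915_active_ref() has completed
 *	}
 *
 *	i915_active_init(i915, &ref, example_retire);
 *
 * then, under struct_mutex (see BKL()):
 *
 *	err = i915_active_ref(&ref, rq->fence.context, rq);
 *	...
 *	err = i915_active_wait(&ref);
 *	i915_active_fini(&ref);
 *
 * i915_active_wait() retires everything still tracked; i915_active_fini()
 * only performs debug consistency checks.
 */
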
void i915_active_init(struct drm_i915_private *i915,
		      struct i915_active *ref,
		      void (*retire)(struct i915_active *ref))
{
	ref->i915 = i915;
	ref->retire = retire;
	ref->tree = RB_ROOT;
	i915_active_request_init(&ref->last, NULL, last_retire);
	init_llist_head(&ref->barriers);
	ref->count = 0;
}

int i915_active_ref(struct i915_active *ref,
		    u64 timeline,
		    struct i915_request *rq)
{
	struct i915_active_request *active;
	int err = 0;

	/* Prevent reaping in case we malloc/wait while building the tree */
	i915_active_acquire(ref);

	active = active_instance(ref, timeline);
	if (IS_ERR(active)) {
		err = PTR_ERR(active);
		goto out;
	}

	if (!i915_active_request_isset(active))
		ref->count++;
	__i915_active_request_set(active, rq);

	GEM_BUG_ON(!ref->count);
out:
	i915_active_release(ref);
	return err;
}

bool i915_active_acquire(struct i915_active *ref)
{
	lockdep_assert_held(BKL(ref));
	return !ref->count++;
}

void i915_active_release(struct i915_active *ref)
{
	lockdep_assert_held(BKL(ref));
	__active_retire(ref);
}

int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int ret = 0;

	if (i915_active_acquire(ref))
		goto out_release;

	ret = i915_active_request_retire(&ref->last, BKL(ref));
	if (ret)
		goto out_release;

	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		ret = i915_active_request_retire(&it->base, BKL(ref));
		if (ret)
			break;
	}

out_release:
	i915_active_release(ref);
	return ret;
}

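/*
 * The two await helpers below convert the tracked requests into dma-fence
 * dependencies for a new request. A hedged sketch of their use (the caller
 * and "rq" are placeholders): to ensure a new request executes only after
 * everything currently tracked by "ref" has completed,
 *
 *	err = i915_request_await_active(rq, &ref);
 *	if (err)
 *		return err;
 *
 * This queues asynchronous fence waits rather than blocking; for a
 * synchronous wait on the CPU, use i915_active_wait() instead.
 */
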
int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active)
{
	struct i915_request *barrier =
		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);

	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	/* await allocates and so we need to avoid hitting the shrinker */
	if (i915_active_acquire(ref))
		goto out; /* was idle */

	err = i915_request_await_active_request(rq, &ref->last);
	if (err)
		goto out;

	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = i915_request_await_active_request(rq, &it->base);
		if (err)
			goto out;
	}

out:
	i915_active_release(ref);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	GEM_BUG_ON(i915_active_request_isset(&ref->last));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	GEM_BUG_ON(ref->count);
}
#endif

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct llist_node *pos, *next;
	unsigned long tmp;
	int err;

	GEM_BUG_ON(!engine->mask);
	for_each_engine_masked(engine, i915, engine->mask, tmp) {
		struct intel_context *kctx = engine->kernel_context;
		struct active_node *node;

		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
		if (unlikely(!node)) {
			err = -ENOMEM;
			goto unwind;
		}

		i915_active_request_init(&node->base,
					 (void *)engine, node_retire);
		node->timeline = kctx->ring->timeline->fence_context;
		node->ref = ref;
		ref->count++;

		intel_engine_pm_get(engine);
		llist_add((struct llist_node *)&node->base.link,
			  &ref->barriers);
	}

	return 0;

unwind:
	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
		struct active_node *node;

		node = container_of((struct list_head *)pos,
				    typeof(*node), base.link);
		engine = (void *)rcu_access_pointer(node->base.request);

		intel_engine_pm_put(engine);
		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;

	i915_active_acquire(ref);

	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
		struct intel_engine_cs *engine;
		struct active_node *node;
		struct rb_node **p, *parent;

		node = container_of((struct list_head *)pos,
				    typeof(*node), base.link);

		engine = (void *)rcu_access_pointer(node->base.request);
		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));

		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			parent = *p;
			if (rb_entry(parent,
				     struct active_node,
				     node)->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);

		llist_add((struct llist_node *)&node->base.link,
			  &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
	i915_active_release(ref);
}

void i915_request_add_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
		list_add_tail((struct list_head *)node, &rq->active_list);
}

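/*
 * Barrier usage, as a hedged sketch (the caller, "ref" and "engine" are
 * placeholders): the three functions above keep "ref" active until the next
 * request submitted on each engine's kernel context retires.
 *
 *	err = i915_active_acquire_preallocate_barrier(ref, engine);
 *	if (err)
 *		return err;
 *	i915_active_acquire_barrier(ref);
 *
 * Later, when building a request "rq" on the kernel context, the pending
 * barriers are transferred onto that request:
 *
 *	i915_request_add_barriers(rq);
 *
 * and are retired, dropping their references on "ref", when rq retires.
 */
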
int i915_active_request_set(struct i915_active_request *active,
			    struct i915_request *rq)
{
	int err;

	/* Must maintain ordering wrt previous active requests */
	err = i915_request_await_active_request(rq, active);
	if (err)
		return err;

	__i915_active_request_set(active, rq);
	return 0;
}

void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request)
{
	/* Space left intentionally blank */
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}