1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include <linux/interval_tree_generic.h> 7 #include <linux/sched/mm.h> 8 9 #include "i915_sw_fence.h" 10 #include "i915_vma_resource.h" 11 #include "i915_drv.h" 12 #include "intel_memory_region.h" 13 14 #include "gt/intel_gtt.h" 15 16 static struct kmem_cache *slab_vma_resources; 17 18 /** 19 * DOC: 20 * We use a per-vm interval tree to keep track of vma_resources 21 * scheduled for unbind but not yet unbound. The tree is protected by 22 * the vm mutex, and nodes are removed just after the unbind fence signals. 23 * The removal takes the vm mutex from a kernel thread which we need to 24 * keep in mind so that we don't grab the mutex and try to wait for all 25 * pending unbinds to complete, because that will temporaryily block many 26 * of the workqueue threads, and people will get angry. 27 * 28 * We should consider using a single ordered fence per VM instead but that 29 * requires ordering the unbinds and might introduce unnecessary waiting 30 * for unrelated unbinds. Amount of code will probably be roughly the same 31 * due to the simplicity of using the interval tree interface. 32 * 33 * Another drawback of this interval tree is that the complexity of insertion 34 * and removal of fences increases as O(ln(pending_unbinds)) instead of 35 * O(1) for a single fence without interval tree. 36 */ 37 #define VMA_RES_START(_node) ((_node)->start) 38 #define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1) 39 INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, 40 u64, __subtree_last, 41 VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); 42 43 /* Callbacks for the unbind dma-fence. */ 44 45 /** 46 * i915_vma_resource_alloc - Allocate a vma resource 47 * 48 * Return: A pointer to a cleared struct i915_vma_resource or 49 * a -ENOMEM error pointer if allocation fails. 50 */ 51 struct i915_vma_resource *i915_vma_resource_alloc(void) 52 { 53 struct i915_vma_resource *vma_res = 54 kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); 55 56 return vma_res ? vma_res : ERR_PTR(-ENOMEM); 57 } 58 59 /** 60 * i915_vma_resource_free - Free a vma resource 61 * @vma_res: The vma resource to free. 62 */ 63 void i915_vma_resource_free(struct i915_vma_resource *vma_res) 64 { 65 if (vma_res) 66 kmem_cache_free(slab_vma_resources, vma_res); 67 } 68 69 static const char *get_driver_name(struct dma_fence *fence) 70 { 71 return "vma unbind fence"; 72 } 73 74 static const char *get_timeline_name(struct dma_fence *fence) 75 { 76 return "unbound"; 77 } 78 79 static void unbind_fence_free_rcu(struct rcu_head *head) 80 { 81 struct i915_vma_resource *vma_res = 82 container_of(head, typeof(*vma_res), unbind_fence.rcu); 83 84 i915_vma_resource_free(vma_res); 85 } 86 87 static void unbind_fence_release(struct dma_fence *fence) 88 { 89 struct i915_vma_resource *vma_res = 90 container_of(fence, typeof(*vma_res), unbind_fence); 91 92 i915_sw_fence_fini(&vma_res->chain); 93 94 call_rcu(&fence->rcu, unbind_fence_free_rcu); 95 } 96 97 static struct dma_fence_ops unbind_fence_ops = { 98 .get_driver_name = get_driver_name, 99 .get_timeline_name = get_timeline_name, 100 .release = unbind_fence_release, 101 }; 102 103 static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) 104 { 105 struct i915_address_space *vm; 106 107 if (!refcount_dec_and_test(&vma_res->hold_count)) 108 return; 109 110 dma_fence_signal(&vma_res->unbind_fence); 111 112 vm = vma_res->vm; 113 if (vma_res->wakeref) 114 intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref); 115 116 vma_res->vm = NULL; 117 if (!RB_EMPTY_NODE(&vma_res->rb)) { 118 mutex_lock(&vm->mutex); 119 vma_res_itree_remove(vma_res, &vm->pending_unbind); 120 mutex_unlock(&vm->mutex); 121 } 122 123 if (vma_res->bi.pages_rsgt) 124 i915_refct_sgt_put(vma_res->bi.pages_rsgt); 125 } 126 127 /** 128 * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind 129 * fence. 130 * @vma_res: The vma resource. 131 * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold. 132 * 133 * The function may leave a dma_fence critical section. 134 */ 135 void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, 136 bool lockdep_cookie) 137 { 138 dma_fence_end_signalling(lockdep_cookie); 139 140 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 141 unsigned long irq_flags; 142 143 /* Inefficient open-coded might_lock_irqsave() */ 144 spin_lock_irqsave(&vma_res->lock, irq_flags); 145 spin_unlock_irqrestore(&vma_res->lock, irq_flags); 146 } 147 148 __i915_vma_resource_unhold(vma_res); 149 } 150 151 /** 152 * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence. 153 * @vma_res: The vma resource. 154 * @lockdep_cookie: Pointer to a bool serving as a lockdep cooke that should 155 * be given as an argument to the pairing i915_vma_resource_unhold. 156 * 157 * If returning true, the function enters a dma_fence signalling critical 158 * section if not in one already. 159 * 160 * Return: true if holding successful, false if not. 161 */ 162 bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, 163 bool *lockdep_cookie) 164 { 165 bool held = refcount_inc_not_zero(&vma_res->hold_count); 166 167 if (held) 168 *lockdep_cookie = dma_fence_begin_signalling(); 169 170 return held; 171 } 172 173 static void i915_vma_resource_unbind_work(struct work_struct *work) 174 { 175 struct i915_vma_resource *vma_res = 176 container_of(work, typeof(*vma_res), work); 177 struct i915_address_space *vm = vma_res->vm; 178 bool lockdep_cookie; 179 180 lockdep_cookie = dma_fence_begin_signalling(); 181 if (likely(!vma_res->skip_pte_rewrite)) 182 vma_res->ops->unbind_vma(vm, vma_res); 183 184 dma_fence_end_signalling(lockdep_cookie); 185 __i915_vma_resource_unhold(vma_res); 186 i915_vma_resource_put(vma_res); 187 } 188 189 static int 190 i915_vma_resource_fence_notify(struct i915_sw_fence *fence, 191 enum i915_sw_fence_notify state) 192 { 193 struct i915_vma_resource *vma_res = 194 container_of(fence, typeof(*vma_res), chain); 195 struct dma_fence *unbind_fence = 196 &vma_res->unbind_fence; 197 198 switch (state) { 199 case FENCE_COMPLETE: 200 dma_fence_get(unbind_fence); 201 if (vma_res->immediate_unbind) { 202 i915_vma_resource_unbind_work(&vma_res->work); 203 } else { 204 INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); 205 queue_work(system_unbound_wq, &vma_res->work); 206 } 207 break; 208 case FENCE_FREE: 209 i915_vma_resource_put(vma_res); 210 break; 211 } 212 213 return NOTIFY_DONE; 214 } 215 216 /** 217 * i915_vma_resource_unbind - Unbind a vma resource 218 * @vma_res: The vma resource to unbind. 219 * @tlb: pointer to vma->obj->mm.tlb associated with the resource 220 * to be stored at vma_res->tlb. When not-NULL, it will be used 221 * to do TLB cache invalidation before freeing a VMA resource. 222 * Used only for async unbind. 223 * 224 * At this point this function does little more than publish a fence that 225 * signals immediately unless signaling is held back. 226 * 227 * Return: A refcounted pointer to a dma-fence that signals when unbinding is 228 * complete. 229 */ 230 struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, 231 u32 *tlb) 232 { 233 struct i915_address_space *vm = vma_res->vm; 234 235 vma_res->tlb = tlb; 236 237 /* Reference for the sw fence */ 238 i915_vma_resource_get(vma_res); 239 240 /* Caller must already have a wakeref in this case. */ 241 if (vma_res->needs_wakeref) 242 vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm); 243 244 if (atomic_read(&vma_res->chain.pending) <= 1) { 245 RB_CLEAR_NODE(&vma_res->rb); 246 vma_res->immediate_unbind = 1; 247 } else { 248 vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind); 249 } 250 251 i915_sw_fence_commit(&vma_res->chain); 252 253 return &vma_res->unbind_fence; 254 } 255 256 /** 257 * __i915_vma_resource_init - Initialize a vma resource. 258 * @vma_res: The vma resource to initialize 259 * 260 * Initializes the private members of a vma resource. 261 */ 262 void __i915_vma_resource_init(struct i915_vma_resource *vma_res) 263 { 264 spin_lock_init(&vma_res->lock); 265 dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, 266 &vma_res->lock, 0, 0); 267 refcount_set(&vma_res->hold_count, 1); 268 i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); 269 } 270 271 static void 272 i915_vma_resource_color_adjust_range(struct i915_address_space *vm, 273 u64 *start, 274 u64 *end) 275 { 276 if (i915_vm_has_cache_coloring(vm)) { 277 if (*start) 278 *start -= I915_GTT_PAGE_SIZE; 279 *end += I915_GTT_PAGE_SIZE; 280 } 281 } 282 283 /** 284 * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a 285 * certain vm range. 286 * @vm: The vm to look at. 287 * @offset: The range start. 288 * @size: The range size. 289 * @intr: Whether to wait interrubtible. 290 * 291 * The function needs to be called with the vm lock held. 292 * 293 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true 294 */ 295 int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, 296 u64 offset, 297 u64 size, 298 bool intr) 299 { 300 struct i915_vma_resource *node; 301 u64 last = offset + size - 1; 302 303 lockdep_assert_held(&vm->mutex); 304 might_sleep(); 305 306 i915_vma_resource_color_adjust_range(vm, &offset, &last); 307 node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); 308 while (node) { 309 int ret = dma_fence_wait(&node->unbind_fence, intr); 310 311 if (ret) 312 return ret; 313 314 node = vma_res_itree_iter_next(node, offset, last); 315 } 316 317 return 0; 318 } 319 320 /** 321 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, 322 * releasing the vm lock while waiting. 323 * @vm: The vm to look at. 324 * 325 * The function may not be called with the vm lock held. 326 * Typically this is called at vm destruction to finish any pending 327 * unbind operations. The vm mutex is released while waiting to avoid 328 * stalling kernel workqueues trying to grab the mutex. 329 */ 330 void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) 331 { 332 struct i915_vma_resource *node; 333 struct dma_fence *fence; 334 335 do { 336 fence = NULL; 337 mutex_lock(&vm->mutex); 338 node = vma_res_itree_iter_first(&vm->pending_unbind, 0, 339 U64_MAX); 340 if (node) 341 fence = dma_fence_get_rcu(&node->unbind_fence); 342 mutex_unlock(&vm->mutex); 343 344 if (fence) { 345 /* 346 * The wait makes sure the node eventually removes 347 * itself from the tree. 348 */ 349 dma_fence_wait(fence, false); 350 dma_fence_put(fence); 351 } 352 } while (node); 353 } 354 355 /** 356 * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all 357 * pending unbinds in a certain range of a vm. 358 * @vm: The vm to look at. 359 * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. 360 * @offset: The range start. 361 * @size: The range size. 362 * @intr: Whether to wait interrubtible. 363 * @gfp: Allocation mode for memory allocations. 364 * 365 * The function makes @sw_fence await all pending unbinds in a certain 366 * vm range before calling the complete notifier. To be able to await 367 * each individual unbind, the function needs to allocate memory using 368 * the @gpf allocation mode. If that fails, the function will instead 369 * wait for the unbind fence to signal, using @intr to judge whether to 370 * wait interruptible or not. Note that @gfp should ideally be selected so 371 * as to avoid any expensive memory allocation stalls and rather fail and 372 * synchronize itself. For now the vm mutex is required when calling this 373 * function with means that @gfp can't call into direct reclaim. In reality 374 * this means that during heavy memory pressure, we will sync in this 375 * function. 376 * 377 * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true 378 */ 379 int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, 380 struct i915_sw_fence *sw_fence, 381 u64 offset, 382 u64 size, 383 bool intr, 384 gfp_t gfp) 385 { 386 struct i915_vma_resource *node; 387 u64 last = offset + size - 1; 388 389 lockdep_assert_held(&vm->mutex); 390 might_alloc(gfp); 391 might_sleep(); 392 393 i915_vma_resource_color_adjust_range(vm, &offset, &last); 394 node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); 395 while (node) { 396 int ret; 397 398 ret = i915_sw_fence_await_dma_fence(sw_fence, 399 &node->unbind_fence, 400 0, gfp); 401 if (ret < 0) { 402 ret = dma_fence_wait(&node->unbind_fence, intr); 403 if (ret) 404 return ret; 405 } 406 407 node = vma_res_itree_iter_next(node, offset, last); 408 } 409 410 return 0; 411 } 412 413 void i915_vma_resource_module_exit(void) 414 { 415 kmem_cache_destroy(slab_vma_resources); 416 } 417 418 int __init i915_vma_resource_module_init(void) 419 { 420 slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); 421 if (!slab_vma_resources) 422 return -ENOMEM; 423 424 return 0; 425 } 426