1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include <linux/interval_tree_generic.h>
7 #include <linux/sched/mm.h>
8 
9 #include "i915_sw_fence.h"
10 #include "i915_vma_resource.h"
11 #include "i915_drv.h"
12 #include "intel_memory_region.h"
13 
14 #include "gt/intel_gtt.h"
15 
/*
 * Slab cache from which every struct i915_vma_resource in this file is
 * allocated and to which it is returned. It is created in
 * i915_vma_resource_module_init() and destroyed in
 * i915_vma_resource_module_exit().
 */
static struct kmem_cache *slab_vma_resources;
17 
18 /**
19  * DOC:
20  * We use a per-vm interval tree to keep track of vma_resources
21  * scheduled for unbind but not yet unbound. The tree is protected by
22  * the vm mutex, and nodes are removed just after the unbind fence signals.
23  * The removal takes the vm mutex from a kernel thread which we need to
24  * keep in mind so that we don't grab the mutex and try to wait for all
 * pending unbinds to complete, because that will temporarily block many
26  * of the workqueue threads, and people will get angry.
27  *
28  * We should consider using a single ordered fence per VM instead but that
29  * requires ordering the unbinds and might introduce unnecessary waiting
30  * for unrelated unbinds. Amount of code will probably be roughly the same
31  * due to the simplicity of using the interval tree interface.
32  *
33  * Another drawback of this interval tree is that the complexity of insertion
34  * and removal of fences increases as O(ln(pending_unbinds)) instead of
35  * O(1) for a single fence without interval tree.
36  */
/* First address covered by a vma resource node (the interval start key). */
#define VMA_RES_START(_node) ((_node)->start)
/* Last address covered by the node, inclusive: start + node_size - 1. */
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1)
/*
 * Instantiate the file-local vma_res_itree_*() helpers used below
 * (insert, remove, iter_first, iter_next). Nodes are linked through the
 * rb member of struct i915_vma_resource and keyed on the u64
 * [VMA_RES_START, VMA_RES_LAST] range.
 */
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
42 
43 /* Callbacks for the unbind dma-fence. */
44 
45 /**
46  * i915_vma_resource_alloc - Allocate a vma resource
47  *
48  * Return: A pointer to a cleared struct i915_vma_resource or
49  * a -ENOMEM error pointer if allocation fails.
50  */
51 struct i915_vma_resource *i915_vma_resource_alloc(void)
52 {
53 	struct i915_vma_resource *vma_res =
54 		kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
55 
56 	return vma_res ? vma_res : ERR_PTR(-ENOMEM);
57 }
58 
59 /**
60  * i915_vma_resource_free - Free a vma resource
61  * @vma_res: The vma resource to free.
62  */
63 void i915_vma_resource_free(struct i915_vma_resource *vma_res)
64 {
65 	if (vma_res)
66 		kmem_cache_free(slab_vma_resources, vma_res);
67 }
68 
/* dma_fence_ops.get_driver_name: fixed name, independent of @fence. */
static const char *get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "vma unbind fence";

	return driver_name;
}
73 
/* dma_fence_ops.get_timeline_name: fixed name, independent of @fence. */
static const char *get_timeline_name(struct dma_fence *fence)
{
	static const char timeline_name[] = "unbound";

	return timeline_name;
}
78 
79 static void unbind_fence_free_rcu(struct rcu_head *head)
80 {
81 	struct i915_vma_resource *vma_res =
82 		container_of(head, typeof(*vma_res), unbind_fence.rcu);
83 
84 	i915_vma_resource_free(vma_res);
85 }
86 
87 static void unbind_fence_release(struct dma_fence *fence)
88 {
89 	struct i915_vma_resource *vma_res =
90 		container_of(fence, typeof(*vma_res), unbind_fence);
91 
92 	i915_sw_fence_fini(&vma_res->chain);
93 
94 	call_rcu(&fence->rcu, unbind_fence_free_rcu);
95 }
96 
97 static struct dma_fence_ops unbind_fence_ops = {
98 	.get_driver_name = get_driver_name,
99 	.get_timeline_name = get_timeline_name,
100 	.release = unbind_fence_release,
101 };
102 
/*
 * Drop one hold on @vma_res. Only the caller that drops the last hold goes
 * on to signal the unbind fence and tear down the per-unbind state; every
 * other caller returns immediately.
 */
static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
{
	struct i915_address_space *vm;

	if (!refcount_dec_and_test(&vma_res->hold_count))
		return;

	dma_fence_signal(&vma_res->unbind_fence);

	/* Keep a local copy: vma_res->vm is cleared below but vm is still used. */
	vm = vma_res->vm;
	if (vma_res->wakeref)
		intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);

	vma_res->vm = NULL;
	/*
	 * i915_vma_resource_unbind() clears the rb node instead of inserting
	 * it on the immediate-unbind path, so an empty node means there is
	 * nothing to remove from the pending_unbind tree. The tree itself is
	 * modified under the vm mutex.
	 */
	if (!RB_EMPTY_NODE(&vma_res->rb)) {
		mutex_lock(&vm->mutex);
		vma_res_itree_remove(vma_res, &vm->pending_unbind);
		mutex_unlock(&vm->mutex);
	}

	/* Release the pages_rsgt reference, if one is attached. */
	if (vma_res->bi.pages_rsgt)
		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
}
126 
127 /**
128  * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
129  * fence.
130  * @vma_res: The vma resource.
131  * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
132  *
133  * The function may leave a dma_fence critical section.
134  */
135 void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
136 			      bool lockdep_cookie)
137 {
138 	dma_fence_end_signalling(lockdep_cookie);
139 
140 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
141 		unsigned long irq_flags;
142 
143 		/* Inefficient open-coded might_lock_irqsave() */
144 		spin_lock_irqsave(&vma_res->lock, irq_flags);
145 		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
146 	}
147 
148 	__i915_vma_resource_unhold(vma_res);
149 }
150 
151 /**
152  * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
153  * @vma_res: The vma resource.
154  * @lockdep_cookie: Pointer to a bool serving as a lockdep cooke that should
155  * be given as an argument to the pairing i915_vma_resource_unhold.
156  *
157  * If returning true, the function enters a dma_fence signalling critical
158  * section if not in one already.
159  *
160  * Return: true if holding successful, false if not.
161  */
162 bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
163 			    bool *lockdep_cookie)
164 {
165 	bool held = refcount_inc_not_zero(&vma_res->hold_count);
166 
167 	if (held)
168 		*lockdep_cookie = dma_fence_begin_signalling();
169 
170 	return held;
171 }
172 
/*
 * Perform the unbind of the vma resource embedding @work. Called from
 * i915_vma_resource_fence_notify(), either directly (immediate unbind) or
 * as a work item queued on system_unbound_wq.
 */
static void i915_vma_resource_unbind_work(struct work_struct *work)
{
	struct i915_vma_resource *vma_res =
		container_of(work, typeof(*vma_res), work);
	/* Read the vm first: the unhold below clears vma_res->vm on last hold. */
	struct i915_address_space *vm = vma_res->vm;
	bool lockdep_cookie;

	/* The unbind callback runs inside a dma-fence signalling section. */
	lockdep_cookie = dma_fence_begin_signalling();
	/* The unbind_vma op is only invoked while vm->open is non-zero. */
	if (likely(atomic_read(&vm->open)))
		vma_res->ops->unbind_vma(vm, vma_res);

	dma_fence_end_signalling(lockdep_cookie);
	/* Presumably drops the initial hold set by __i915_vma_resource_init(). */
	__i915_vma_resource_unhold(vma_res);
	/*
	 * NOTE(review): presumably balances the dma_fence_get() done in
	 * i915_vma_resource_fence_notify() before this function is run -
	 * confirm against i915_vma_resource_put().
	 */
	i915_vma_resource_put(vma_res);
}
188 
189 static int
190 i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
191 			       enum i915_sw_fence_notify state)
192 {
193 	struct i915_vma_resource *vma_res =
194 		container_of(fence, typeof(*vma_res), chain);
195 	struct dma_fence *unbind_fence =
196 		&vma_res->unbind_fence;
197 
198 	switch (state) {
199 	case FENCE_COMPLETE:
200 		dma_fence_get(unbind_fence);
201 		if (vma_res->immediate_unbind) {
202 			i915_vma_resource_unbind_work(&vma_res->work);
203 		} else {
204 			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
205 			queue_work(system_unbound_wq, &vma_res->work);
206 		}
207 		break;
208 	case FENCE_FREE:
209 		i915_vma_resource_put(vma_res);
210 		break;
211 	}
212 
213 	return NOTIFY_DONE;
214 }
215 
/**
 * i915_vma_resource_unbind - Unbind a vma resource
 * @vma_res: The vma resource to unbind.
 *
 * At this point this function does little more than publish a fence that
 * signals immediately unless signaling is held back.
 *
 * The resource is either flagged for immediate unbind or inserted into the
 * vm's pending_unbind interval tree, after which its sw fence chain is
 * committed.
 *
 * Return: A refcounted pointer to a dma-fence that signals when unbinding is
 * complete.
 */
struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
{
	struct i915_address_space *vm = vma_res->vm;

	/* Reference for the sw fence */
	i915_vma_resource_get(vma_res);

	/* Caller must already have a wakeref in this case. */
	if (vma_res->needs_wakeref)
		vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);

	/*
	 * NOTE(review): a pending count of at most 1 presumably means the
	 * chain is not awaiting anything else - confirm against i915_sw_fence.
	 * In that case the rb node is cleared, so the unhold path skips the
	 * tree removal, and the notifier runs the unbind directly instead of
	 * queuing work.
	 */
	if (atomic_read(&vma_res->chain.pending) <= 1) {
		RB_CLEAR_NODE(&vma_res->rb);
		vma_res->immediate_unbind = 1;
	} else {
		/* Track the unbind so that binds to the same range can find it. */
		vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
	}

	i915_sw_fence_commit(&vma_res->chain);

	return &vma_res->unbind_fence;
}
248 
/**
 * __i915_vma_resource_init - Initialize a vma resource.
 * @vma_res: The vma resource to initialize
 *
 * Initializes the private members of a vma resource: the lock, the unbind
 * dma-fence, the hold count and the sw fence chain.
 */
void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
{
	/* The same lock is handed to the unbind dma-fence just below. */
	spin_lock_init(&vma_res->lock);
	/* The unbind fence is initialized with context 0 and seqno 0. */
	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
		       &vma_res->lock, 0, 0);
	/*
	 * Start with one hold. The unbind fence is only signalled once
	 * __i915_vma_resource_unhold() drops the hold count to zero.
	 */
	refcount_set(&vma_res->hold_count, 1);
	/* Chain state changes are reported to i915_vma_resource_fence_notify(). */
	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
}
263 
264 static void
265 i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
266 				     u64 *start,
267 				     u64 *end)
268 {
269 	if (i915_vm_has_cache_coloring(vm)) {
270 		if (*start)
271 			*start -= I915_GTT_PAGE_SIZE;
272 		*end += I915_GTT_PAGE_SIZE;
273 	}
274 }
275 
276 /**
277  * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
278  * certain vm range.
279  * @vm: The vm to look at.
280  * @offset: The range start.
281  * @size: The range size.
282  * @intr: Whether to wait interrubtible.
283  *
284  * The function needs to be called with the vm lock held.
285  *
286  * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
287  */
288 int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
289 				    u64 offset,
290 				    u64 size,
291 				    bool intr)
292 {
293 	struct i915_vma_resource *node;
294 	u64 last = offset + size - 1;
295 
296 	lockdep_assert_held(&vm->mutex);
297 	might_sleep();
298 
299 	i915_vma_resource_color_adjust_range(vm, &offset, &last);
300 	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
301 	while (node) {
302 		int ret = dma_fence_wait(&node->unbind_fence, intr);
303 
304 		if (ret)
305 			return ret;
306 
307 		node = vma_res_itree_iter_next(node, offset, last);
308 	}
309 
310 	return 0;
311 }
312 
/**
 * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
 * releasing the vm lock while waiting.
 * @vm: The vm to look at.
 *
 * The function must not be called with the vm lock held, since it takes
 * the vm mutex itself.
 * Typically this is called at vm destruction to finish any pending
 * unbind operations. The vm mutex is released while waiting to avoid
 * stalling kernel workqueues trying to grab the mutex.
 */
void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
{
	struct i915_vma_resource *node;
	struct dma_fence *fence;

	/* Handle one pending node per iteration until the tree is empty. */
	do {
		fence = NULL;
		mutex_lock(&vm->mutex);
		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
						U64_MAX);
		/*
		 * Take the fence reference while still holding the mutex so
		 * that the fence can be waited on after the mutex is dropped.
		 * NOTE(review): dma_fence_get_rcu() presumably may return
		 * NULL; fence then stays NULL and the loop simply retries -
		 * confirm.
		 */
		if (node)
			fence = dma_fence_get_rcu(&node->unbind_fence);
		mutex_unlock(&vm->mutex);

		if (fence) {
			/*
			 * The wait makes sure the node eventually removes
			 * itself from the tree.
			 */
			dma_fence_wait(fence, false);
			dma_fence_put(fence);
		}
	/* node is only tested against NULL here, never dereferenced unlocked. */
	} while (node);
}
347 
348 /**
349  * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
350  * pending unbinds in a certain range of a vm.
351  * @vm: The vm to look at.
352  * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
353  * @offset: The range start.
354  * @size: The range size.
355  * @intr: Whether to wait interrubtible.
356  * @gfp: Allocation mode for memory allocations.
357  *
358  * The function makes @sw_fence await all pending unbinds in a certain
359  * vm range before calling the complete notifier. To be able to await
360  * each individual unbind, the function needs to allocate memory using
361  * the @gpf allocation mode. If that fails, the function will instead
362  * wait for the unbind fence to signal, using @intr to judge whether to
363  * wait interruptible or not. Note that @gfp should ideally be selected so
364  * as to avoid any expensive memory allocation stalls and rather fail and
365  * synchronize itself. For now the vm mutex is required when calling this
366  * function with means that @gfp can't call into direct reclaim. In reality
367  * this means that during heavy memory pressure, we will sync in this
368  * function.
369  *
370  * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
371  */
372 int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
373 				     struct i915_sw_fence *sw_fence,
374 				     u64 offset,
375 				     u64 size,
376 				     bool intr,
377 				     gfp_t gfp)
378 {
379 	struct i915_vma_resource *node;
380 	u64 last = offset + size - 1;
381 
382 	lockdep_assert_held(&vm->mutex);
383 	might_alloc(gfp);
384 	might_sleep();
385 
386 	i915_vma_resource_color_adjust_range(vm, &offset, &last);
387 	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
388 	while (node) {
389 		int ret;
390 
391 		ret = i915_sw_fence_await_dma_fence(sw_fence,
392 						    &node->unbind_fence,
393 						    0, gfp);
394 		if (ret < 0) {
395 			ret = dma_fence_wait(&node->unbind_fence, intr);
396 			if (ret)
397 				return ret;
398 		}
399 
400 		node = vma_res_itree_iter_next(node, offset, last);
401 	}
402 
403 	return 0;
404 }
405 
/**
 * i915_vma_resource_module_exit - Destroy the vma resource slab cache
 * created by i915_vma_resource_module_init().
 */
void i915_vma_resource_module_exit(void)
{
	kmem_cache_destroy(slab_vma_resources);
}
410 
411 int __init i915_vma_resource_module_init(void)
412 {
413 	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
414 	if (!slab_vma_resources)
415 		return -ENOMEM;
416 
417 	return 0;
418 }
419