xref: /openbmc/linux/drivers/gpu/drm/i915/i915_active.c (revision c2cd9d04)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6 
7 #include "gt/intel_engine_pm.h"
8 
9 #include "i915_drv.h"
10 #include "i915_active.h"
11 #include "i915_globals.h"
12 
/* Lock handed to the lockdep/request helpers: the owning device's struct_mutex. */
#define BKL(active) (&(active)->i915->drm.struct_mutex)
14 
15 /*
16  * Active refs memory management
17  *
18  * To be more economical with memory, we reap all the i915_active trees as
19  * they idle (when we know the active requests are inactive) and allocate the
20  * nodes from a local slab cache to hopefully reduce the fragmentation.
21  */
22 static struct i915_global_active {
23 	struct i915_global base;
24 	struct kmem_cache *slab_cache;
25 } global;
26 
27 struct active_node {
28 	struct i915_active_request base;
29 	struct i915_active *ref;
30 	struct rb_node node;
31 	u64 timeline;
32 };
33 
34 static void
35 __active_park(struct i915_active *ref)
36 {
37 	struct active_node *it, *n;
38 
39 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
40 		GEM_BUG_ON(i915_active_request_isset(&it->base));
41 		kmem_cache_free(global.slab_cache, it);
42 	}
43 	ref->tree = RB_ROOT;
44 }
45 
46 static void
47 __active_retire(struct i915_active *ref)
48 {
49 	GEM_BUG_ON(!ref->count);
50 	if (--ref->count)
51 		return;
52 
53 	/* return the unused nodes to our slabcache */
54 	__active_park(ref);
55 
56 	ref->retire(ref);
57 }
58 
59 static void
60 node_retire(struct i915_active_request *base, struct i915_request *rq)
61 {
62 	__active_retire(container_of(base, struct active_node, base)->ref);
63 }
64 
65 static void
66 last_retire(struct i915_active_request *base, struct i915_request *rq)
67 {
68 	__active_retire(container_of(base, struct i915_active, last));
69 }
70 
71 static struct i915_active_request *
72 active_instance(struct i915_active *ref, u64 idx)
73 {
74 	struct active_node *node;
75 	struct rb_node **p, *parent;
76 	struct i915_request *old;
77 
78 	/*
79 	 * We track the most recently used timeline to skip a rbtree search
80 	 * for the common case, under typical loads we never need the rbtree
81 	 * at all. We can reuse the last slot if it is empty, that is
82 	 * after the previous activity has been retired, or if it matches the
83 	 * current timeline.
84 	 *
85 	 * Note that we allow the timeline to be active simultaneously in
86 	 * the rbtree and the last cache. We do this to avoid having
87 	 * to search and replace the rbtree element for a new timeline, with
88 	 * the cost being that we must be aware that the ref may be retired
89 	 * twice for the same timeline (as the older rbtree element will be
90 	 * retired before the new request added to last).
91 	 */
92 	old = i915_active_request_raw(&ref->last, BKL(ref));
93 	if (!old || old->fence.context == idx)
94 		goto out;
95 
96 	/* Move the currently active fence into the rbtree */
97 	idx = old->fence.context;
98 
99 	parent = NULL;
100 	p = &ref->tree.rb_node;
101 	while (*p) {
102 		parent = *p;
103 
104 		node = rb_entry(parent, struct active_node, node);
105 		if (node->timeline == idx)
106 			goto replace;
107 
108 		if (node->timeline < idx)
109 			p = &parent->rb_right;
110 		else
111 			p = &parent->rb_left;
112 	}
113 
114 	node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
115 
116 	/* kmalloc may retire the ref->last (thanks shrinker)! */
117 	if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
118 		kmem_cache_free(global.slab_cache, node);
119 		goto out;
120 	}
121 
122 	if (unlikely(!node))
123 		return ERR_PTR(-ENOMEM);
124 
125 	i915_active_request_init(&node->base, NULL, node_retire);
126 	node->ref = ref;
127 	node->timeline = idx;
128 
129 	rb_link_node(&node->node, parent, p);
130 	rb_insert_color(&node->node, &ref->tree);
131 
132 replace:
133 	/*
134 	 * Overwrite the previous active slot in the rbtree with last,
135 	 * leaving last zeroed. If the previous slot is still active,
136 	 * we must be careful as we now only expect to receive one retire
137 	 * callback not two, and so much undo the active counting for the
138 	 * overwritten slot.
139 	 */
140 	if (i915_active_request_isset(&node->base)) {
141 		/* Retire ourselves from the old rq->active_list */
142 		__list_del_entry(&node->base.link);
143 		ref->count--;
144 		GEM_BUG_ON(!ref->count);
145 	}
146 	GEM_BUG_ON(list_empty(&ref->last.link));
147 	list_replace_init(&ref->last.link, &node->base.link);
148 	node->base.request = fetch_and_zero(&ref->last.request);
149 
150 out:
151 	return &ref->last;
152 }
153 
154 void i915_active_init(struct drm_i915_private *i915,
155 		      struct i915_active *ref,
156 		      void (*retire)(struct i915_active *ref))
157 {
158 	ref->i915 = i915;
159 	ref->retire = retire;
160 	ref->tree = RB_ROOT;
161 	i915_active_request_init(&ref->last, NULL, last_retire);
162 	init_llist_head(&ref->barriers);
163 	ref->count = 0;
164 }
165 
166 int i915_active_ref(struct i915_active *ref,
167 		    u64 timeline,
168 		    struct i915_request *rq)
169 {
170 	struct i915_active_request *active;
171 	int err = 0;
172 
173 	/* Prevent reaping in case we malloc/wait while building the tree */
174 	i915_active_acquire(ref);
175 
176 	active = active_instance(ref, timeline);
177 	if (IS_ERR(active)) {
178 		err = PTR_ERR(active);
179 		goto out;
180 	}
181 
182 	if (!i915_active_request_isset(active))
183 		ref->count++;
184 	__i915_active_request_set(active, rq);
185 
186 	GEM_BUG_ON(!ref->count);
187 out:
188 	i915_active_release(ref);
189 	return err;
190 }
191 
192 bool i915_active_acquire(struct i915_active *ref)
193 {
194 	lockdep_assert_held(BKL(ref));
195 	return !ref->count++;
196 }
197 
/*
 * i915_active_release - drop a count on @ref
 *
 * Must be called with BKL(ref) held. Dropping the final count frees the
 * tracking nodes and runs the retire callback (see __active_retire()).
 */
void i915_active_release(struct i915_active *ref)
{
	lockdep_assert_held(BKL(ref));

	__active_retire(ref);
}
203 
204 int i915_active_wait(struct i915_active *ref)
205 {
206 	struct active_node *it, *n;
207 	int ret = 0;
208 
209 	if (i915_active_acquire(ref))
210 		goto out_release;
211 
212 	ret = i915_active_request_retire(&ref->last, BKL(ref));
213 	if (ret)
214 		goto out_release;
215 
216 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
217 		ret = i915_active_request_retire(&it->base, BKL(ref));
218 		if (ret)
219 			break;
220 	}
221 
222 out_release:
223 	i915_active_release(ref);
224 	return ret;
225 }
226 
227 int i915_request_await_active_request(struct i915_request *rq,
228 				      struct i915_active_request *active)
229 {
230 	struct i915_request *barrier =
231 		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
232 
233 	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
234 }
235 
236 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
237 {
238 	struct active_node *it, *n;
239 	int err = 0;
240 
241 	/* await allocates and so we need to avoid hitting the shrinker */
242 	if (i915_active_acquire(ref))
243 		goto out; /* was idle */
244 
245 	err = i915_request_await_active_request(rq, &ref->last);
246 	if (err)
247 		goto out;
248 
249 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
250 		err = i915_request_await_active_request(rq, &it->base);
251 		if (err)
252 			goto out;
253 	}
254 
255 out:
256 	i915_active_release(ref);
257 	return err;
258 }
259 
260 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
261 void i915_active_fini(struct i915_active *ref)
262 {
263 	GEM_BUG_ON(i915_active_request_isset(&ref->last));
264 	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
265 	GEM_BUG_ON(ref->count);
266 }
267 #endif
268 
269 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
270 					    struct intel_engine_cs *engine)
271 {
272 	struct drm_i915_private *i915 = engine->i915;
273 	struct llist_node *pos, *next;
274 	unsigned long tmp;
275 	int err;
276 
277 	GEM_BUG_ON(!engine->mask);
278 	for_each_engine_masked(engine, i915, engine->mask, tmp) {
279 		struct intel_context *kctx = engine->kernel_context;
280 		struct active_node *node;
281 
282 		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
283 		if (unlikely(!node)) {
284 			err = -ENOMEM;
285 			goto unwind;
286 		}
287 
288 		i915_active_request_init(&node->base,
289 					 (void *)engine, node_retire);
290 		node->timeline = kctx->ring->timeline->fence_context;
291 		node->ref = ref;
292 		ref->count++;
293 
294 		intel_engine_pm_get(engine);
295 		llist_add((struct llist_node *)&node->base.link,
296 			  &ref->barriers);
297 	}
298 
299 	return 0;
300 
301 unwind:
302 	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
303 		struct active_node *node;
304 
305 		node = container_of((struct list_head *)pos,
306 				    typeof(*node), base.link);
307 		engine = (void *)rcu_access_pointer(node->base.request);
308 
309 		intel_engine_pm_put(engine);
310 		kmem_cache_free(global.slab_cache, node);
311 	}
312 	return err;
313 }
314 
315 void i915_active_acquire_barrier(struct i915_active *ref)
316 {
317 	struct llist_node *pos, *next;
318 
319 	i915_active_acquire(ref);
320 
321 	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
322 		struct intel_engine_cs *engine;
323 		struct active_node *node;
324 		struct rb_node **p, *parent;
325 
326 		node = container_of((struct list_head *)pos,
327 				    typeof(*node), base.link);
328 
329 		engine = (void *)rcu_access_pointer(node->base.request);
330 		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
331 
332 		parent = NULL;
333 		p = &ref->tree.rb_node;
334 		while (*p) {
335 			parent = *p;
336 			if (rb_entry(parent,
337 				     struct active_node,
338 				     node)->timeline < node->timeline)
339 				p = &parent->rb_right;
340 			else
341 				p = &parent->rb_left;
342 		}
343 		rb_link_node(&node->node, parent, p);
344 		rb_insert_color(&node->node, &ref->tree);
345 
346 		llist_add((struct llist_node *)&node->base.link,
347 			  &engine->barrier_tasks);
348 		intel_engine_pm_put(engine);
349 	}
350 	i915_active_release(ref);
351 }
352 
353 void i915_request_add_barriers(struct i915_request *rq)
354 {
355 	struct intel_engine_cs *engine = rq->engine;
356 	struct llist_node *node, *next;
357 
358 	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
359 		list_add_tail((struct list_head *)node, &rq->active_list);
360 }
361 
/*
 * i915_active_request_set - track @rq in @active, ordered after its predecessor
 *
 * @rq is first made to await whatever request @active currently holds; only
 * if that succeeds is @active updated. Returns 0 on success or the await
 * error, in which case @active is left unchanged.
 */
int i915_active_request_set(struct i915_active_request *active,
			    struct i915_request *rq)
{
	/* Must maintain ordering wrt previous active requests */
	int err = i915_request_await_active_request(rq, active);

	if (err == 0)
		__i915_active_request_set(active, rq);

	return err;
}
375 
/*
 * i915_active_retire_noop - retire callback that does nothing
 *
 * Both arguments are ignored.
 */
void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request)
{
	/* Deliberately empty */
}
381 
382 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
383 #include "selftests/i915_active.c"
384 #endif
385 
386 static void i915_global_active_shrink(void)
387 {
388 	kmem_cache_shrink(global.slab_cache);
389 }
390 
391 static void i915_global_active_exit(void)
392 {
393 	kmem_cache_destroy(global.slab_cache);
394 }
395 
396 static struct i915_global_active global = { {
397 	.shrink = i915_global_active_shrink,
398 	.exit = i915_global_active_exit,
399 } };
400 
401 int __init i915_global_active_init(void)
402 {
403 	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
404 	if (!global.slab_cache)
405 		return -ENOMEM;
406 
407 	i915_global_register(&global.base);
408 	return 0;
409 }
410