xref: /openbmc/linux/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c (revision 04eb94d526423ff082efce61f4f26b0369d0bfdd)
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2015 Intel Corporation
 */

#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>
#include <drm/i915_drm.h>

#include "i915_trace.h"

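/*
 * The shrinker may be invoked while we already hold struct_mutex (direct
 * reclaim from one of our own allocations) or from a context that does not
 * hold it at all. shrinker_lock() sorts out the three possible outcomes of
 * mutex_trylock_recursive(): reuse the lock we already own (*unlock = false),
 * take it here (*unlock = true), or, if it is contended, only block waiting
 * for it when an active-list shrink was explicitly requested.
 */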
static bool shrinker_lock(struct drm_i915_private *i915,
			  unsigned int flags,
			  bool *unlock)
{
	struct mutex *m = &i915->drm.struct_mutex;

	switch (mutex_trylock_recursive(m)) {
	case MUTEX_TRYLOCK_RECURSIVE:
		*unlock = false;
		return true;

	case MUTEX_TRYLOCK_FAILED:
		*unlock = false;
		if (flags & I915_SHRINK_ACTIVE &&
		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
			*unlock = true;
		return *unlock;

	case MUTEX_TRYLOCK_SUCCESS:
		*unlock = true;
		return true;
	}

	BUG();
}

static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
{
	if (!unlock)
		return;

	mutex_unlock(&i915->drm.struct_mutex);
}

static bool swap_available(void)
{
	return get_nr_swap_pages() > 0;
}

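/*
 * Estimate whether releasing this object's backing store would actually
 * return pages to the system, so the shrinker does not waste effort on
 * objects it cannot reclaim.
 */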
static bool can_release_pages(struct drm_i915_gem_object *obj)
{
	/* Consider only shrinkable objects. */
	if (!i915_gem_object_is_shrinkable(obj))
		return false;

	/* Only report true if by unbinding the object and putting its pages
	 * we can actually make forward progress towards freeing physical
	 * pages.
	 *
	 * If the pages are pinned for any other reason than being bound
	 * to the GPU, simply unbinding from the GPU is not going to succeed
	 * in releasing our pin count on the pages themselves.
	 */
	if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
		return false;

	/* If any vma are "permanently" pinned, it will prevent us from
	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
	 * a permanent pin, along with a few others like the context objects.
	 * To simplify the scan, and to avoid walking the list of vma under the
	 * object, we just check whether the object holds a global (permanent)
	 * pin.
	 */
	if (READ_ONCE(obj->pin_global))
		return false;

	/* We can only return physical pages to the system if we can either
	 * discard the contents (because the user has marked them as being
	 * purgeable) or if we can move their contents out to swap.
	 */
	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}

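/*
 * Try to unbind the object from all of its vma and, if that succeeds, drop
 * its backing pages. "Unsafe" because the caller must hold its own reference
 * to @obj and be prepared for the object to lose its pages beneath it.
 * Returns true if the object no longer has pages attached.
 */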
static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_unbind(obj) == 0)
		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
	return !i915_gem_object_has_pages(obj);
}

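/*
 * Once the pages have been dropped, decide what to do with their backing
 * store: purgeable (DONTNEED) objects are truncated outright, objects that
 * have already been purged need nothing further, and everything else may be
 * pushed out to swap if the caller asked for writeback.
 */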
static void try_to_writeback(struct drm_i915_gem_object *obj,
			     unsigned int flags)
{
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fall through */
	case __I915_MADV_PURGED:
		return;
	}

	if (flags & I915_SHRINK_WRITEBACK)
		i915_gem_object_writeback(obj);
}

/**
 * i915_gem_shrink - Shrink buffer object caches
 * @i915: i915 device
 * @target: amount of memory to make available, in pages
 * @nr_scanned: optional output for number of pages scanned (incremental)
 * @shrink: control flags for selecting cache types
 *
 * This function is the main interface to the shrinker. It will try to release
 * up to @target pages of main memory backing storage from buffer objects.
 * Selection of the specific caches can be done with @shrink. This is e.g.
 * useful when purgeable objects should be removed from caches preferentially.
 *
 * Note that it's not guaranteed that the released amount is actually available
 * as free system memory - the pages might still be in use due to other reasons
 * (like cpu mmaps) or the mm core may have reused them before we could grab
 * them. Therefore code that needs to explicitly shrink buffer object caches
 * (e.g. to avoid deadlocks in memory reclaim) must fall back to
 * i915_gem_shrink_all().
 *
 * Also note that any kind of pinning (both per-vma address space pins and
 * backing storage pins at the buffer object level) results in the shrinker
 * code having to skip the object.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long
i915_gem_shrink(struct drm_i915_private *i915,
		unsigned long target,
		unsigned long *nr_scanned,
		unsigned int shrink)
{
	const struct {
		struct list_head *list;
		unsigned int bit;
	} phases[] = {
		{ &i915->mm.purge_list, ~0u },
		{
			&i915->mm.shrink_list,
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
		},
		{ NULL, 0 },
	}, *phase;
	intel_wakeref_t wakeref = 0;
	unsigned long count = 0;
	unsigned long scanned = 0;
	bool unlock;

	if (!shrinker_lock(i915, shrink, &unlock))
		return 0;

	/*
	 * When shrinking the active list, we should also consider active
	 * contexts. Active contexts are pinned until they are retired, and
	 * so cannot simply be unbound to retire and unpin their pages. To
	 * shrink the contexts, we must wait until the gpu is idle and has
	 * completed its switch to the kernel context. In short, we do not
	 * have a good mechanism for idling a specific context.
	 */

	trace_i915_gem_shrink(i915, target, shrink);
	i915_retire_requests(i915);

	/*
	 * Unbinding of objects will require HW access; let us not wake the
	 * device just to recover a little memory. If absolutely necessary,
	 * we will force the wake during oom-notifier.
	 */
	if (shrink & I915_SHRINK_BOUND) {
		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
		if (!wakeref)
			shrink &= ~I915_SHRINK_BOUND;
	}

	/*
	 * As we may completely rewrite the (un)bound list whilst unbinding
	 * (due to retiring requests) we have to strictly process only
	 * one element of the list at a time, and recheck the list
	 * on every iteration.
	 *
	 * In particular, we must hold a reference whilst removing the
	 * object as we may end up waiting for and/or retiring the objects.
	 * This might release the final reference (held by the active list)
	 * and result in the object being freed from under us. This is
	 * similar to the precautions the eviction code must take whilst
	 * removing objects.
	 *
	 * Also note that although these lists do not hold a reference to
	 * the object we can safely grab one here: The final object
	 * unreferencing and the bound_list are both protected by the
	 * dev->struct_mutex and so we won't ever be able to observe an
	 * object on the bound_list with a reference count equal to 0.
	 */
	for (phase = phases; phase->list; phase++) {
		struct list_head still_in_list;
		struct drm_i915_gem_object *obj;
		unsigned long flags;

		if ((shrink & phase->bit) == 0)
			continue;

		INIT_LIST_HEAD(&still_in_list);

		/*
		 * We serialize our access to unreferenced objects through
		 * the use of the struct_mutex. While the objects are not
		 * yet freed (due to RCU then a workqueue) we still want
		 * to be able to shrink their pages, so they remain on
		 * the unbound/bound list until actually freed.
		 */
		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		while (count < target &&
		       (obj = list_first_entry_or_null(phase->list,
						       typeof(*obj),
						       mm.link))) {
			list_move_tail(&obj->mm.link, &still_in_list);

			if (shrink & I915_SHRINK_VMAPS &&
			    !is_vmalloc_addr(obj->mm.mapping))
				continue;

			if (!(shrink & I915_SHRINK_ACTIVE) &&
			    (i915_gem_object_is_active(obj) ||
			     i915_gem_object_is_framebuffer(obj)))
				continue;

			if (!(shrink & I915_SHRINK_BOUND) &&
			    atomic_read(&obj->bind_count))
				continue;

			if (!can_release_pages(obj))
				continue;

			if (!kref_get_unless_zero(&obj->base.refcount))
				continue;

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

			if (unsafe_drop_pages(obj)) {
				/* May arrive from get_pages on another bo */
				mutex_lock_nested(&obj->mm.lock,
						  I915_MM_SHRINKER);
				if (!i915_gem_object_has_pages(obj)) {
					try_to_writeback(obj, shrink);
					count += obj->base.size >> PAGE_SHIFT;
				}
				mutex_unlock(&obj->mm.lock);
			}

			scanned += obj->base.size >> PAGE_SHIFT;
			i915_gem_object_put(obj);

			spin_lock_irqsave(&i915->mm.obj_lock, flags);
		}
		list_splice_tail(&still_in_list, phase->list);
		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}

	if (shrink & I915_SHRINK_BOUND)
		intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	i915_retire_requests(i915);

	shrinker_unlock(i915, unlock);

	if (nr_scanned)
		*nr_scanned += scanned;
	return count;
}
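
/*
 * Illustrative sketch (not taken from this file): a caller that wants to
 * free up roughly 64MiB of shrinkable backing store before attempting a
 * large allocation, without disturbing active objects, might do:
 *
 *	unsigned long nr = i915_gem_shrink(i915, SZ_64M >> PAGE_SHIFT, NULL,
 *					   I915_SHRINK_BOUND |
 *					   I915_SHRINK_UNBOUND);
 *
 * The flag names and SZ_64M are as used elsewhere in the kernel; the
 * scenario itself is hypothetical.
 */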

/**
 * i915_gem_shrink_all - Shrink buffer object caches completely
 * @i915: i915 device
 *
 * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
 * caches completely. It also first waits for and retires all outstanding
 * requests to also be able to release backing storage for active objects.
 *
 * This should only be used in code that intentionally quiesces the gpu or as a
 * last-ditch effort when memory seems to have run out.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;
	unsigned long freed = 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		freed = i915_gem_shrink(i915, -1UL, NULL,
					I915_SHRINK_BOUND |
					I915_SHRINK_UNBOUND |
					I915_SHRINK_ACTIVE);
	}

	return freed;
}

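/*
 * ->count_objects callback for the core shrinker: report how many pages of
 * shrinkable backing store we are currently tracking, and take the
 * opportunity to retune the preferred batch size for the next scan.
 */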
static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long num_objects;
	unsigned long count;

	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
	num_objects = READ_ONCE(i915->mm.shrink_count);

	/*
	 * Update our preferred vmscan batch size for the next pass.
	 * Our rough guess for an effective batch size is two available
	 * GEM objects' worth of pages. That is, we don't want the shrinker
	 * to fire until it is worth the cost of freeing an entire GEM
	 * object.
	 */
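	/*
	 * Worked example with assumed numbers: tracking 256 objects that
	 * together back 1048576 pages gives avg = 2 * 1048576 / 256 = 8192,
	 * so the running average below pulls the batch from its previous
	 * value towards 8192 pages, never dropping under the default
	 * SHRINK_BATCH of 128.
	 */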
	if (num_objects) {
		unsigned long avg = 2 * count / num_objects;

		i915->mm.shrinker.batch =
			max((i915->mm.shrinker.batch + avg) >> 1,
			    128ul /* default SHRINK_BATCH */);
	}

	return count;
}

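/*
 * ->scan_objects callback for the core shrinker: first try to reclaim the
 * requested number of pages without forcibly waking the device; if that
 * falls short and we are running on behalf of kswapd, take a runtime-pm
 * wakeref and retry, this time also shrinking active objects.
 */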
static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long freed;
	bool unlock;

	sc->nr_scanned = 0;

	if (!shrinker_lock(i915, 0, &unlock))
		return SHRINK_STOP;

	freed = i915_gem_shrink(i915,
				sc->nr_to_scan,
				&sc->nr_scanned,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND |
				I915_SHRINK_WRITEBACK);
	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
			freed += i915_gem_shrink(i915,
						 sc->nr_to_scan - sc->nr_scanned,
						 &sc->nr_scanned,
						 I915_SHRINK_ACTIVE |
						 I915_SHRINK_BOUND |
						 I915_SHRINK_UNBOUND |
						 I915_SHRINK_WRITEBACK);
		}
	}

	shrinker_unlock(i915, unlock);

	return sc->nr_scanned ? freed : SHRINK_STOP;
}

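/*
 * OOM notifier: when the system is about to pick an OOM victim, make a
 * last-ditch attempt to give memory back by shrinking everything we can
 * (forcing a device wakeup if needed), then report how much of the
 * remaining GEM memory is still pinned versus potentially reclaimable.
 */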
static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.oom_notifier);
	struct drm_i915_gem_object *obj;
	unsigned long unevictable, available, freed_pages;
	intel_wakeref_t wakeref;
	unsigned long flags;

	freed_pages = 0;
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_WRITEBACK);

	/* Because we may be allocating inside our own driver, we cannot
	 * assert that there are no objects with pinned pages that are not
	 * being pointed to by hardware.
	 */
	available = unevictable = 0;
	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		if (!can_release_pages(obj))
			unevictable += obj->base.size >> PAGE_SHIFT;
		else
			available += obj->base.size >> PAGE_SHIFT;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

	if (freed_pages || available)
		pr_info("Purging GPU memory, %lu pages freed, "
			"%lu pages still pinned, %lu pages left available.\n",
			freed_pages, unevictable, available);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

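/*
 * vmap purge notifier: vmalloc address space is running low, so release
 * every vmap we hold. Wait for the GPU to idle, shrink all objects with a
 * vmalloc'ed mapping, and then unbind any idle GGTT vma whose cached iomap
 * is also consuming vmap space.
 */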
static int
i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
	struct i915_vma *vma, *next;
	unsigned long freed_pages = 0;
	intel_wakeref_t wakeref;
	bool unlock;

	if (!shrinker_lock(i915, 0, &unlock))
		return NOTIFY_DONE;

	/* Force everything onto the inactive lists */
	if (i915_gem_wait_for_idle(i915,
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT))
		goto out;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_VMAPS);

	/* We also want to clear any cached iomaps as they wrap vmap */
	mutex_lock(&i915->ggtt.vm.mutex);
	list_for_each_entry_safe(vma, next,
				 &i915->ggtt.vm.bound_list, vm_link) {
		unsigned long count = vma->node.size >> PAGE_SHIFT;

		if (!vma->iomap || i915_vma_is_active(vma))
			continue;

		mutex_unlock(&i915->ggtt.vm.mutex);
		if (i915_vma_unbind(vma) == 0)
			freed_pages += count;
		mutex_lock(&i915->ggtt.vm.mutex);
	}
	mutex_unlock(&i915->ggtt.vm.mutex);

out:
	shrinker_unlock(i915, unlock);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

/**
 * i915_gem_shrinker_register - Register the i915 shrinker
 * @i915: i915 device
 *
 * This function registers and sets up the i915 shrinker and OOM handler.
 */
void i915_gem_shrinker_register(struct drm_i915_private *i915)
{
	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
	i915->mm.shrinker.batch = 4096;
	WARN_ON(register_shrinker(&i915->mm.shrinker));

	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
	WARN_ON(register_oom_notifier(&i915->mm.oom_notifier));

	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
	WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}

/**
 * i915_gem_shrinker_unregister - Unregister the i915 shrinker
 * @i915: i915 device
 *
 * This function unregisters the i915 shrinker and OOM handler.
 */
void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
{
	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
	unregister_shrinker(&i915->mm.shrinker);
}

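/*
 * Teach lockdep that @mutex may be taken from within the shrinker: under
 * fs_reclaim, fake an acquisition of struct_mutex followed by @mutex so
 * that lockdep will warn up front about paths that could deadlock against
 * the shrinker (for example taking struct_mutex, or entering reclaim,
 * while holding @mutex) instead of only when such a deadlock happens.
 */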
void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
				    struct mutex *mutex)
{
	bool unlock = false;

	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
		mutex_acquire(&i915->drm.struct_mutex.dep_map,
			      I915_MM_NORMAL, 0, _RET_IP_);
		unlock = true;
	}

	fs_reclaim_acquire(GFP_KERNEL);

	/*
	 * As we invariably rely on the struct_mutex within the shrinker,
	 * but have a complicated recursion dance, taint all the mutexes used
	 * within the shrinker with the struct_mutex. For completeness, we
	 * taint with all subclasses of struct_mutex, even though we should
	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
	 * deadlocks from using struct_mutex inside @mutex.
	 */
	mutex_acquire(&i915->drm.struct_mutex.dep_map,
		      I915_MM_SHRINKER, 0, _RET_IP_);

	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
	mutex_release(&mutex->dep_map, 0, _RET_IP_);

	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);

	fs_reclaim_release(GFP_KERNEL);

	if (unlock)
		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
}