/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2015 Intel Corporation
 */

#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>

#include "i915_trace.h"

static bool swap_available(void)
{
	return get_nr_swap_pages() > 0;
}

static bool can_release_pages(struct drm_i915_gem_object *obj)
{
	/* Consider only shrinkable objects. */
	if (!i915_gem_object_is_shrinkable(obj))
		return false;

	/*
	 * We can only return physical pages to the system if we can either
	 * discard the contents (because the user has marked them as being
	 * purgeable) or if we can move their contents out to swap.
	 */
	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}

static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
			      unsigned long shrink)
{
	unsigned long flags;

	flags = 0;
	if (shrink & I915_SHRINK_ACTIVE)
		flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
	if (!(shrink & I915_SHRINK_BOUND))
		flags = I915_GEM_OBJECT_UNBIND_TEST;

	if (i915_gem_object_unbind(obj, flags) == 0)
		__i915_gem_object_put_pages(obj);

	return !i915_gem_object_has_pages(obj);
}
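
/*
 * Discard or write back an object's backing storage: objects userspace has
 * marked as DONTNEED can simply be truncated, already-purged objects need
 * nothing further, and for anything else we only kick off writeback when
 * the caller passed I915_SHRINK_WRITEBACK.
 */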
static void try_to_writeback(struct drm_i915_gem_object *obj,
			     unsigned int flags)
{
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fall through - a truncated object needs no writeback */
	case __I915_MADV_PURGED:
		return;
	}

	if (flags & I915_SHRINK_WRITEBACK)
		i915_gem_object_writeback(obj);
}

/**
 * i915_gem_shrink - Shrink buffer object caches
 * @i915: i915 device
 * @target: amount of memory to make available, in pages
 * @nr_scanned: optional output for number of pages scanned (incremental)
 * @shrink: control flags for selecting cache types
 *
 * This function is the main interface to the shrinker. It will try to release
 * up to @target pages of main memory backing storage from buffer objects.
 * Selection of the specific caches can be done with @shrink. This is e.g.
 * useful when purgeable objects should be removed from caches preferentially.
 *
 * Note that it's not guaranteed that the released amount is actually available
 * as free system memory - the pages might still be in use due to other reasons
 * (like cpu mmaps) or the mm core has reused them before we could grab them.
 * Therefore code that needs to explicitly shrink buffer object caches (e.g. to
 * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
 *
 * Also note that any kind of pinning (both per-vma address space pins and
 * backing storage pins at the buffer object level) results in the shrinker
 * code having to skip the object.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long
i915_gem_shrink(struct drm_i915_private *i915,
		unsigned long target,
		unsigned long *nr_scanned,
		unsigned int shrink)
{
	const struct {
		struct list_head *list;
		unsigned int bit;
	} phases[] = {
		{ &i915->mm.purge_list, ~0u },
		{
			&i915->mm.shrink_list,
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
		},
		{ NULL, 0 },
	}, *phase;
	intel_wakeref_t wakeref = 0;
	unsigned long count = 0;
	unsigned long scanned = 0;

	/*
	 * When shrinking the active list, we should also consider active
	 * contexts. Active contexts are pinned until they are retired, and
	 * so cannot simply be unbound to retire and unpin their pages. To
	 * shrink the contexts, we must wait until the gpu is idle and
	 * completed its switch to the kernel context. In short, we do
	 * not have a good mechanism for idling a specific context.
	 */

	trace_i915_gem_shrink(i915, target, shrink);

	/*
	 * Unbinding of objects will require HW access; let us not wake the
	 * device just to recover a little memory. If absolutely necessary,
	 * we will force the wake during oom-notifier.
	 */
	if (shrink & I915_SHRINK_BOUND) {
		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
		if (!wakeref)
			shrink &= ~I915_SHRINK_BOUND;
	}

	/*
	 * As we may completely rewrite the (un)bound list whilst unbinding
	 * (due to retiring requests) we have to strictly process only
	 * one element of the list at a time, and recheck the list
	 * on every iteration.
	 *
	 * In particular, we must hold a reference whilst removing the
	 * object as we may end up waiting for and/or retiring the objects.
	 * This might release the final reference (held by the active list)
	 * and result in the object being freed from under us. This is
	 * similar to the precautions the eviction code must take whilst
	 * removing objects.
	 *
	 * Also note that although these lists do not hold a reference to
	 * the object we can safely grab one here: The final object
	 * unreferencing and the bound_list are both protected by the
	 * dev->struct_mutex and so we won't ever be able to observe an
	 * object on the bound_list with a reference count equal to 0.
	 */
	for (phase = phases; phase->list; phase++) {
		struct list_head still_in_list;
		struct drm_i915_gem_object *obj;
		unsigned long flags;

		if ((shrink & phase->bit) == 0)
			continue;

		INIT_LIST_HEAD(&still_in_list);

		/*
		 * We serialize our access to unreferenced objects through
		 * the use of the struct_mutex. While the objects are not
		 * yet freed (due to RCU then a workqueue) we still want
		 * to be able to shrink their pages, so they remain on
		 * the unbound/bound list until actually freed.
		 */
		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		while (count < target &&
		       (obj = list_first_entry_or_null(phase->list,
						       typeof(*obj),
						       mm.link))) {
			list_move_tail(&obj->mm.link, &still_in_list);

			if (shrink & I915_SHRINK_VMAPS &&
			    !is_vmalloc_addr(obj->mm.mapping))
				continue;

			if (!(shrink & I915_SHRINK_ACTIVE) &&
			    i915_gem_object_is_framebuffer(obj))
				continue;

			if (!can_release_pages(obj))
				continue;

			if (!kref_get_unless_zero(&obj->base.refcount))
				continue;

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

			if (unsafe_drop_pages(obj, shrink)) {
				/* May arrive from get_pages on another bo */
				mutex_lock(&obj->mm.lock);
				if (!i915_gem_object_has_pages(obj)) {
					try_to_writeback(obj, shrink);
					count += obj->base.size >> PAGE_SHIFT;
				}
				mutex_unlock(&obj->mm.lock);
			}

			scanned += obj->base.size >> PAGE_SHIFT;
			i915_gem_object_put(obj);

			spin_lock_irqsave(&i915->mm.obj_lock, flags);
		}
		list_splice_tail(&still_in_list, phase->list);
		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}

	if (shrink & I915_SHRINK_BOUND)
		intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	if (nr_scanned)
		*nr_scanned += scanned;
	return count;
}

/**
 * i915_gem_shrink_all - Shrink buffer object caches completely
 * @i915: i915 device
 *
 * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
 * caches completely. It also first waits for and retires all outstanding
 * requests to also be able to release backing storage for active objects.
 *
 * This should only be used in code to intentionally quiesce the gpu or as a
 * last-ditch effort when memory seems to have run out.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;
	unsigned long freed = 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		freed = i915_gem_shrink(i915, -1UL, NULL,
					I915_SHRINK_BOUND |
					I915_SHRINK_UNBOUND);
	}

	return freed;
}
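
/*
 * Hooks into the core mm shrinker: i915_gem_shrinker_count() reports how
 * many pages we could plausibly release (and tunes our preferred batch
 * size), while i915_gem_shrinker_scan() performs the actual reclaim on
 * behalf of vmscan.
 */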
static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long num_objects;
	unsigned long count;

	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
	num_objects = READ_ONCE(i915->mm.shrink_count);

	/*
	 * Update our preferred vmscan batch size for the next pass.
	 * Our rough guess for an effective batch size is two available
	 * GEM objects' worth of pages. That is, we don't want the shrinker
	 * to fire until it is worth the cost of freeing an entire GEM
	 * object.
	 */
	if (num_objects) {
		unsigned long avg = 2 * count / num_objects;

		i915->mm.shrinker.batch =
			max((i915->mm.shrinker.batch + avg) >> 1,
			    128ul /* default SHRINK_BATCH */);
	}

	return count;
}

static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long freed;

	sc->nr_scanned = 0;

	freed = i915_gem_shrink(i915,
				sc->nr_to_scan,
				&sc->nr_scanned,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND);
	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
			freed += i915_gem_shrink(i915,
						 sc->nr_to_scan - sc->nr_scanned,
						 &sc->nr_scanned,
						 I915_SHRINK_ACTIVE |
						 I915_SHRINK_BOUND |
						 I915_SHRINK_UNBOUND |
						 I915_SHRINK_WRITEBACK);
		}
	}

	return sc->nr_scanned ? freed : SHRINK_STOP;
}
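
/*
 * Last-resort purge run from the OOM notifier chain: forcibly wake the
 * device, release everything we can (including starting writeback) and
 * report how many pages remain pinned and therefore unavailable.
 */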
static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.oom_notifier);
	struct drm_i915_gem_object *obj;
	unsigned long unevictable, available, freed_pages;
	intel_wakeref_t wakeref;
	unsigned long flags;

	freed_pages = 0;
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_WRITEBACK);

	/*
	 * Because we may be allocating inside our own driver, we cannot
	 * assert that there are no objects with pinned pages that are not
	 * being pointed to by hardware.
	 */
	available = unevictable = 0;
	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		if (!can_release_pages(obj))
			unevictable += obj->base.size >> PAGE_SHIFT;
		else
			available += obj->base.size >> PAGE_SHIFT;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

	if (freed_pages || available)
		pr_info("Purging GPU memory, %lu pages freed, "
			"%lu pages still pinned, %lu pages left available.\n",
			freed_pages, unevictable, available);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}
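
/*
 * vmap-space purge notifier: when the kernel runs short of vmap address
 * space, drop the objects holding vmalloc'ed mappings and also release any
 * cached GGTT iomaps, as those wrap vmap space as well.
 */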
static int
i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
	struct i915_vma *vma, *next;
	unsigned long freed_pages = 0;
	intel_wakeref_t wakeref;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_VMAPS);

	/* We also want to clear any cached iomaps as they wrap vmap */
	mutex_lock(&i915->ggtt.vm.mutex);
	list_for_each_entry_safe(vma, next,
				 &i915->ggtt.vm.bound_list, vm_link) {
		unsigned long count = vma->node.size >> PAGE_SHIFT;

		if (!vma->iomap || i915_vma_is_active(vma))
			continue;

		if (__i915_vma_unbind(vma) == 0)
			freed_pages += count;
	}
	mutex_unlock(&i915->ggtt.vm.mutex);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
{
	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
	i915->mm.shrinker.batch = 4096;
	drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker));

	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
	drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier));

	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
	drm_WARN_ON(&i915->drm,
		    register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}

void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915)
{
	drm_WARN_ON(&i915->drm,
		    unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
	drm_WARN_ON(&i915->drm,
		    unregister_oom_notifier(&i915->mm.oom_notifier));
	unregister_shrinker(&i915->mm.shrinker);
}
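
/*
 * Teach lockdep that @mutex may be taken from within the shrinker, by
 * faking an acquisition of @mutex nested under fs_reclaim. Any inversion
 * against the reclaim path is then reported immediately instead of only
 * once we actually hit memory pressure. Compiled out without
 * CONFIG_LOCKDEP.
 */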
void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
				    struct mutex *mutex)
{
	bool unlock = false;

	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
		mutex_acquire(&i915->drm.struct_mutex.dep_map,
			      I915_MM_NORMAL, 0, _RET_IP_);
		unlock = true;
	}

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
	mutex_release(&mutex->dep_map, _RET_IP_);

	fs_reclaim_release(GFP_KERNEL);

	if (unlock)
		mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_);
}

#define obj_to_i915(obj__) to_i915((obj__)->base.dev)

void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = obj_to_i915(obj);
	unsigned long flags;

	/*
	 * We can only be called while the pages are pinned or when
	 * the pages are released. If pinned, we should only be called
	 * from a single caller under controlled conditions; and on release
	 * only one caller may release us. The two cases must never overlap.
	 */
	if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
		return;

	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	if (!atomic_fetch_inc(&obj->mm.shrink_pin) &&
	    !list_empty(&obj->mm.link)) {
		list_del_init(&obj->mm.link);
		i915->mm.shrink_count--;
		i915->mm.shrink_memory -= obj->base.size;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}

static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
					      struct list_head *head)
{
	struct drm_i915_private *i915 = obj_to_i915(obj);
	unsigned long flags;

	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
	if (!i915_gem_object_is_shrinkable(obj))
		return;

	if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
		return;

	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	GEM_BUG_ON(!kref_read(&obj->base.refcount));
	if (atomic_dec_and_test(&obj->mm.shrink_pin)) {
		GEM_BUG_ON(!list_empty(&obj->mm.link));

		list_add_tail(&obj->mm.link, head);
		i915->mm.shrink_count++;
		i915->mm.shrink_memory += obj->base.size;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}

/*
 * Shrinkable objects are placed on the ordinary shrink list, purgeable
 * objects on the purge list; i915_gem_shrink() scans the purge list first
 * so that contents userspace has marked as discardable are reclaimed
 * preferentially.
 */
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
	__i915_gem_object_make_shrinkable(obj,
					  &obj_to_i915(obj)->mm.shrink_list);
}

void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
	__i915_gem_object_make_shrinkable(obj,
					  &obj_to_i915(obj)->mm.purge_list);
}