/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2015 Intel Corporation
 */

#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>
#include <drm/i915_drm.h>

#include "i915_trace.h"

static bool shrinker_lock(struct drm_i915_private *i915,
			  unsigned int flags,
			  bool *unlock)
{
	struct mutex *m = &i915->drm.struct_mutex;

	switch (mutex_trylock_recursive(m)) {
	case MUTEX_TRYLOCK_RECURSIVE:
		*unlock = false;
		return true;

	case MUTEX_TRYLOCK_FAILED:
		*unlock = false;
		if (flags & I915_SHRINK_ACTIVE &&
		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
			*unlock = true;
		return *unlock;

	case MUTEX_TRYLOCK_SUCCESS:
		*unlock = true;
		return true;
	}

	BUG();
}

static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
{
	if (!unlock)
		return;

	mutex_unlock(&i915->drm.struct_mutex);
}

static bool swap_available(void)
{
	return get_nr_swap_pages() > 0;
}

static bool can_release_pages(struct drm_i915_gem_object *obj)
{
	/* Consider only shrinkable objects. */
	if (!i915_gem_object_is_shrinkable(obj))
		return false;

	/* Only report true if by unbinding the object and putting its pages
	 * we can actually make forward progress towards freeing physical
	 * pages.
	 *
	 * If the pages are pinned for any other reason than being bound
	 * to the GPU, simply unbinding from the GPU is not going to succeed
	 * in releasing our pin count on the pages themselves.
	 */
	if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
		return false;

	/* If any vma are "permanently" pinned, it will prevent us from
	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
	 * a permanent pin, along with a few others like the context objects.
	 * To simplify the scan, and to avoid walking the list of vma under the
	 * object, we just check the count of its permanent pins.
	 */
	if (READ_ONCE(obj->pin_global))
		return false;

	/* We can only return physical pages to the system if we can either
	 * discard the contents (because the user has marked them as being
	 * purgeable) or if we can move their contents out to swap.
	 */
	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}

static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_unbind(obj) == 0)
		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
	return !i915_gem_object_has_pages(obj);
}

static void try_to_writeback(struct drm_i915_gem_object *obj,
			     unsigned int flags)
{
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		i915_gem_object_truncate(obj);
		/* fall through */
	case __I915_MADV_PURGED:
		return;
	}

	if (flags & I915_SHRINK_WRITEBACK)
		i915_gem_object_writeback(obj);
}
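
/*
 * Illustrative note (a sketch, not code used by the driver): the pin
 * accounting that can_release_pages() relies on, with hypothetical numbers.
 *
 *	pages_pin_count == 2, bind_count == 2
 *		every pin comes from a GPU binding, so unbinding the vma
 *		drops all pins and the pages can be released
 *
 *	pages_pin_count == 3, bind_count == 2
 *		one extra pin is held elsewhere (e.g. a kernel mapping of
 *		the pages or another pin_pages user), so unbinding alone
 *		cannot free the backing store and the object is skipped
 */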

/**
 * i915_gem_shrink - Shrink buffer object caches
 * @i915: i915 device
 * @target: amount of memory to make available, in pages
 * @nr_scanned: optional output for number of pages scanned (incremental)
 * @shrink: control flags for selecting cache types
 *
 * This function is the main interface to the shrinker. It will try to release
 * up to @target pages of main memory backing storage from buffer objects.
 * Selection of the specific caches can be done with @shrink. This is e.g.
 * useful when purgeable objects should be removed from caches preferentially.
 *
 * Note that it's not guaranteed that the released amount is actually available
 * as free system memory - the pages might still be in use due to other reasons
 * (like cpu mmaps) or the mm core might have reused them before we could grab
 * them. Therefore code that needs to explicitly shrink buffer object caches
 * (e.g. to avoid deadlocks in memory reclaim) must fall back to
 * i915_gem_shrink_all().
 *
 * Also note that any kind of pinning (both per-vma address space pins and
 * backing storage pins at the buffer object level) results in the shrinker
 * code having to skip the object.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long
i915_gem_shrink(struct drm_i915_private *i915,
		unsigned long target,
		unsigned long *nr_scanned,
		unsigned int shrink)
{
	const struct {
		struct list_head *list;
		unsigned int bit;
	} phases[] = {
		{ &i915->mm.purge_list, ~0u },
		{
			&i915->mm.shrink_list,
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
		},
		{ NULL, 0 },
	}, *phase;
	intel_wakeref_t wakeref = 0;
	unsigned long count = 0;
	unsigned long scanned = 0;
	bool unlock;

	if (!shrinker_lock(i915, shrink, &unlock))
		return 0;

	/*
	 * When shrinking the active list, we should also consider active
	 * contexts. Active contexts are pinned until they are retired, and
	 * so can not be simply unbound to retire and unpin their pages. To
	 * shrink the contexts, we must wait until the gpu is idle and
	 * completed its switch to the kernel context. In short, we do
	 * not have a good mechanism for idling a specific context.
	 */

	trace_i915_gem_shrink(i915, target, shrink);
	i915_retire_requests(i915);

	/*
	 * Unbinding of objects will require HW access; let us not wake the
	 * device just to recover a little memory. If absolutely necessary,
	 * we will force the wake during oom-notifier.
	 */
	if (shrink & I915_SHRINK_BOUND) {
		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
		if (!wakeref)
			shrink &= ~I915_SHRINK_BOUND;
	}

	/*
	 * As we may completely rewrite the (un)bound list whilst unbinding
	 * (due to retiring requests) we have to strictly process only
	 * one element of the list at a time, and recheck the list
	 * on every iteration.
	 *
	 * In particular, we must hold a reference whilst removing the
	 * object as we may end up waiting for and/or retiring the objects.
	 * This might release the final reference (held by the active list)
	 * and result in the object being freed from under us. This is
	 * similar to the precautions the eviction code must take whilst
	 * removing objects.
	 *
	 * Also note that although these lists do not hold a reference to
	 * the object we can safely grab one here: The final object
	 * unreferencing and the bound_list are both protected by the
	 * dev->struct_mutex and so we won't ever be able to observe an
	 * object on the bound_list with a reference count equal to 0.
	 */
	for (phase = phases; phase->list; phase++) {
		struct list_head still_in_list;
		struct drm_i915_gem_object *obj;
		unsigned long flags;

		if ((shrink & phase->bit) == 0)
			continue;

		INIT_LIST_HEAD(&still_in_list);

		/*
		 * We serialize our access to unreferenced objects through
		 * the use of the struct_mutex. While the objects are not
		 * yet freed (due to RCU then a workqueue) we still want
		 * to be able to shrink their pages, so they remain on
		 * the unbound/bound list until actually freed.
		 */
		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		while (count < target &&
		       (obj = list_first_entry_or_null(phase->list,
						       typeof(*obj),
						       mm.link))) {
			list_move_tail(&obj->mm.link, &still_in_list);

			if (shrink & I915_SHRINK_VMAPS &&
			    !is_vmalloc_addr(obj->mm.mapping))
				continue;

			if (!(shrink & I915_SHRINK_ACTIVE) &&
			    (i915_gem_object_is_active(obj) ||
			     i915_gem_object_is_framebuffer(obj)))
				continue;

			if (!(shrink & I915_SHRINK_BOUND) &&
			    atomic_read(&obj->bind_count))
				continue;

			if (!can_release_pages(obj))
				continue;

			if (!kref_get_unless_zero(&obj->base.refcount))
				continue;

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

			if (unsafe_drop_pages(obj)) {
				/* May arrive from get_pages on another bo */
				mutex_lock_nested(&obj->mm.lock,
						  I915_MM_SHRINKER);
				if (!i915_gem_object_has_pages(obj)) {
					try_to_writeback(obj, shrink);
					count += obj->base.size >> PAGE_SHIFT;
				}
				mutex_unlock(&obj->mm.lock);
			}

			scanned += obj->base.size >> PAGE_SHIFT;
			i915_gem_object_put(obj);

			spin_lock_irqsave(&i915->mm.obj_lock, flags);
		}
		list_splice_tail(&still_in_list, phase->list);
		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}

	if (shrink & I915_SHRINK_BOUND)
		intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	i915_retire_requests(i915);

	shrinker_unlock(i915, unlock);

	if (nr_scanned)
		*nr_scanned += scanned;
	return count;
}
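
/*
 * Usage sketch (illustrative only, not a call site in this file): asking
 * the shrinker for 128 pages of bound/unbound backing storage before a
 * large allocation, falling back to i915_gem_shrink_all() if no progress
 * was made, as the kerneldoc above suggests.
 *
 *	unsigned long reclaimed;
 *
 *	reclaimed = i915_gem_shrink(i915, 128, NULL,
 *				    I915_SHRINK_BOUND |
 *				    I915_SHRINK_UNBOUND);
 *	if (!reclaimed)
 *		reclaimed = i915_gem_shrink_all(i915);
 */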

/**
 * i915_gem_shrink_all - Shrink buffer object caches completely
 * @i915: i915 device
 *
 * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
 * caches completely. It also first waits for and retires all outstanding
 * requests so that backing storage for active objects can be released as well.
 *
 * This should only be used in code to intentionally quiesce the gpu or as a
 * last-ditch effort when memory seems to have run out.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;
	unsigned long freed = 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		freed = i915_gem_shrink(i915, -1UL, NULL,
					I915_SHRINK_BOUND |
					I915_SHRINK_UNBOUND |
					I915_SHRINK_ACTIVE);
	}

	return freed;
}

static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long num_objects;
	unsigned long count;

	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
	num_objects = READ_ONCE(i915->mm.shrink_count);

	/*
	 * Update our preferred vmscan batch size for the next pass.
	 * Our rough guess for an effective batch size is roughly 2
	 * available GEM objects' worth of pages. That is, we don't want
	 * the shrinker to fire until it is worth the cost of freeing an
	 * entire GEM object.
	 */
	if (num_objects) {
		unsigned long avg = 2 * count / num_objects;

		i915->mm.shrinker.batch =
			max((i915->mm.shrinker.batch + avg) >> 1,
			    128ul /* default SHRINK_BATCH */);
	}

	return count;
}
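
/*
 * Worked example for the batch heuristic above (hypothetical numbers):
 * with 256 shrinkable objects covering 65536 pages, the average object is
 * 256 pages, so avg = 2 * 65536 / 256 = 512. Starting from the batch of
 * 4096 set at registration, the new batch is max((4096 + 512) >> 1, 128)
 * = 2304, and repeated passes converge towards roughly two objects' worth
 * of pages per scan.
 */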

static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long freed;
	bool unlock;

	sc->nr_scanned = 0;

	if (!shrinker_lock(i915, 0, &unlock))
		return SHRINK_STOP;

	freed = i915_gem_shrink(i915,
				sc->nr_to_scan,
				&sc->nr_scanned,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND |
				I915_SHRINK_WRITEBACK);
	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
			freed += i915_gem_shrink(i915,
						 sc->nr_to_scan - sc->nr_scanned,
						 &sc->nr_scanned,
						 I915_SHRINK_ACTIVE |
						 I915_SHRINK_BOUND |
						 I915_SHRINK_UNBOUND |
						 I915_SHRINK_WRITEBACK);
		}
	}

	shrinker_unlock(i915, unlock);

	return sc->nr_scanned ? freed : SHRINK_STOP;
}
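
/*
 * Rough sketch of how core reclaim drives the two callbacks above
 * (simplified; the real loop lives in mm/vmscan.c):
 *
 *	freeable = shrinker->count_objects(shrinker, sc);
 *	while (work_remaining) {
 *		sc->nr_to_scan = min(shrinker->batch, remaining);
 *		ret = shrinker->scan_objects(shrinker, sc);
 *		if (ret == SHRINK_STOP)
 *			break;
 *		freed += ret;
 *	}
 *
 * Returning SHRINK_STOP from i915_gem_shrinker_scan() when struct_mutex
 * cannot be taken tells vmscan to move on instead of repeatedly calling
 * back into a contended shrinker.
 */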

static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.oom_notifier);
	struct drm_i915_gem_object *obj;
	unsigned long unevictable, available, freed_pages;
	intel_wakeref_t wakeref;
	unsigned long flags;

	freed_pages = 0;
	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_WRITEBACK);

	/* Because we may be allocating inside our own driver, we cannot
	 * assert that there are no objects with pinned pages that are not
	 * being pointed to by hardware.
	 */
	available = unevictable = 0;
	spin_lock_irqsave(&i915->mm.obj_lock, flags);
	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		if (!can_release_pages(obj))
			unevictable += obj->base.size >> PAGE_SHIFT;
		else
			available += obj->base.size >> PAGE_SHIFT;
	}
	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);

	if (freed_pages || available)
		pr_info("Purging GPU memory, %lu pages freed, "
			"%lu pages still pinned, %lu pages left available.\n",
			freed_pages, unevictable, available);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

static int
i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
	struct i915_vma *vma, *next;
	unsigned long freed_pages = 0;
	intel_wakeref_t wakeref;
	bool unlock;

	if (!shrinker_lock(i915, 0, &unlock))
		return NOTIFY_DONE;

	/* Force everything onto the inactive lists */
	if (i915_gem_wait_for_idle(i915,
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT))
		goto out;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
					       I915_SHRINK_BOUND |
					       I915_SHRINK_UNBOUND |
					       I915_SHRINK_VMAPS);

	/* We also want to clear any cached iomaps as they wrap vmap */
	mutex_lock(&i915->ggtt.vm.mutex);
	list_for_each_entry_safe(vma, next,
				 &i915->ggtt.vm.bound_list, vm_link) {
		unsigned long count = vma->node.size >> PAGE_SHIFT;

		if (!vma->iomap || i915_vma_is_active(vma))
			continue;

		mutex_unlock(&i915->ggtt.vm.mutex);
		if (i915_vma_unbind(vma) == 0)
			freed_pages += count;
		mutex_lock(&i915->ggtt.vm.mutex);
	}
	mutex_unlock(&i915->ggtt.vm.mutex);

out:
	shrinker_unlock(i915, unlock);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

/**
 * i915_gem_shrinker_register - Register the i915 shrinker
 * @i915: i915 device
 *
 * This function registers and sets up the i915 shrinker and OOM handler.
 */
void i915_gem_shrinker_register(struct drm_i915_private *i915)
{
	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
	i915->mm.shrinker.batch = 4096;
	WARN_ON(register_shrinker(&i915->mm.shrinker));

	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
	WARN_ON(register_oom_notifier(&i915->mm.oom_notifier));

	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
	WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}
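
/*
 * Illustrative pairing (a sketch; the real call sites live elsewhere in
 * the driver): the shrinker and notifiers registered above are expected
 * to be set up once the GEM lists they walk are initialised, and torn
 * down again before that state disappears.
 *
 *	i915_gem_shrinker_register(i915);	during driver load
 *	...
 *	i915_gem_shrinker_unregister(i915);	during driver unload
 */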

/**
 * i915_gem_shrinker_unregister - Unregister the i915 shrinker
 * @i915: i915 device
 *
 * This function unregisters the i915 shrinker and OOM handler.
 */
void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
{
	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
	unregister_shrinker(&i915->mm.shrinker);
}

void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
				    struct mutex *mutex)
{
	bool unlock = false;

	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
		mutex_acquire(&i915->drm.struct_mutex.dep_map,
			      I915_MM_NORMAL, 0, _RET_IP_);
		unlock = true;
	}

	fs_reclaim_acquire(GFP_KERNEL);

	/*
	 * As we invariably rely on the struct_mutex within the shrinker,
	 * but have a complicated recursion dance, taint all the mutexes used
	 * within the shrinker with the struct_mutex. For completeness, we
	 * taint with all subclasses of struct_mutex, even though we should
	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
	 * deadlocks from using struct_mutex inside @mutex.
	 */
	mutex_acquire(&i915->drm.struct_mutex.dep_map,
		      I915_MM_SHRINKER, 0, _RET_IP_);

	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
	mutex_release(&mutex->dep_map, 0, _RET_IP_);

	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);

	fs_reclaim_release(GFP_KERNEL);

	if (unlock)
		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
}
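
/*
 * Usage sketch (illustrative; some_mutex is a hypothetical mutex that will
 * later be taken on the shrinker path): calling the helper above right
 * after the mutex is initialised teaches lockdep about the
 * struct_mutex/fs_reclaim ordering up front, so an ABBA inversion is
 * reported at init time rather than only when reclaim actually recurses
 * into the shrinker.
 *
 *	mutex_init(&some_mutex);
 *	i915_gem_shrinker_taints_mutex(i915, &some_mutex);
 */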