/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_object.h"
#include "i915_globals.h"

static struct i915_global_object {
	struct i915_global base;
	struct kmem_cache *slab_objects;
} global;

struct drm_i915_gem_object *i915_gem_object_alloc(void)
{
	return kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	return kmem_cache_free(global.slab_objects, obj);
}

static void
frontbuffer_retire(struct i915_active_request *active,
		   struct i915_request *request)
{
	struct drm_i915_gem_object *obj =
		container_of(active, typeof(*obj), frontbuffer_write);

	intel_fb_obj_flush(obj, ORIGIN_CS);
}

void i915_gem_object_init(struct drm_i915_gem_object *obj,
			  const struct drm_i915_gem_object_ops *ops)
{
	mutex_init(&obj->mm.lock);

	spin_lock_init(&obj->vma.lock);
	INIT_LIST_HEAD(&obj->vma.list);

	INIT_LIST_HEAD(&obj->lut_list);
	INIT_LIST_HEAD(&obj->batch_pool_link);

	init_rcu_head(&obj->rcu);

	obj->ops = ops;

	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
	i915_active_request_init(&obj->frontbuffer_write,
				 NULL, frontbuffer_retire);

	obj->mm.madv = I915_MADV_WILLNEED;
	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
	mutex_init(&obj->mm.get_page.lock);
}

/**
 * Mark up the object's coherency levels for a given cache_level
 * @obj: #drm_i915_gem_object
 * @cache_level: cache level
 */
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
					 unsigned int cache_level)
{
	obj->cache_level = cache_level;

	if (cache_level != I915_CACHE_NONE)
		obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
				       I915_BO_CACHE_COHERENT_FOR_WRITE);
	else if (HAS_LLC(to_i915(obj->base.dev)))
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
	else
		obj->cache_coherent = 0;

	obj->cache_dirty =
		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
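
/*
 * Called when a GEM handle to @obj belonging to @file is closed. Any
 * per-context lookup entries (luts) installed by this file are detached
 * from the object under the object lock, after which each entry's vma
 * handle reference is dropped (closing a non-GGTT vma once its last open
 * handle disappears) and the object reference held by the lut is released.
 */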
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem);
	struct drm_i915_file_private *fpriv = file->driver_priv;
	struct i915_lut_handle *lut, *ln;
	LIST_HEAD(close);

	i915_gem_object_lock(obj);
	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
		struct i915_gem_context *ctx = lut->ctx;

		if (ctx->file_priv != fpriv)
			continue;

		i915_gem_context_get(ctx);
		list_move(&lut->obj_link, &close);
	}
	i915_gem_object_unlock(obj);

	list_for_each_entry_safe(lut, ln, &close, obj_link) {
		struct i915_gem_context *ctx = lut->ctx;
		struct i915_vma *vma;

		/*
		 * We allow the process to have multiple handles to the same
		 * vma, in the same fd namespace, by virtue of flink/open.
		 */

		mutex_lock(&ctx->mutex);
		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
		if (vma) {
			GEM_BUG_ON(vma->obj != obj);
			GEM_BUG_ON(!atomic_read(&vma->open_count));
			if (atomic_dec_and_test(&vma->open_count) &&
			    !i915_vma_is_ggtt(vma))
				i915_vma_close(vma);
		}
		mutex_unlock(&ctx->mutex);

		i915_gem_context_put(lut->ctx);
		i915_lut_handle_free(lut);
		i915_gem_object_put(obj);
	}
}

static void __i915_gem_free_object_rcu(struct rcu_head *head)
{
	struct drm_i915_gem_object *obj =
		container_of(head, typeof(*obj), rcu);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	reservation_object_fini(&obj->base._resv);
	i915_gem_object_free(obj);

	GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
	atomic_dec(&i915->mm.free_count);
}

static void __i915_gem_free_objects(struct drm_i915_private *i915,
				    struct llist_node *freed)
{
	struct drm_i915_gem_object *obj, *on;
	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	llist_for_each_entry_safe(obj, on, freed, freed) {
		struct i915_vma *vma, *vn;

		trace_i915_gem_object_destroy(obj);

		mutex_lock(&i915->drm.struct_mutex);

		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
			GEM_BUG_ON(i915_vma_is_active(vma));
			vma->flags &= ~I915_VMA_PIN_MASK;
			i915_vma_destroy(vma);
		}
		GEM_BUG_ON(!list_empty(&obj->vma.list));
		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));

		mutex_unlock(&i915->drm.struct_mutex);

		GEM_BUG_ON(atomic_read(&obj->bind_count));
		GEM_BUG_ON(obj->userfault_count);
		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
		GEM_BUG_ON(!list_empty(&obj->lut_list));

		atomic_set(&obj->mm.pages_pin_count, 0);
		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
		GEM_BUG_ON(i915_gem_object_has_pages(obj));
		bitmap_free(obj->bit_17);

		if (obj->base.import_attach)
			drm_prime_gem_destroy(&obj->base, NULL);

		drm_gem_free_mmap_offset(&obj->base);

		if (obj->ops->release)
			obj->ops->release(obj);

		/* But keep the pointer alive for RCU-protected lookups */
		call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
	}
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

void i915_gem_flush_free_objects(struct drm_i915_private *i915)
{
	struct llist_node *freed;

	/* Free the oldest, most stale object to keep the free_list short */
	freed = NULL;
	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
		/* Only one consumer of llist_del_first() allowed */
		spin_lock(&i915->mm.free_lock);
		freed = llist_del_first(&i915->mm.free_list);
		spin_unlock(&i915->mm.free_lock);
	}
	if (unlikely(freed)) {
		freed->next = NULL;
		__i915_gem_free_objects(i915, freed);
	}
}
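
/*
 * Worker servicing i915->mm.free_list: claim the whole list of
 * deferred-freed objects and release them, backing off if rescheduling
 * is needed rather than monopolise the CPU. i915_gem_flush_free_objects()
 * above performs a synchronous trim of the same list from the allocation
 * paths.
 */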
static void __i915_gem_free_work(struct work_struct *work)
{
	struct drm_i915_private *i915 =
		container_of(work, struct drm_i915_private, mm.free_work);
	struct llist_node *freed;

	/*
	 * All file-owned VMA should have been released by this point through
	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
	 * However, the object may also be bound into the global GTT (e.g.
	 * older GPUs without per-process support, or for direct access through
	 * the GTT either for the user or for scanout). Those VMA still need to
	 * be unbound now.
	 */

	spin_lock(&i915->mm.free_lock);
	while ((freed = llist_del_all(&i915->mm.free_list))) {
		spin_unlock(&i915->mm.free_lock);

		__i915_gem_free_objects(i915, freed);
		if (need_resched())
			return;

		spin_lock(&i915->mm.free_lock);
	}
	spin_unlock(&i915->mm.free_lock);
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	/*
	 * Before we free the object, make sure any pure RCU-only
	 * read-side critical sections are complete, e.g.
	 * i915_gem_busy_ioctl(). For the corresponding synchronized
	 * lookup see i915_gem_object_lookup_rcu().
	 */
	atomic_inc(&i915->mm.free_count);

	/*
	 * This serializes freeing with the shrinker. Since the free
	 * is delayed, first by RCU then by the workqueue, we want the
	 * shrinker to be able to free pages of unreferenced objects,
	 * or else we may oom whilst there are plenty of deferred
	 * freed objects.
	 */
	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);
		list_del_init(&obj->mm.link);
		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}

	/*
	 * Since we require blocking on struct_mutex to unbind the freed
	 * object from the GPU before releasing resources back to the
	 * system, we cannot do that directly from the RCU callback (which may
	 * be a softirq context), but must instead defer that work onto a
	 * kthread. We use the RCU callback rather than move the freed object
	 * directly onto the work queue so that we can mix between using the
	 * worker and performing frees directly from subsequent allocations for
	 * crude but effective memory throttling.
	 */
	if (llist_add(&obj->freed, &i915->mm.free_list))
		queue_work(i915->wq, &i915->mm.free_work);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}
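
/*
 * Flush @obj's pending write domain if it overlaps @flush_domains:
 * GTT writes are flushed through the GGTT and reported to the
 * frontbuffer tracking code, WC writes only require a write memory
 * barrier, CPU writes are clflushed, and a GPU (render) write to a
 * cacheable object is recorded by marking the cache dirty for a later
 * clflush.
 */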
void
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
				   unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		for_each_ggtt_vma(vma, obj)
			intel_gt_flush_ggtt_writes(vma->vm->gt);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}

		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

void i915_gem_init__objects(struct drm_i915_private *i915)
{
	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}

static void i915_global_objects_shrink(void)
{
	kmem_cache_shrink(global.slab_objects);
}

static void i915_global_objects_exit(void)
{
	kmem_cache_destroy(global.slab_objects);
}

static struct i915_global_object global = { {
	.shrink = i915_global_objects_shrink,
	.exit = i915_global_objects_exit,
} };

int __init i915_global_objects_init(void)
{
	global.slab_objects =
		KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!global.slab_objects)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif