/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_object.h"
#include "i915_globals.h"

static struct i915_global_object {
	struct i915_global base;
	struct kmem_cache *slab_objects;
} global;

struct drm_i915_gem_object *i915_gem_object_alloc(void)
{
	return kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	kmem_cache_free(global.slab_objects, obj);
}

static void
frontbuffer_retire(struct i915_active_request *active,
		   struct i915_request *request)
{
	struct drm_i915_gem_object *obj =
		container_of(active, typeof(*obj), frontbuffer_write);

	intel_fb_obj_flush(obj, ORIGIN_CS);
}

void i915_gem_object_init(struct drm_i915_gem_object *obj,
			  const struct drm_i915_gem_object_ops *ops)
{
	mutex_init(&obj->mm.lock);

	spin_lock_init(&obj->vma.lock);
	INIT_LIST_HEAD(&obj->vma.list);

	INIT_LIST_HEAD(&obj->lut_list);
	INIT_LIST_HEAD(&obj->batch_pool_link);

	init_rcu_head(&obj->rcu);

	obj->ops = ops;

	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
	i915_active_request_init(&obj->frontbuffer_write,
				 NULL, frontbuffer_retire);

	obj->mm.madv = I915_MADV_WILLNEED;
	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
	mutex_init(&obj->mm.get_page.lock);
}

/**
 * Mark up the object's coherency levels for a given cache_level
 * @obj: #drm_i915_gem_object
 * @cache_level: cache level
 */
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
					 unsigned int cache_level)
{
	obj->cache_level = cache_level;

	if (cache_level != I915_CACHE_NONE)
		obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
				       I915_BO_CACHE_COHERENT_FOR_WRITE);
	else if (HAS_LLC(to_i915(obj->base.dev)))
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
	else
		obj->cache_coherent = 0;

	obj->cache_dirty =
		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}

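/*
 * Called when @file closes a handle to @gem: prune the object's lookup
 * entries belonging to this client and drop the per-context vma
 * references they held, closing any non-GGTT vma whose open count
 * reaches zero.
 */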
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem);
	struct drm_i915_file_private *fpriv = file->driver_priv;
	struct i915_lut_handle *lut, *ln;
	LIST_HEAD(close);

	i915_gem_object_lock(obj);
	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
		struct i915_gem_context *ctx = lut->ctx;

		if (ctx->file_priv != fpriv)
			continue;

		i915_gem_context_get(ctx);
		list_move(&lut->obj_link, &close);
	}
	i915_gem_object_unlock(obj);

	list_for_each_entry_safe(lut, ln, &close, obj_link) {
		struct i915_gem_context *ctx = lut->ctx;
		struct i915_vma *vma;

		/*
		 * We allow the process to have multiple handles to the same
		 * vma, in the same fd namespace, by virtue of flink/open.
		 */

		mutex_lock(&ctx->mutex);
		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
		if (vma) {
			GEM_BUG_ON(vma->obj != obj);
			GEM_BUG_ON(!atomic_read(&vma->open_count));
			if (atomic_dec_and_test(&vma->open_count) &&
			    !i915_vma_is_ggtt(vma))
				i915_vma_close(vma);
		}
		mutex_unlock(&ctx->mutex);

		i915_gem_context_put(lut->ctx);
		i915_lut_handle_free(lut);
		i915_gem_object_put(obj);
	}
}

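/*
 * Reap a list of objects handed over by the deferred-free machinery:
 * destroy any remaining vma, release the backing pages and return each
 * object to the slab cache, all under a runtime pm wakeref.
 */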
static void __i915_gem_free_objects(struct drm_i915_private *i915,
				    struct llist_node *freed)
{
	struct drm_i915_gem_object *obj, *on;
	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	llist_for_each_entry_safe(obj, on, freed, freed) {
		struct i915_vma *vma, *vn;

		trace_i915_gem_object_destroy(obj);

		mutex_lock(&i915->drm.struct_mutex);

		GEM_BUG_ON(i915_gem_object_is_active(obj));
		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
			GEM_BUG_ON(i915_vma_is_active(vma));
			vma->flags &= ~I915_VMA_PIN_MASK;
			i915_vma_destroy(vma);
		}
		GEM_BUG_ON(!list_empty(&obj->vma.list));
		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));

		/*
		 * This serializes freeing with the shrinker. Since the free
		 * is delayed, first by RCU then by the workqueue, we want the
		 * shrinker to be able to free pages of unreferenced objects,
		 * or else we may oom whilst there are plenty of deferred
		 * freed objects.
		 */
		if (i915_gem_object_has_pages(obj) &&
		    i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);
			list_del_init(&obj->mm.link);
			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}

		mutex_unlock(&i915->drm.struct_mutex);

		GEM_BUG_ON(atomic_read(&obj->bind_count));
		GEM_BUG_ON(obj->userfault_count);
		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
		GEM_BUG_ON(!list_empty(&obj->lut_list));

		if (obj->ops->release)
			obj->ops->release(obj);

		atomic_set(&obj->mm.pages_pin_count, 0);
		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
		GEM_BUG_ON(i915_gem_object_has_pages(obj));

		if (obj->base.import_attach)
			drm_prime_gem_destroy(&obj->base, NULL);

		drm_gem_object_release(&obj->base);

		bitmap_free(obj->bit_17);
		i915_gem_object_free(obj);

		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
		atomic_dec(&i915->mm.free_count);

		cond_resched();
	}
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

void i915_gem_flush_free_objects(struct drm_i915_private *i915)
{
	struct llist_node *freed;

	/* Free the oldest, most stale object to keep the free_list short */
	freed = NULL;
	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
		/* Only one consumer of llist_del_first() allowed */
		spin_lock(&i915->mm.free_lock);
		freed = llist_del_first(&i915->mm.free_list);
		spin_unlock(&i915->mm.free_lock);
	}
	if (unlikely(freed)) {
		freed->next = NULL;
		__i915_gem_free_objects(i915, freed);
	}
}

static void __i915_gem_free_work(struct work_struct *work)
{
	struct drm_i915_private *i915 =
		container_of(work, struct drm_i915_private, mm.free_work);
	struct llist_node *freed;

	/*
	 * All file-owned VMA should have been released by this point through
	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
	 * However, the object may also be bound into the global GTT (e.g.
	 * older GPUs without per-process support, or for direct access through
	 * the GTT either for the user or for scanout). Those VMA still need to
	 * be unbound now.
	 */

	spin_lock(&i915->mm.free_lock);
	while ((freed = llist_del_all(&i915->mm.free_list))) {
		spin_unlock(&i915->mm.free_lock);

		__i915_gem_free_objects(i915, freed);
		if (need_resched())
			return;

		spin_lock(&i915->mm.free_lock);
	}
	spin_unlock(&i915->mm.free_lock);
}

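/*
 * RCU callback for i915_gem_free_object(): after the grace period the
 * object is pushed onto i915->mm.free_list, and the free worker is
 * kicked if the list was previously empty.
 */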
static void __i915_gem_free_object_rcu(struct rcu_head *head)
{
	struct drm_i915_gem_object *obj =
		container_of(head, typeof(*obj), rcu);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	/*
	 * We reuse obj->rcu for the freed list, so we had better not treat
	 * it like a rcu_head from this point forwards. And we expect all
	 * objects to be freed via this path.
	 */
	destroy_rcu_head(&obj->rcu);

	/*
	 * Since we require blocking on struct_mutex to unbind the freed
	 * object from the GPU before releasing resources back to the
	 * system, we cannot do that directly from the RCU callback (which
	 * may be a softirq context), but must instead defer that work onto
	 * a kthread. We use the RCU callback rather than move the freed
	 * object directly onto the work queue so that we can mix between
	 * using the worker and performing frees directly from subsequent
	 * allocations for crude but effective memory throttling.
	 */
	if (llist_add(&obj->freed, &i915->mm.free_list))
		queue_work(i915->wq, &i915->mm.free_work);
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);

	/*
	 * Before we free the object, make sure any pure RCU-only
	 * read-side critical sections are complete, e.g.
	 * i915_gem_busy_ioctl(). For the corresponding synchronized
	 * lookup see i915_gem_object_lookup_rcu().
	 */
	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

void
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
				   unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		i915_gem_flush_ggtt_writes(dev_priv);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

void i915_gem_init__objects(struct drm_i915_private *i915)
{
	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}

static void i915_global_objects_shrink(void)
{
	kmem_cache_shrink(global.slab_objects);
}

static void i915_global_objects_exit(void)
{
	kmem_cache_destroy(global.slab_objects);
}

static struct i915_global_object global = { {
	.shrink = i915_global_objects_shrink,
	.exit = i915_global_objects_exit,
} };

int __init i915_global_objects_init(void)
{
	global.slab_objects =
		KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!global.slab_objects)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif