1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/sched/mm.h> 26 #include <drm/drm_gem.h> 27 28 #include "display/intel_frontbuffer.h" 29 30 #include "gt/intel_engine.h" 31 #include "gt/intel_engine_heartbeat.h" 32 #include "gt/intel_gt.h" 33 #include "gt/intel_gt_requests.h" 34 35 #include "i915_drv.h" 36 #include "i915_globals.h" 37 #include "i915_sw_fence_work.h" 38 #include "i915_trace.h" 39 #include "i915_vma.h" 40 41 static struct i915_global_vma { 42 struct i915_global base; 43 struct kmem_cache *slab_vmas; 44 } global; 45 46 struct i915_vma *i915_vma_alloc(void) 47 { 48 return kmem_cache_zalloc(global.slab_vmas, GFP_KERNEL); 49 } 50 51 void i915_vma_free(struct i915_vma *vma) 52 { 53 return kmem_cache_free(global.slab_vmas, vma); 54 } 55 56 #if IS_ENABLED(CONFIG_DRM_I915_ERRLOG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM) 57 58 #include <linux/stackdepot.h> 59 60 static void vma_print_allocator(struct i915_vma *vma, const char *reason) 61 { 62 unsigned long *entries; 63 unsigned int nr_entries; 64 char buf[512]; 65 66 if (!vma->node.stack) { 67 DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: unknown owner\n", 68 vma->node.start, vma->node.size, reason); 69 return; 70 } 71 72 nr_entries = stack_depot_fetch(vma->node.stack, &entries); 73 stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0); 74 DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n", 75 vma->node.start, vma->node.size, reason, buf); 76 } 77 78 #else 79 80 static void vma_print_allocator(struct i915_vma *vma, const char *reason) 81 { 82 } 83 84 #endif 85 86 static inline struct i915_vma *active_to_vma(struct i915_active *ref) 87 { 88 return container_of(ref, typeof(struct i915_vma), active); 89 } 90 91 static int __i915_vma_active(struct i915_active *ref) 92 { 93 return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; 94 } 95 96 __i915_active_call 97 static void __i915_vma_retire(struct i915_active *ref) 98 { 99 i915_vma_put(active_to_vma(ref)); 100 } 101 102 static struct i915_vma * 103 vma_create(struct drm_i915_gem_object *obj, 104 struct i915_address_space *vm, 105 const struct i915_ggtt_view *view) 106 { 107 struct i915_vma *vma; 108 struct rb_node *rb, **p; 109 110 /* The aliasing_ppgtt should never be used directly! */ 111 GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm); 112 113 vma = i915_vma_alloc(); 114 if (vma == NULL) 115 return ERR_PTR(-ENOMEM); 116 117 kref_init(&vma->ref); 118 mutex_init(&vma->pages_mutex); 119 vma->vm = i915_vm_get(vm); 120 vma->ops = &vm->vma_ops; 121 vma->obj = obj; 122 vma->resv = obj->base.resv; 123 vma->size = obj->base.size; 124 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 125 126 i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire); 127 128 /* Declare ourselves safe for use inside shrinkers */ 129 if (IS_ENABLED(CONFIG_LOCKDEP)) { 130 fs_reclaim_acquire(GFP_KERNEL); 131 might_lock(&vma->active.mutex); 132 fs_reclaim_release(GFP_KERNEL); 133 } 134 135 INIT_LIST_HEAD(&vma->closed_link); 136 137 if (view && view->type != I915_GGTT_VIEW_NORMAL) { 138 vma->ggtt_view = *view; 139 if (view->type == I915_GGTT_VIEW_PARTIAL) { 140 GEM_BUG_ON(range_overflows_t(u64, 141 view->partial.offset, 142 view->partial.size, 143 obj->base.size >> PAGE_SHIFT)); 144 vma->size = view->partial.size; 145 vma->size <<= PAGE_SHIFT; 146 GEM_BUG_ON(vma->size > obj->base.size); 147 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 148 vma->size = intel_rotation_info_size(&view->rotated); 149 vma->size <<= PAGE_SHIFT; 150 } else if (view->type == I915_GGTT_VIEW_REMAPPED) { 151 vma->size = intel_remapped_info_size(&view->remapped); 152 vma->size <<= PAGE_SHIFT; 153 } 154 } 155 156 if (unlikely(vma->size > vm->total)) 157 goto err_vma; 158 159 GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE)); 160 161 if (i915_is_ggtt(vm)) { 162 if (unlikely(overflows_type(vma->size, u32))) 163 goto err_vma; 164 165 vma->fence_size = i915_gem_fence_size(vm->i915, vma->size, 166 i915_gem_object_get_tiling(obj), 167 i915_gem_object_get_stride(obj)); 168 if (unlikely(vma->fence_size < vma->size || /* overflow */ 169 vma->fence_size > vm->total)) 170 goto err_vma; 171 172 GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I915_GTT_MIN_ALIGNMENT)); 173 174 vma->fence_alignment = i915_gem_fence_alignment(vm->i915, vma->size, 175 i915_gem_object_get_tiling(obj), 176 i915_gem_object_get_stride(obj)); 177 GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); 178 179 __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); 180 } 181 182 spin_lock(&obj->vma.lock); 183 184 rb = NULL; 185 p = &obj->vma.tree.rb_node; 186 while (*p) { 187 struct i915_vma *pos; 188 long cmp; 189 190 rb = *p; 191 pos = rb_entry(rb, struct i915_vma, obj_node); 192 193 /* 194 * If the view already exists in the tree, another thread 195 * already created a matching vma, so return the older instance 196 * and dispose of ours. 197 */ 198 cmp = i915_vma_compare(pos, vm, view); 199 if (cmp == 0) { 200 spin_unlock(&obj->vma.lock); 201 i915_vma_free(vma); 202 return pos; 203 } 204 205 if (cmp < 0) 206 p = &rb->rb_right; 207 else 208 p = &rb->rb_left; 209 } 210 rb_link_node(&vma->obj_node, rb, p); 211 rb_insert_color(&vma->obj_node, &obj->vma.tree); 212 213 if (i915_vma_is_ggtt(vma)) 214 /* 215 * We put the GGTT vma at the start of the vma-list, followed 216 * by the ppGGTT vma. This allows us to break early when 217 * iterating over only the GGTT vma for an object, see 218 * for_each_ggtt_vma() 219 */ 220 list_add(&vma->obj_link, &obj->vma.list); 221 else 222 list_add_tail(&vma->obj_link, &obj->vma.list); 223 224 spin_unlock(&obj->vma.lock); 225 226 return vma; 227 228 err_vma: 229 i915_vma_free(vma); 230 return ERR_PTR(-E2BIG); 231 } 232 233 static struct i915_vma * 234 vma_lookup(struct drm_i915_gem_object *obj, 235 struct i915_address_space *vm, 236 const struct i915_ggtt_view *view) 237 { 238 struct rb_node *rb; 239 240 rb = obj->vma.tree.rb_node; 241 while (rb) { 242 struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); 243 long cmp; 244 245 cmp = i915_vma_compare(vma, vm, view); 246 if (cmp == 0) 247 return vma; 248 249 if (cmp < 0) 250 rb = rb->rb_right; 251 else 252 rb = rb->rb_left; 253 } 254 255 return NULL; 256 } 257 258 /** 259 * i915_vma_instance - return the singleton instance of the VMA 260 * @obj: parent &struct drm_i915_gem_object to be mapped 261 * @vm: address space in which the mapping is located 262 * @view: additional mapping requirements 263 * 264 * i915_vma_instance() looks up an existing VMA of the @obj in the @vm with 265 * the same @view characteristics. If a match is not found, one is created. 266 * Once created, the VMA is kept until either the object is freed, or the 267 * address space is closed. 268 * 269 * Returns the vma, or an error pointer. 270 */ 271 struct i915_vma * 272 i915_vma_instance(struct drm_i915_gem_object *obj, 273 struct i915_address_space *vm, 274 const struct i915_ggtt_view *view) 275 { 276 struct i915_vma *vma; 277 278 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 279 GEM_BUG_ON(!atomic_read(&vm->open)); 280 281 spin_lock(&obj->vma.lock); 282 vma = vma_lookup(obj, vm, view); 283 spin_unlock(&obj->vma.lock); 284 285 /* vma_create() will resolve the race if another creates the vma */ 286 if (unlikely(!vma)) 287 vma = vma_create(obj, vm, view); 288 289 GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); 290 return vma; 291 } 292 293 struct i915_vma_work { 294 struct dma_fence_work base; 295 struct i915_vma *vma; 296 struct drm_i915_gem_object *pinned; 297 enum i915_cache_level cache_level; 298 unsigned int flags; 299 }; 300 301 static int __vma_bind(struct dma_fence_work *work) 302 { 303 struct i915_vma_work *vw = container_of(work, typeof(*vw), base); 304 struct i915_vma *vma = vw->vma; 305 int err; 306 307 err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags); 308 if (err) 309 atomic_or(I915_VMA_ERROR, &vma->flags); 310 311 return err; 312 } 313 314 static void __vma_release(struct dma_fence_work *work) 315 { 316 struct i915_vma_work *vw = container_of(work, typeof(*vw), base); 317 318 if (vw->pinned) 319 __i915_gem_object_unpin_pages(vw->pinned); 320 } 321 322 static const struct dma_fence_work_ops bind_ops = { 323 .name = "bind", 324 .work = __vma_bind, 325 .release = __vma_release, 326 }; 327 328 struct i915_vma_work *i915_vma_work(void) 329 { 330 struct i915_vma_work *vw; 331 332 vw = kzalloc(sizeof(*vw), GFP_KERNEL); 333 if (!vw) 334 return NULL; 335 336 dma_fence_work_init(&vw->base, &bind_ops); 337 vw->base.dma.error = -EAGAIN; /* disable the worker by default */ 338 339 return vw; 340 } 341 342 /** 343 * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 344 * @vma: VMA to map 345 * @cache_level: mapping cache level 346 * @flags: flags like global or local mapping 347 * @work: preallocated worker for allocating and binding the PTE 348 * 349 * DMA addresses are taken from the scatter-gather table of this object (or of 350 * this VMA in case of non-default GGTT views) and PTE entries set up. 351 * Note that DMA addresses are also the only part of the SG table we care about. 352 */ 353 int i915_vma_bind(struct i915_vma *vma, 354 enum i915_cache_level cache_level, 355 u32 flags, 356 struct i915_vma_work *work) 357 { 358 u32 bind_flags; 359 u32 vma_flags; 360 int ret; 361 362 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); 363 GEM_BUG_ON(vma->size > vma->node.size); 364 365 if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start, 366 vma->node.size, 367 vma->vm->total))) 368 return -ENODEV; 369 370 if (GEM_DEBUG_WARN_ON(!flags)) 371 return -EINVAL; 372 373 bind_flags = flags; 374 bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 375 376 vma_flags = atomic_read(&vma->flags); 377 vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 378 if (flags & PIN_UPDATE) 379 bind_flags |= vma_flags; 380 else 381 bind_flags &= ~vma_flags; 382 if (bind_flags == 0) 383 return 0; 384 385 GEM_BUG_ON(!vma->pages); 386 387 trace_i915_vma_bind(vma, bind_flags); 388 if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) { 389 work->vma = vma; 390 work->cache_level = cache_level; 391 work->flags = bind_flags | I915_VMA_ALLOC; 392 393 /* 394 * Note we only want to chain up to the migration fence on 395 * the pages (not the object itself). As we don't track that, 396 * yet, we have to use the exclusive fence instead. 397 * 398 * Also note that we do not want to track the async vma as 399 * part of the obj->resv->excl_fence as it only affects 400 * execution and not content or object's backing store lifetime. 401 */ 402 GEM_BUG_ON(i915_active_has_exclusive(&vma->active)); 403 i915_active_set_exclusive(&vma->active, &work->base.dma); 404 work->base.dma.error = 0; /* enable the queue_work() */ 405 406 if (vma->obj) { 407 __i915_gem_object_pin_pages(vma->obj); 408 work->pinned = vma->obj; 409 } 410 } else { 411 GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags); 412 ret = vma->ops->bind_vma(vma, cache_level, bind_flags); 413 if (ret) 414 return ret; 415 } 416 417 atomic_or(bind_flags, &vma->flags); 418 return 0; 419 } 420 421 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 422 { 423 void __iomem *ptr; 424 int err; 425 426 if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { 427 err = -ENODEV; 428 goto err; 429 } 430 431 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 432 GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); 433 434 ptr = READ_ONCE(vma->iomap); 435 if (ptr == NULL) { 436 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, 437 vma->node.start, 438 vma->node.size); 439 if (ptr == NULL) { 440 err = -ENOMEM; 441 goto err; 442 } 443 444 if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) { 445 io_mapping_unmap(ptr); 446 ptr = vma->iomap; 447 } 448 } 449 450 __i915_vma_pin(vma); 451 452 err = i915_vma_pin_fence(vma); 453 if (err) 454 goto err_unpin; 455 456 i915_vma_set_ggtt_write(vma); 457 458 /* NB Access through the GTT requires the device to be awake. */ 459 return ptr; 460 461 err_unpin: 462 __i915_vma_unpin(vma); 463 err: 464 return IO_ERR_PTR(err); 465 } 466 467 void i915_vma_flush_writes(struct i915_vma *vma) 468 { 469 if (i915_vma_unset_ggtt_write(vma)) 470 intel_gt_flush_ggtt_writes(vma->vm->gt); 471 } 472 473 void i915_vma_unpin_iomap(struct i915_vma *vma) 474 { 475 GEM_BUG_ON(vma->iomap == NULL); 476 477 i915_vma_flush_writes(vma); 478 479 i915_vma_unpin_fence(vma); 480 i915_vma_unpin(vma); 481 } 482 483 void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags) 484 { 485 struct i915_vma *vma; 486 struct drm_i915_gem_object *obj; 487 488 vma = fetch_and_zero(p_vma); 489 if (!vma) 490 return; 491 492 obj = vma->obj; 493 GEM_BUG_ON(!obj); 494 495 i915_vma_unpin(vma); 496 i915_vma_close(vma); 497 498 if (flags & I915_VMA_RELEASE_MAP) 499 i915_gem_object_unpin_map(obj); 500 501 i915_gem_object_put(obj); 502 } 503 504 bool i915_vma_misplaced(const struct i915_vma *vma, 505 u64 size, u64 alignment, u64 flags) 506 { 507 if (!drm_mm_node_allocated(&vma->node)) 508 return false; 509 510 if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) 511 return true; 512 513 if (vma->node.size < size) 514 return true; 515 516 GEM_BUG_ON(alignment && !is_power_of_2(alignment)); 517 if (alignment && !IS_ALIGNED(vma->node.start, alignment)) 518 return true; 519 520 if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) 521 return true; 522 523 if (flags & PIN_OFFSET_BIAS && 524 vma->node.start < (flags & PIN_OFFSET_MASK)) 525 return true; 526 527 if (flags & PIN_OFFSET_FIXED && 528 vma->node.start != (flags & PIN_OFFSET_MASK)) 529 return true; 530 531 return false; 532 } 533 534 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 535 { 536 bool mappable, fenceable; 537 538 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 539 GEM_BUG_ON(!vma->fence_size); 540 541 fenceable = (vma->node.size >= vma->fence_size && 542 IS_ALIGNED(vma->node.start, vma->fence_alignment)); 543 544 mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; 545 546 if (mappable && fenceable) 547 set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); 548 else 549 clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); 550 } 551 552 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) 553 { 554 struct drm_mm_node *node = &vma->node; 555 struct drm_mm_node *other; 556 557 /* 558 * On some machines we have to be careful when putting differing types 559 * of snoopable memory together to avoid the prefetcher crossing memory 560 * domains and dying. During vm initialisation, we decide whether or not 561 * these constraints apply and set the drm_mm.color_adjust 562 * appropriately. 563 */ 564 if (!i915_vm_has_cache_coloring(vma->vm)) 565 return true; 566 567 /* Only valid to be called on an already inserted vma */ 568 GEM_BUG_ON(!drm_mm_node_allocated(node)); 569 GEM_BUG_ON(list_empty(&node->node_list)); 570 571 other = list_prev_entry(node, node_list); 572 if (i915_node_color_differs(other, color) && 573 !drm_mm_hole_follows(other)) 574 return false; 575 576 other = list_next_entry(node, node_list); 577 if (i915_node_color_differs(other, color) && 578 !drm_mm_hole_follows(node)) 579 return false; 580 581 return true; 582 } 583 584 static void assert_bind_count(const struct drm_i915_gem_object *obj) 585 { 586 /* 587 * Combine the assertion that the object is bound and that we have 588 * pinned its pages. But we should never have bound the object 589 * more than we have pinned its pages. (For complete accuracy, we 590 * assume that no else is pinning the pages, but as a rough assertion 591 * that we will not run into problems later, this will do!) 592 */ 593 GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < atomic_read(&obj->bind_count)); 594 } 595 596 /** 597 * i915_vma_insert - finds a slot for the vma in its address space 598 * @vma: the vma 599 * @size: requested size in bytes (can be larger than the VMA) 600 * @alignment: required alignment 601 * @flags: mask of PIN_* flags to use 602 * 603 * First we try to allocate some free space that meets the requirements for 604 * the VMA. Failiing that, if the flags permit, it will evict an old VMA, 605 * preferrably the oldest idle entry to make room for the new VMA. 606 * 607 * Returns: 608 * 0 on success, negative error code otherwise. 609 */ 610 static int 611 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) 612 { 613 unsigned long color; 614 u64 start, end; 615 int ret; 616 617 GEM_BUG_ON(i915_vma_is_closed(vma)); 618 GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); 619 GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); 620 621 size = max(size, vma->size); 622 alignment = max(alignment, vma->display_alignment); 623 if (flags & PIN_MAPPABLE) { 624 size = max_t(typeof(size), size, vma->fence_size); 625 alignment = max_t(typeof(alignment), 626 alignment, vma->fence_alignment); 627 } 628 629 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); 630 GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); 631 GEM_BUG_ON(!is_power_of_2(alignment)); 632 633 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 634 GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); 635 636 end = vma->vm->total; 637 if (flags & PIN_MAPPABLE) 638 end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end); 639 if (flags & PIN_ZONE_4G) 640 end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); 641 GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); 642 643 /* If binding the object/GGTT view requires more space than the entire 644 * aperture has, reject it early before evicting everything in a vain 645 * attempt to find space. 646 */ 647 if (size > end) { 648 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n", 649 size, flags & PIN_MAPPABLE ? "mappable" : "total", 650 end); 651 return -ENOSPC; 652 } 653 654 color = 0; 655 if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) 656 color = vma->obj->cache_level; 657 658 if (flags & PIN_OFFSET_FIXED) { 659 u64 offset = flags & PIN_OFFSET_MASK; 660 if (!IS_ALIGNED(offset, alignment) || 661 range_overflows(offset, size, end)) 662 return -EINVAL; 663 664 ret = i915_gem_gtt_reserve(vma->vm, &vma->node, 665 size, offset, color, 666 flags); 667 if (ret) 668 return ret; 669 } else { 670 /* 671 * We only support huge gtt pages through the 48b PPGTT, 672 * however we also don't want to force any alignment for 673 * objects which need to be tightly packed into the low 32bits. 674 * 675 * Note that we assume that GGTT are limited to 4GiB for the 676 * forseeable future. See also i915_ggtt_offset(). 677 */ 678 if (upper_32_bits(end - 1) && 679 vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { 680 /* 681 * We can't mix 64K and 4K PTEs in the same page-table 682 * (2M block), and so to avoid the ugliness and 683 * complexity of coloring we opt for just aligning 64K 684 * objects to 2M. 685 */ 686 u64 page_alignment = 687 rounddown_pow_of_two(vma->page_sizes.sg | 688 I915_GTT_PAGE_SIZE_2M); 689 690 /* 691 * Check we don't expand for the limited Global GTT 692 * (mappable aperture is even more precious!). This 693 * also checks that we exclude the aliasing-ppgtt. 694 */ 695 GEM_BUG_ON(i915_vma_is_ggtt(vma)); 696 697 alignment = max(alignment, page_alignment); 698 699 if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) 700 size = round_up(size, I915_GTT_PAGE_SIZE_2M); 701 } 702 703 ret = i915_gem_gtt_insert(vma->vm, &vma->node, 704 size, alignment, color, 705 start, end, flags); 706 if (ret) 707 return ret; 708 709 GEM_BUG_ON(vma->node.start < start); 710 GEM_BUG_ON(vma->node.start + vma->node.size > end); 711 } 712 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); 713 GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); 714 715 if (vma->obj) { 716 struct drm_i915_gem_object *obj = vma->obj; 717 718 atomic_inc(&obj->bind_count); 719 assert_bind_count(obj); 720 } 721 list_add_tail(&vma->vm_link, &vma->vm->bound_list); 722 723 return 0; 724 } 725 726 static void 727 i915_vma_detach(struct i915_vma *vma) 728 { 729 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); 730 GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); 731 732 /* 733 * And finally now the object is completely decoupled from this 734 * vma, we can drop its hold on the backing storage and allow 735 * it to be reaped by the shrinker. 736 */ 737 list_del(&vma->vm_link); 738 if (vma->obj) { 739 struct drm_i915_gem_object *obj = vma->obj; 740 741 assert_bind_count(obj); 742 atomic_dec(&obj->bind_count); 743 } 744 } 745 746 static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) 747 { 748 unsigned int bound; 749 bool pinned = true; 750 751 bound = atomic_read(&vma->flags); 752 do { 753 if (unlikely(flags & ~bound)) 754 return false; 755 756 if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) 757 return false; 758 759 if (!(bound & I915_VMA_PIN_MASK)) 760 goto unpinned; 761 762 GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0); 763 } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); 764 765 return true; 766 767 unpinned: 768 /* 769 * If pin_count==0, but we are bound, check under the lock to avoid 770 * racing with a concurrent i915_vma_unbind(). 771 */ 772 mutex_lock(&vma->vm->mutex); 773 do { 774 if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { 775 pinned = false; 776 break; 777 } 778 779 if (unlikely(flags & ~bound)) { 780 pinned = false; 781 break; 782 } 783 } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); 784 mutex_unlock(&vma->vm->mutex); 785 786 return pinned; 787 } 788 789 static int vma_get_pages(struct i915_vma *vma) 790 { 791 int err = 0; 792 793 if (atomic_add_unless(&vma->pages_count, 1, 0)) 794 return 0; 795 796 /* Allocations ahoy! */ 797 if (mutex_lock_interruptible(&vma->pages_mutex)) 798 return -EINTR; 799 800 if (!atomic_read(&vma->pages_count)) { 801 if (vma->obj) { 802 err = i915_gem_object_pin_pages(vma->obj); 803 if (err) 804 goto unlock; 805 } 806 807 err = vma->ops->set_pages(vma); 808 if (err) { 809 if (vma->obj) 810 i915_gem_object_unpin_pages(vma->obj); 811 goto unlock; 812 } 813 } 814 atomic_inc(&vma->pages_count); 815 816 unlock: 817 mutex_unlock(&vma->pages_mutex); 818 819 return err; 820 } 821 822 static void __vma_put_pages(struct i915_vma *vma, unsigned int count) 823 { 824 /* We allocate under vma_get_pages, so beware the shrinker */ 825 mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING); 826 GEM_BUG_ON(atomic_read(&vma->pages_count) < count); 827 if (atomic_sub_return(count, &vma->pages_count) == 0) { 828 vma->ops->clear_pages(vma); 829 GEM_BUG_ON(vma->pages); 830 if (vma->obj) 831 i915_gem_object_unpin_pages(vma->obj); 832 } 833 mutex_unlock(&vma->pages_mutex); 834 } 835 836 static void vma_put_pages(struct i915_vma *vma) 837 { 838 if (atomic_add_unless(&vma->pages_count, -1, 1)) 839 return; 840 841 __vma_put_pages(vma, 1); 842 } 843 844 static void vma_unbind_pages(struct i915_vma *vma) 845 { 846 unsigned int count; 847 848 lockdep_assert_held(&vma->vm->mutex); 849 850 /* The upper portion of pages_count is the number of bindings */ 851 count = atomic_read(&vma->pages_count); 852 count >>= I915_VMA_PAGES_BIAS; 853 GEM_BUG_ON(!count); 854 855 __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS); 856 } 857 858 int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) 859 { 860 struct i915_vma_work *work = NULL; 861 intel_wakeref_t wakeref = 0; 862 unsigned int bound; 863 int err; 864 865 BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); 866 BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); 867 868 GEM_BUG_ON(flags & PIN_UPDATE); 869 GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL))); 870 871 /* First try and grab the pin without rebinding the vma */ 872 if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK)) 873 return 0; 874 875 err = vma_get_pages(vma); 876 if (err) 877 return err; 878 879 if (flags & vma->vm->bind_async_flags) { 880 work = i915_vma_work(); 881 if (!work) { 882 err = -ENOMEM; 883 goto err_pages; 884 } 885 } 886 887 if (flags & PIN_GLOBAL) 888 wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); 889 890 /* No more allocations allowed once we hold vm->mutex */ 891 err = mutex_lock_interruptible(&vma->vm->mutex); 892 if (err) 893 goto err_fence; 894 895 bound = atomic_read(&vma->flags); 896 if (unlikely(bound & I915_VMA_ERROR)) { 897 err = -ENOMEM; 898 goto err_unlock; 899 } 900 901 if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) { 902 err = -EAGAIN; /* pins are meant to be fairly temporary */ 903 goto err_unlock; 904 } 905 906 if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) { 907 __i915_vma_pin(vma); 908 goto err_unlock; 909 } 910 911 err = i915_active_acquire(&vma->active); 912 if (err) 913 goto err_unlock; 914 915 if (!(bound & I915_VMA_BIND_MASK)) { 916 err = i915_vma_insert(vma, size, alignment, flags); 917 if (err) 918 goto err_active; 919 920 if (i915_is_ggtt(vma->vm)) 921 __i915_vma_set_map_and_fenceable(vma); 922 } 923 924 GEM_BUG_ON(!vma->pages); 925 err = i915_vma_bind(vma, 926 vma->obj ? vma->obj->cache_level : 0, 927 flags, work); 928 if (err) 929 goto err_remove; 930 931 /* There should only be at most 2 active bindings (user, global) */ 932 GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound); 933 atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count); 934 list_move_tail(&vma->vm_link, &vma->vm->bound_list); 935 936 __i915_vma_pin(vma); 937 GEM_BUG_ON(!i915_vma_is_pinned(vma)); 938 GEM_BUG_ON(!i915_vma_is_bound(vma, flags)); 939 GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); 940 941 err_remove: 942 if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) { 943 i915_vma_detach(vma); 944 drm_mm_remove_node(&vma->node); 945 } 946 err_active: 947 i915_active_release(&vma->active); 948 err_unlock: 949 mutex_unlock(&vma->vm->mutex); 950 err_fence: 951 if (work) 952 dma_fence_work_commit(&work->base); 953 if (wakeref) 954 intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); 955 err_pages: 956 vma_put_pages(vma); 957 return err; 958 } 959 960 static void flush_idle_contexts(struct intel_gt *gt) 961 { 962 struct intel_engine_cs *engine; 963 enum intel_engine_id id; 964 965 for_each_engine(engine, gt, id) 966 intel_engine_flush_barriers(engine); 967 968 intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); 969 } 970 971 int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) 972 { 973 struct i915_address_space *vm = vma->vm; 974 int err; 975 976 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 977 978 do { 979 err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); 980 if (err != -ENOSPC) 981 return err; 982 983 /* Unlike i915_vma_pin, we don't take no for an answer! */ 984 flush_idle_contexts(vm->gt); 985 if (mutex_lock_interruptible(&vm->mutex) == 0) { 986 i915_gem_evict_vm(vm); 987 mutex_unlock(&vm->mutex); 988 } 989 } while (1); 990 } 991 992 void i915_vma_close(struct i915_vma *vma) 993 { 994 struct intel_gt *gt = vma->vm->gt; 995 unsigned long flags; 996 997 GEM_BUG_ON(i915_vma_is_closed(vma)); 998 999 /* 1000 * We defer actually closing, unbinding and destroying the VMA until 1001 * the next idle point, or if the object is freed in the meantime. By 1002 * postponing the unbind, we allow for it to be resurrected by the 1003 * client, avoiding the work required to rebind the VMA. This is 1004 * advantageous for DRI, where the client/server pass objects 1005 * between themselves, temporarily opening a local VMA to the 1006 * object, and then closing it again. The same object is then reused 1007 * on the next frame (or two, depending on the depth of the swap queue) 1008 * causing us to rebind the VMA once more. This ends up being a lot 1009 * of wasted work for the steady state. 1010 */ 1011 spin_lock_irqsave(>->closed_lock, flags); 1012 list_add(&vma->closed_link, >->closed_vma); 1013 spin_unlock_irqrestore(>->closed_lock, flags); 1014 } 1015 1016 static void __i915_vma_remove_closed(struct i915_vma *vma) 1017 { 1018 struct intel_gt *gt = vma->vm->gt; 1019 1020 spin_lock_irq(>->closed_lock); 1021 list_del_init(&vma->closed_link); 1022 spin_unlock_irq(>->closed_lock); 1023 } 1024 1025 void i915_vma_reopen(struct i915_vma *vma) 1026 { 1027 if (i915_vma_is_closed(vma)) 1028 __i915_vma_remove_closed(vma); 1029 } 1030 1031 void i915_vma_release(struct kref *ref) 1032 { 1033 struct i915_vma *vma = container_of(ref, typeof(*vma), ref); 1034 1035 if (drm_mm_node_allocated(&vma->node)) { 1036 mutex_lock(&vma->vm->mutex); 1037 atomic_and(~I915_VMA_PIN_MASK, &vma->flags); 1038 WARN_ON(__i915_vma_unbind(vma)); 1039 mutex_unlock(&vma->vm->mutex); 1040 GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); 1041 } 1042 GEM_BUG_ON(i915_vma_is_active(vma)); 1043 1044 if (vma->obj) { 1045 struct drm_i915_gem_object *obj = vma->obj; 1046 1047 spin_lock(&obj->vma.lock); 1048 list_del(&vma->obj_link); 1049 rb_erase(&vma->obj_node, &obj->vma.tree); 1050 spin_unlock(&obj->vma.lock); 1051 } 1052 1053 __i915_vma_remove_closed(vma); 1054 i915_vm_put(vma->vm); 1055 1056 i915_active_fini(&vma->active); 1057 i915_vma_free(vma); 1058 } 1059 1060 void i915_vma_parked(struct intel_gt *gt) 1061 { 1062 struct i915_vma *vma, *next; 1063 1064 spin_lock_irq(>->closed_lock); 1065 list_for_each_entry_safe(vma, next, >->closed_vma, closed_link) { 1066 struct drm_i915_gem_object *obj = vma->obj; 1067 struct i915_address_space *vm = vma->vm; 1068 1069 /* XXX All to avoid keeping a reference on i915_vma itself */ 1070 1071 if (!kref_get_unless_zero(&obj->base.refcount)) 1072 continue; 1073 1074 if (i915_vm_tryopen(vm)) { 1075 list_del_init(&vma->closed_link); 1076 } else { 1077 i915_gem_object_put(obj); 1078 obj = NULL; 1079 } 1080 1081 spin_unlock_irq(>->closed_lock); 1082 1083 if (obj) { 1084 __i915_vma_put(vma); 1085 i915_gem_object_put(obj); 1086 } 1087 1088 i915_vm_close(vm); 1089 1090 /* Restart after dropping lock */ 1091 spin_lock_irq(>->closed_lock); 1092 next = list_first_entry(>->closed_vma, 1093 typeof(*next), closed_link); 1094 } 1095 spin_unlock_irq(>->closed_lock); 1096 } 1097 1098 static void __i915_vma_iounmap(struct i915_vma *vma) 1099 { 1100 GEM_BUG_ON(i915_vma_is_pinned(vma)); 1101 1102 if (vma->iomap == NULL) 1103 return; 1104 1105 io_mapping_unmap(vma->iomap); 1106 vma->iomap = NULL; 1107 } 1108 1109 void i915_vma_revoke_mmap(struct i915_vma *vma) 1110 { 1111 struct drm_vma_offset_node *node; 1112 u64 vma_offset; 1113 1114 if (!i915_vma_has_userfault(vma)) 1115 return; 1116 1117 GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); 1118 GEM_BUG_ON(!vma->obj->userfault_count); 1119 1120 node = &vma->mmo->vma_node; 1121 vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; 1122 unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, 1123 drm_vma_node_offset_addr(node) + vma_offset, 1124 vma->size, 1125 1); 1126 1127 i915_vma_unset_userfault(vma); 1128 if (!--vma->obj->userfault_count) 1129 list_del(&vma->obj->userfault_link); 1130 } 1131 1132 int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) 1133 { 1134 int err; 1135 1136 GEM_BUG_ON(!i915_vma_is_pinned(vma)); 1137 1138 /* Wait for the vma to be bound before we start! */ 1139 err = i915_request_await_active(rq, &vma->active); 1140 if (err) 1141 return err; 1142 1143 return i915_active_add_request(&vma->active, rq); 1144 } 1145 1146 int i915_vma_move_to_active(struct i915_vma *vma, 1147 struct i915_request *rq, 1148 unsigned int flags) 1149 { 1150 struct drm_i915_gem_object *obj = vma->obj; 1151 int err; 1152 1153 assert_object_held(obj); 1154 1155 err = __i915_vma_move_to_active(vma, rq); 1156 if (unlikely(err)) 1157 return err; 1158 1159 if (flags & EXEC_OBJECT_WRITE) { 1160 struct intel_frontbuffer *front; 1161 1162 front = __intel_frontbuffer_get(obj); 1163 if (unlikely(front)) { 1164 if (intel_frontbuffer_invalidate(front, ORIGIN_CS)) 1165 i915_active_add_request(&front->write, rq); 1166 intel_frontbuffer_put(front); 1167 } 1168 1169 dma_resv_add_excl_fence(vma->resv, &rq->fence); 1170 obj->write_domain = I915_GEM_DOMAIN_RENDER; 1171 obj->read_domains = 0; 1172 } else { 1173 err = dma_resv_reserve_shared(vma->resv, 1); 1174 if (unlikely(err)) 1175 return err; 1176 1177 dma_resv_add_shared_fence(vma->resv, &rq->fence); 1178 obj->write_domain = 0; 1179 } 1180 obj->read_domains |= I915_GEM_GPU_DOMAINS; 1181 obj->mm.dirty = true; 1182 1183 GEM_BUG_ON(!i915_vma_is_active(vma)); 1184 return 0; 1185 } 1186 1187 int __i915_vma_unbind(struct i915_vma *vma) 1188 { 1189 int ret; 1190 1191 lockdep_assert_held(&vma->vm->mutex); 1192 1193 /* 1194 * First wait upon any activity as retiring the request may 1195 * have side-effects such as unpinning or even unbinding this vma. 1196 * 1197 * XXX Actually waiting under the vm->mutex is a hinderance and 1198 * should be pipelined wherever possible. In cases where that is 1199 * unavoidable, we should lift the wait to before the mutex. 1200 */ 1201 ret = i915_vma_sync(vma); 1202 if (ret) 1203 return ret; 1204 1205 GEM_BUG_ON(i915_vma_is_active(vma)); 1206 if (i915_vma_is_pinned(vma)) { 1207 vma_print_allocator(vma, "is pinned"); 1208 return -EAGAIN; 1209 } 1210 1211 GEM_BUG_ON(i915_vma_is_active(vma)); 1212 if (!drm_mm_node_allocated(&vma->node)) 1213 return 0; 1214 1215 if (i915_vma_is_map_and_fenceable(vma)) { 1216 /* 1217 * Check that we have flushed all writes through the GGTT 1218 * before the unbind, other due to non-strict nature of those 1219 * indirect writes they may end up referencing the GGTT PTE 1220 * after the unbind. 1221 */ 1222 i915_vma_flush_writes(vma); 1223 GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); 1224 1225 /* release the fence reg _after_ flushing */ 1226 ret = i915_vma_revoke_fence(vma); 1227 if (ret) 1228 return ret; 1229 1230 /* Force a pagefault for domain tracking on next user access */ 1231 i915_vma_revoke_mmap(vma); 1232 1233 __i915_vma_iounmap(vma); 1234 clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); 1235 } 1236 GEM_BUG_ON(vma->fence); 1237 GEM_BUG_ON(i915_vma_has_userfault(vma)); 1238 1239 if (likely(atomic_read(&vma->vm->open))) { 1240 trace_i915_vma_unbind(vma); 1241 vma->ops->unbind_vma(vma); 1242 } 1243 atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags); 1244 1245 i915_vma_detach(vma); 1246 vma_unbind_pages(vma); 1247 1248 drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ 1249 return 0; 1250 } 1251 1252 int i915_vma_unbind(struct i915_vma *vma) 1253 { 1254 struct i915_address_space *vm = vma->vm; 1255 intel_wakeref_t wakeref = 0; 1256 int err; 1257 1258 if (!drm_mm_node_allocated(&vma->node)) 1259 return 0; 1260 1261 if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) 1262 /* XXX not always required: nop_clear_range */ 1263 wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); 1264 1265 err = mutex_lock_interruptible(&vm->mutex); 1266 if (err) 1267 return err; 1268 1269 err = __i915_vma_unbind(vma); 1270 mutex_unlock(&vm->mutex); 1271 1272 if (wakeref) 1273 intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); 1274 1275 return err; 1276 } 1277 1278 struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) 1279 { 1280 i915_gem_object_make_unshrinkable(vma->obj); 1281 return vma; 1282 } 1283 1284 void i915_vma_make_shrinkable(struct i915_vma *vma) 1285 { 1286 i915_gem_object_make_shrinkable(vma->obj); 1287 } 1288 1289 void i915_vma_make_purgeable(struct i915_vma *vma) 1290 { 1291 i915_gem_object_make_purgeable(vma->obj); 1292 } 1293 1294 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1295 #include "selftests/i915_vma.c" 1296 #endif 1297 1298 static void i915_global_vma_shrink(void) 1299 { 1300 kmem_cache_shrink(global.slab_vmas); 1301 } 1302 1303 static void i915_global_vma_exit(void) 1304 { 1305 kmem_cache_destroy(global.slab_vmas); 1306 } 1307 1308 static struct i915_global_vma global = { { 1309 .shrink = i915_global_vma_shrink, 1310 .exit = i915_global_vma_exit, 1311 } }; 1312 1313 int __init i915_global_vma_init(void) 1314 { 1315 global.slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 1316 if (!global.slab_vmas) 1317 return -ENOMEM; 1318 1319 i915_global_register(&global.base); 1320 return 0; 1321 } 1322