1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drm_vma_manager.h> 29 #include <drm/drm_pci.h> 30 #include <drm/i915_drm.h> 31 #include <linux/dma-fence-array.h> 32 #include <linux/kthread.h> 33 #include <linux/reservation.h> 34 #include <linux/shmem_fs.h> 35 #include <linux/slab.h> 36 #include <linux/stop_machine.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 #include <linux/dma-buf.h> 40 #include <linux/mman.h> 41 42 #include "gt/intel_engine_pm.h" 43 #include "gt/intel_gt_pm.h" 44 #include "gt/intel_mocs.h" 45 #include "gt/intel_reset.h" 46 #include "gt/intel_workarounds.h" 47 48 #include "i915_drv.h" 49 #include "i915_gem_clflush.h" 50 #include "i915_gemfs.h" 51 #include "i915_gem_pm.h" 52 #include "i915_trace.h" 53 #include "i915_vgpu.h" 54 55 #include "intel_display.h" 56 #include "intel_drv.h" 57 #include "intel_frontbuffer.h" 58 #include "intel_pm.h" 59 60 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 61 62 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 63 { 64 if (obj->cache_dirty) 65 return false; 66 67 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 68 return true; 69 70 return obj->pin_global; /* currently in use by HW, keep flushed */ 71 } 72 73 static int 74 insert_mappable_node(struct i915_ggtt *ggtt, 75 struct drm_mm_node *node, u32 size) 76 { 77 memset(node, 0, sizeof(*node)); 78 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 79 size, 0, I915_COLOR_UNEVICTABLE, 80 0, ggtt->mappable_end, 81 DRM_MM_INSERT_LOW); 82 } 83 84 static void 85 remove_mappable_node(struct drm_mm_node *node) 86 { 87 drm_mm_remove_node(node); 88 } 89 90 /* some bookkeeping */ 91 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 92 u64 size) 93 { 94 spin_lock(&dev_priv->mm.object_stat_lock); 95 dev_priv->mm.object_count++; 96 dev_priv->mm.object_memory += size; 97 spin_unlock(&dev_priv->mm.object_stat_lock); 98 } 99 100 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 101 u64 size) 102 { 103 spin_lock(&dev_priv->mm.object_stat_lock); 104 dev_priv->mm.object_count--; 105 dev_priv->mm.object_memory -= size; 106 spin_unlock(&dev_priv->mm.object_stat_lock); 107 } 108 109 int 110 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 111 struct drm_file *file) 112 { 113 
struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; 114 struct drm_i915_gem_get_aperture *args = data; 115 struct i915_vma *vma; 116 u64 pinned; 117 118 mutex_lock(&ggtt->vm.mutex); 119 120 pinned = ggtt->vm.reserved; 121 list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) 122 if (i915_vma_is_pinned(vma)) 123 pinned += vma->node.size; 124 125 mutex_unlock(&ggtt->vm.mutex); 126 127 args->aper_size = ggtt->vm.total; 128 args->aper_available_size = args->aper_size - pinned; 129 130 return 0; 131 } 132 133 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 134 { 135 struct address_space *mapping = obj->base.filp->f_mapping; 136 drm_dma_handle_t *phys; 137 struct sg_table *st; 138 struct scatterlist *sg; 139 char *vaddr; 140 int i; 141 int err; 142 143 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 144 return -EINVAL; 145 146 /* Always aligning to the object size, allows a single allocation 147 * to handle all possible callers, and given typical object sizes, 148 * the alignment of the buddy allocation will naturally match. 149 */ 150 phys = drm_pci_alloc(obj->base.dev, 151 roundup_pow_of_two(obj->base.size), 152 roundup_pow_of_two(obj->base.size)); 153 if (!phys) 154 return -ENOMEM; 155 156 vaddr = phys->vaddr; 157 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 158 struct page *page; 159 char *src; 160 161 page = shmem_read_mapping_page(mapping, i); 162 if (IS_ERR(page)) { 163 err = PTR_ERR(page); 164 goto err_phys; 165 } 166 167 src = kmap_atomic(page); 168 memcpy(vaddr, src, PAGE_SIZE); 169 drm_clflush_virt_range(vaddr, PAGE_SIZE); 170 kunmap_atomic(src); 171 172 put_page(page); 173 vaddr += PAGE_SIZE; 174 } 175 176 i915_gem_chipset_flush(to_i915(obj->base.dev)); 177 178 st = kmalloc(sizeof(*st), GFP_KERNEL); 179 if (!st) { 180 err = -ENOMEM; 181 goto err_phys; 182 } 183 184 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 185 kfree(st); 186 err = -ENOMEM; 187 goto err_phys; 188 } 189 190 sg = st->sgl; 191 sg->offset = 0; 192 sg->length = obj->base.size; 193 194 sg_dma_address(sg) = phys->busaddr; 195 sg_dma_len(sg) = obj->base.size; 196 197 obj->phys_handle = phys; 198 199 __i915_gem_object_set_pages(obj, st, sg->length); 200 201 return 0; 202 203 err_phys: 204 drm_pci_free(obj->base.dev, phys); 205 206 return err; 207 } 208 209 static void __start_cpu_write(struct drm_i915_gem_object *obj) 210 { 211 obj->read_domains = I915_GEM_DOMAIN_CPU; 212 obj->write_domain = I915_GEM_DOMAIN_CPU; 213 if (cpu_write_needs_clflush(obj)) 214 obj->cache_dirty = true; 215 } 216 217 void 218 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 219 struct sg_table *pages, 220 bool needs_clflush) 221 { 222 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 223 224 if (obj->mm.madv == I915_MADV_DONTNEED) 225 obj->mm.dirty = false; 226 227 if (needs_clflush && 228 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 229 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 230 drm_clflush_sg(pages); 231 232 __start_cpu_write(obj); 233 } 234 235 static void 236 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 237 struct sg_table *pages) 238 { 239 __i915_gem_object_release_shmem(obj, pages, false); 240 241 if (obj->mm.dirty) { 242 struct address_space *mapping = obj->base.filp->f_mapping; 243 char *vaddr = obj->phys_handle->vaddr; 244 int i; 245 246 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 247 struct page *page; 248 char *dst; 249 250 page = shmem_read_mapping_page(mapping, i); 251 if (IS_ERR(page)) 252 continue; 253 254 dst = kmap_atomic(page); 255 
drm_clflush_virt_range(vaddr, PAGE_SIZE); 256 memcpy(dst, vaddr, PAGE_SIZE); 257 kunmap_atomic(dst); 258 259 set_page_dirty(page); 260 if (obj->mm.madv == I915_MADV_WILLNEED) 261 mark_page_accessed(page); 262 put_page(page); 263 vaddr += PAGE_SIZE; 264 } 265 obj->mm.dirty = false; 266 } 267 268 sg_free_table(pages); 269 kfree(pages); 270 271 drm_pci_free(obj->base.dev, obj->phys_handle); 272 } 273 274 static void 275 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 276 { 277 i915_gem_object_unpin_pages(obj); 278 } 279 280 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 281 .get_pages = i915_gem_object_get_pages_phys, 282 .put_pages = i915_gem_object_put_pages_phys, 283 .release = i915_gem_object_release_phys, 284 }; 285 286 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 287 288 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 289 { 290 struct i915_vma *vma; 291 LIST_HEAD(still_in_list); 292 int ret; 293 294 lockdep_assert_held(&obj->base.dev->struct_mutex); 295 296 /* Closed vma are removed from the obj->vma_list - but they may 297 * still have an active binding on the object. To remove those we 298 * must wait for all rendering to complete to the object (as unbinding 299 * must anyway), and retire the requests. 300 */ 301 ret = i915_gem_object_set_to_cpu_domain(obj, false); 302 if (ret) 303 return ret; 304 305 spin_lock(&obj->vma.lock); 306 while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, 307 struct i915_vma, 308 obj_link))) { 309 list_move_tail(&vma->obj_link, &still_in_list); 310 spin_unlock(&obj->vma.lock); 311 312 ret = i915_vma_unbind(vma); 313 314 spin_lock(&obj->vma.lock); 315 } 316 list_splice(&still_in_list, &obj->vma.list); 317 spin_unlock(&obj->vma.lock); 318 319 return ret; 320 } 321 322 static long 323 i915_gem_object_wait_fence(struct dma_fence *fence, 324 unsigned int flags, 325 long timeout) 326 { 327 struct i915_request *rq; 328 329 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 330 331 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 332 return timeout; 333 334 if (!dma_fence_is_i915(fence)) 335 return dma_fence_wait_timeout(fence, 336 flags & I915_WAIT_INTERRUPTIBLE, 337 timeout); 338 339 rq = to_request(fence); 340 if (i915_request_completed(rq)) 341 goto out; 342 343 timeout = i915_request_wait(rq, flags, timeout); 344 345 out: 346 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 347 i915_request_retire_upto(rq); 348 349 return timeout; 350 } 351 352 static long 353 i915_gem_object_wait_reservation(struct reservation_object *resv, 354 unsigned int flags, 355 long timeout) 356 { 357 unsigned int seq = __read_seqcount_begin(&resv->seq); 358 struct dma_fence *excl; 359 bool prune_fences = false; 360 361 if (flags & I915_WAIT_ALL) { 362 struct dma_fence **shared; 363 unsigned int count, i; 364 int ret; 365 366 ret = reservation_object_get_fences_rcu(resv, 367 &excl, &count, &shared); 368 if (ret) 369 return ret; 370 371 for (i = 0; i < count; i++) { 372 timeout = i915_gem_object_wait_fence(shared[i], 373 flags, timeout); 374 if (timeout < 0) 375 break; 376 377 dma_fence_put(shared[i]); 378 } 379 380 for (; i < count; i++) 381 dma_fence_put(shared[i]); 382 kfree(shared); 383 384 /* 385 * If both shared fences and an exclusive fence exist, 386 * then by construction the shared fences must be later 387 * than the exclusive fence. If we successfully wait for 388 * all the shared fences, we know that the exclusive fence 389 * must all be signaled. 
If all the shared fences are 390 * signaled, we can prune the array and recover the 391 * floating references on the fences/requests. 392 */ 393 prune_fences = count && timeout >= 0; 394 } else { 395 excl = reservation_object_get_excl_rcu(resv); 396 } 397 398 if (excl && timeout >= 0) 399 timeout = i915_gem_object_wait_fence(excl, flags, timeout); 400 401 dma_fence_put(excl); 402 403 /* 404 * Opportunistically prune the fences iff we know they have *all* been 405 * signaled and that the reservation object has not been changed (i.e. 406 * no new fences have been added). 407 */ 408 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 409 if (reservation_object_trylock(resv)) { 410 if (!__read_seqcount_retry(&resv->seq, seq)) 411 reservation_object_add_excl_fence(resv, NULL); 412 reservation_object_unlock(resv); 413 } 414 } 415 416 return timeout; 417 } 418 419 static void __fence_set_priority(struct dma_fence *fence, 420 const struct i915_sched_attr *attr) 421 { 422 struct i915_request *rq; 423 struct intel_engine_cs *engine; 424 425 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 426 return; 427 428 rq = to_request(fence); 429 engine = rq->engine; 430 431 local_bh_disable(); 432 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 433 if (engine->schedule) 434 engine->schedule(rq, attr); 435 rcu_read_unlock(); 436 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 437 } 438 439 static void fence_set_priority(struct dma_fence *fence, 440 const struct i915_sched_attr *attr) 441 { 442 /* Recurse once into a fence-array */ 443 if (dma_fence_is_array(fence)) { 444 struct dma_fence_array *array = to_dma_fence_array(fence); 445 int i; 446 447 for (i = 0; i < array->num_fences; i++) 448 __fence_set_priority(array->fences[i], attr); 449 } else { 450 __fence_set_priority(fence, attr); 451 } 452 } 453 454 int 455 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 456 unsigned int flags, 457 const struct i915_sched_attr *attr) 458 { 459 struct dma_fence *excl; 460 461 if (flags & I915_WAIT_ALL) { 462 struct dma_fence **shared; 463 unsigned int count, i; 464 int ret; 465 466 ret = reservation_object_get_fences_rcu(obj->resv, 467 &excl, &count, &shared); 468 if (ret) 469 return ret; 470 471 for (i = 0; i < count; i++) { 472 fence_set_priority(shared[i], attr); 473 dma_fence_put(shared[i]); 474 } 475 476 kfree(shared); 477 } else { 478 excl = reservation_object_get_excl_rcu(obj->resv); 479 } 480 481 if (excl) { 482 fence_set_priority(excl, attr); 483 dma_fence_put(excl); 484 } 485 return 0; 486 } 487 488 /** 489 * Waits for rendering to the object to be completed 490 * @obj: i915 gem object 491 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 492 * @timeout: how long to wait 493 */ 494 int 495 i915_gem_object_wait(struct drm_i915_gem_object *obj, 496 unsigned int flags, 497 long timeout) 498 { 499 might_sleep(); 500 GEM_BUG_ON(timeout < 0); 501 502 timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout); 503 return timeout < 0 ? timeout : 0; 504 } 505 506 static int 507 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 508 struct drm_i915_gem_pwrite *args, 509 struct drm_file *file) 510 { 511 void *vaddr = obj->phys_handle->vaddr + args->offset; 512 char __user *user_data = u64_to_user_ptr(args->data_ptr); 513 514 /* We manually control the domain here and pretend that it 515 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
516 */ 517 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 518 if (copy_from_user(vaddr, user_data, args->size)) 519 return -EFAULT; 520 521 drm_clflush_virt_range(vaddr, args->size); 522 i915_gem_chipset_flush(to_i915(obj->base.dev)); 523 524 intel_fb_obj_flush(obj, ORIGIN_CPU); 525 return 0; 526 } 527 528 static int 529 i915_gem_create(struct drm_file *file, 530 struct drm_i915_private *dev_priv, 531 u64 *size_p, 532 u32 *handle_p) 533 { 534 struct drm_i915_gem_object *obj; 535 u32 handle; 536 u64 size; 537 int ret; 538 539 size = round_up(*size_p, PAGE_SIZE); 540 if (size == 0) 541 return -EINVAL; 542 543 /* Allocate the new object */ 544 obj = i915_gem_object_create(dev_priv, size); 545 if (IS_ERR(obj)) 546 return PTR_ERR(obj); 547 548 ret = drm_gem_handle_create(file, &obj->base, &handle); 549 /* drop reference from allocate - handle holds it now */ 550 i915_gem_object_put(obj); 551 if (ret) 552 return ret; 553 554 *handle_p = handle; 555 *size_p = size; 556 return 0; 557 } 558 559 int 560 i915_gem_dumb_create(struct drm_file *file, 561 struct drm_device *dev, 562 struct drm_mode_create_dumb *args) 563 { 564 int cpp = DIV_ROUND_UP(args->bpp, 8); 565 u32 format; 566 567 switch (cpp) { 568 case 1: 569 format = DRM_FORMAT_C8; 570 break; 571 case 2: 572 format = DRM_FORMAT_RGB565; 573 break; 574 case 4: 575 format = DRM_FORMAT_XRGB8888; 576 break; 577 default: 578 return -EINVAL; 579 } 580 581 /* have to work out size/pitch and return them */ 582 args->pitch = ALIGN(args->width * cpp, 64); 583 584 /* align stride to page size so that we can remap */ 585 if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format, 586 DRM_FORMAT_MOD_LINEAR)) 587 args->pitch = ALIGN(args->pitch, 4096); 588 589 args->size = args->pitch * args->height; 590 return i915_gem_create(file, to_i915(dev), 591 &args->size, &args->handle); 592 } 593 594 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 595 { 596 return !(obj->cache_level == I915_CACHE_NONE || 597 obj->cache_level == I915_CACHE_WT); 598 } 599 600 /** 601 * Creates a new mm object and returns a handle to it. 602 * @dev: drm device pointer 603 * @data: ioctl data blob 604 * @file: drm file pointer 605 */ 606 int 607 i915_gem_create_ioctl(struct drm_device *dev, void *data, 608 struct drm_file *file) 609 { 610 struct drm_i915_private *dev_priv = to_i915(dev); 611 struct drm_i915_gem_create *args = data; 612 613 i915_gem_flush_free_objects(dev_priv); 614 615 return i915_gem_create(file, dev_priv, 616 &args->size, &args->handle); 617 } 618 619 static inline enum fb_op_origin 620 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 621 { 622 return (domain == I915_GEM_DOMAIN_GTT ? 623 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 624 } 625 626 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 627 { 628 intel_wakeref_t wakeref; 629 630 /* 631 * No actual flushing is required for the GTT write domain for reads 632 * from the GTT domain. Writes to it "immediately" go to main memory 633 * as far as we know, so there's no chipset flush. It also doesn't 634 * land in the GPU render cache. 635 * 636 * However, we do have to enforce the order so that all writes through 637 * the GTT land before any writes to the device, such as updates to 638 * the GATT itself. 639 * 640 * We also have to wait a bit for the writes to land from the GTT. 641 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 642 * timing. 
This issue has only been observed when switching quickly 643 * between GTT writes and CPU reads from inside the kernel on recent hw, 644 * and it appears to only affect discrete GTT blocks (i.e. on LLC 645 * system agents we cannot reproduce this behaviour, until Cannonlake 646 * that was!). 647 */ 648 649 wmb(); 650 651 if (INTEL_INFO(dev_priv)->has_coherent_ggtt) 652 return; 653 654 i915_gem_chipset_flush(dev_priv); 655 656 with_intel_runtime_pm(dev_priv, wakeref) { 657 spin_lock_irq(&dev_priv->uncore.lock); 658 659 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 660 661 spin_unlock_irq(&dev_priv->uncore.lock); 662 } 663 } 664 665 static void 666 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 667 { 668 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 669 struct i915_vma *vma; 670 671 if (!(obj->write_domain & flush_domains)) 672 return; 673 674 switch (obj->write_domain) { 675 case I915_GEM_DOMAIN_GTT: 676 i915_gem_flush_ggtt_writes(dev_priv); 677 678 intel_fb_obj_flush(obj, 679 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 680 681 for_each_ggtt_vma(vma, obj) { 682 if (vma->iomap) 683 continue; 684 685 i915_vma_unset_ggtt_write(vma); 686 } 687 break; 688 689 case I915_GEM_DOMAIN_WC: 690 wmb(); 691 break; 692 693 case I915_GEM_DOMAIN_CPU: 694 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 695 break; 696 697 case I915_GEM_DOMAIN_RENDER: 698 if (gpu_write_needs_clflush(obj)) 699 obj->cache_dirty = true; 700 break; 701 } 702 703 obj->write_domain = 0; 704 } 705 706 /* 707 * Pins the specified object's pages and synchronizes the object with 708 * GPU accesses. Sets needs_clflush to non-zero if the caller should 709 * flush the object from the CPU cache. 710 */ 711 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 712 unsigned int *needs_clflush) 713 { 714 int ret; 715 716 lockdep_assert_held(&obj->base.dev->struct_mutex); 717 718 *needs_clflush = 0; 719 if (!i915_gem_object_has_struct_page(obj)) 720 return -ENODEV; 721 722 ret = i915_gem_object_wait(obj, 723 I915_WAIT_INTERRUPTIBLE | 724 I915_WAIT_LOCKED, 725 MAX_SCHEDULE_TIMEOUT); 726 if (ret) 727 return ret; 728 729 ret = i915_gem_object_pin_pages(obj); 730 if (ret) 731 return ret; 732 733 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 734 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 735 ret = i915_gem_object_set_to_cpu_domain(obj, false); 736 if (ret) 737 goto err_unpin; 738 else 739 goto out; 740 } 741 742 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 743 744 /* If we're not in the cpu read domain, set ourself into the gtt 745 * read domain and manually flush cachelines (if required). This 746 * optimizes for the case when the gpu will dirty the data 747 * anyway again before the next pread happens. 
748 */ 749 if (!obj->cache_dirty && 750 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 751 *needs_clflush = CLFLUSH_BEFORE; 752 753 out: 754 /* return with the pages pinned */ 755 return 0; 756 757 err_unpin: 758 i915_gem_object_unpin_pages(obj); 759 return ret; 760 } 761 762 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 763 unsigned int *needs_clflush) 764 { 765 int ret; 766 767 lockdep_assert_held(&obj->base.dev->struct_mutex); 768 769 *needs_clflush = 0; 770 if (!i915_gem_object_has_struct_page(obj)) 771 return -ENODEV; 772 773 ret = i915_gem_object_wait(obj, 774 I915_WAIT_INTERRUPTIBLE | 775 I915_WAIT_LOCKED | 776 I915_WAIT_ALL, 777 MAX_SCHEDULE_TIMEOUT); 778 if (ret) 779 return ret; 780 781 ret = i915_gem_object_pin_pages(obj); 782 if (ret) 783 return ret; 784 785 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 786 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 787 ret = i915_gem_object_set_to_cpu_domain(obj, true); 788 if (ret) 789 goto err_unpin; 790 else 791 goto out; 792 } 793 794 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 795 796 /* If we're not in the cpu write domain, set ourself into the 797 * gtt write domain and manually flush cachelines (as required). 798 * This optimizes for the case when the gpu will use the data 799 * right away and we therefore have to clflush anyway. 800 */ 801 if (!obj->cache_dirty) { 802 *needs_clflush |= CLFLUSH_AFTER; 803 804 /* 805 * Same trick applies to invalidate partially written 806 * cachelines read before writing. 807 */ 808 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 809 *needs_clflush |= CLFLUSH_BEFORE; 810 } 811 812 out: 813 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 814 obj->mm.dirty = true; 815 /* return with the pages pinned */ 816 return 0; 817 818 err_unpin: 819 i915_gem_object_unpin_pages(obj); 820 return ret; 821 } 822 823 static int 824 shmem_pread(struct page *page, int offset, int len, char __user *user_data, 825 bool needs_clflush) 826 { 827 char *vaddr; 828 int ret; 829 830 vaddr = kmap(page); 831 832 if (needs_clflush) 833 drm_clflush_virt_range(vaddr + offset, len); 834 835 ret = __copy_to_user(user_data, vaddr + offset, len); 836 837 kunmap(page); 838 839 return ret ? -EFAULT : 0; 840 } 841 842 static int 843 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 844 struct drm_i915_gem_pread *args) 845 { 846 char __user *user_data; 847 u64 remain; 848 unsigned int needs_clflush; 849 unsigned int idx, offset; 850 int ret; 851 852 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 853 if (ret) 854 return ret; 855 856 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 857 mutex_unlock(&obj->base.dev->struct_mutex); 858 if (ret) 859 return ret; 860 861 remain = args->size; 862 user_data = u64_to_user_ptr(args->data_ptr); 863 offset = offset_in_page(args->offset); 864 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 865 struct page *page = i915_gem_object_get_page(obj, idx); 866 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 867 868 ret = shmem_pread(page, offset, length, user_data, 869 needs_clflush); 870 if (ret) 871 break; 872 873 remain -= length; 874 user_data += length; 875 offset = 0; 876 } 877 878 i915_gem_obj_finish_shmem_access(obj); 879 return ret; 880 } 881 882 static inline bool 883 gtt_user_read(struct io_mapping *mapping, 884 loff_t base, int offset, 885 char __user *user_data, int length) 886 { 887 void __iomem *vaddr; 888 unsigned long unwritten; 889 890 /* We can use the cpu mem copy function because this is X86. 
*/ 891 vaddr = io_mapping_map_atomic_wc(mapping, base); 892 unwritten = __copy_to_user_inatomic(user_data, 893 (void __force *)vaddr + offset, 894 length); 895 io_mapping_unmap_atomic(vaddr); 896 if (unwritten) { 897 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 898 unwritten = copy_to_user(user_data, 899 (void __force *)vaddr + offset, 900 length); 901 io_mapping_unmap(vaddr); 902 } 903 return unwritten; 904 } 905 906 static int 907 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 908 const struct drm_i915_gem_pread *args) 909 { 910 struct drm_i915_private *i915 = to_i915(obj->base.dev); 911 struct i915_ggtt *ggtt = &i915->ggtt; 912 intel_wakeref_t wakeref; 913 struct drm_mm_node node; 914 struct i915_vma *vma; 915 void __user *user_data; 916 u64 remain, offset; 917 int ret; 918 919 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 920 if (ret) 921 return ret; 922 923 wakeref = intel_runtime_pm_get(i915); 924 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 925 PIN_MAPPABLE | 926 PIN_NONFAULT | 927 PIN_NONBLOCK); 928 if (!IS_ERR(vma)) { 929 node.start = i915_ggtt_offset(vma); 930 node.allocated = false; 931 ret = i915_vma_put_fence(vma); 932 if (ret) { 933 i915_vma_unpin(vma); 934 vma = ERR_PTR(ret); 935 } 936 } 937 if (IS_ERR(vma)) { 938 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 939 if (ret) 940 goto out_unlock; 941 GEM_BUG_ON(!node.allocated); 942 } 943 944 ret = i915_gem_object_set_to_gtt_domain(obj, false); 945 if (ret) 946 goto out_unpin; 947 948 mutex_unlock(&i915->drm.struct_mutex); 949 950 user_data = u64_to_user_ptr(args->data_ptr); 951 remain = args->size; 952 offset = args->offset; 953 954 while (remain > 0) { 955 /* Operation in this page 956 * 957 * page_base = page offset within aperture 958 * page_offset = offset within page 959 * page_length = bytes to copy for this page 960 */ 961 u32 page_base = node.start; 962 unsigned page_offset = offset_in_page(offset); 963 unsigned page_length = PAGE_SIZE - page_offset; 964 page_length = remain < page_length ? remain : page_length; 965 if (node.allocated) { 966 wmb(); 967 ggtt->vm.insert_page(&ggtt->vm, 968 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 969 node.start, I915_CACHE_NONE, 0); 970 wmb(); 971 } else { 972 page_base += offset & PAGE_MASK; 973 } 974 975 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 976 user_data, page_length)) { 977 ret = -EFAULT; 978 break; 979 } 980 981 remain -= page_length; 982 user_data += page_length; 983 offset += page_length; 984 } 985 986 mutex_lock(&i915->drm.struct_mutex); 987 out_unpin: 988 if (node.allocated) { 989 wmb(); 990 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 991 remove_mappable_node(&node); 992 } else { 993 i915_vma_unpin(vma); 994 } 995 out_unlock: 996 intel_runtime_pm_put(i915, wakeref); 997 mutex_unlock(&i915->drm.struct_mutex); 998 999 return ret; 1000 } 1001 1002 /** 1003 * Reads data from the object referenced by handle. 1004 * @dev: drm device pointer 1005 * @data: ioctl data blob 1006 * @file: drm file pointer 1007 * 1008 * On error, the contents of *data are undefined. 
1009 */ 1010 int 1011 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1012 struct drm_file *file) 1013 { 1014 struct drm_i915_gem_pread *args = data; 1015 struct drm_i915_gem_object *obj; 1016 int ret; 1017 1018 if (args->size == 0) 1019 return 0; 1020 1021 if (!access_ok(u64_to_user_ptr(args->data_ptr), 1022 args->size)) 1023 return -EFAULT; 1024 1025 obj = i915_gem_object_lookup(file, args->handle); 1026 if (!obj) 1027 return -ENOENT; 1028 1029 /* Bounds check source. */ 1030 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1031 ret = -EINVAL; 1032 goto out; 1033 } 1034 1035 trace_i915_gem_object_pread(obj, args->offset, args->size); 1036 1037 ret = i915_gem_object_wait(obj, 1038 I915_WAIT_INTERRUPTIBLE, 1039 MAX_SCHEDULE_TIMEOUT); 1040 if (ret) 1041 goto out; 1042 1043 ret = i915_gem_object_pin_pages(obj); 1044 if (ret) 1045 goto out; 1046 1047 ret = i915_gem_shmem_pread(obj, args); 1048 if (ret == -EFAULT || ret == -ENODEV) 1049 ret = i915_gem_gtt_pread(obj, args); 1050 1051 i915_gem_object_unpin_pages(obj); 1052 out: 1053 i915_gem_object_put(obj); 1054 return ret; 1055 } 1056 1057 /* This is the fast write path which cannot handle 1058 * page faults in the source data 1059 */ 1060 1061 static inline bool 1062 ggtt_write(struct io_mapping *mapping, 1063 loff_t base, int offset, 1064 char __user *user_data, int length) 1065 { 1066 void __iomem *vaddr; 1067 unsigned long unwritten; 1068 1069 /* We can use the cpu mem copy function because this is X86. */ 1070 vaddr = io_mapping_map_atomic_wc(mapping, base); 1071 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1072 user_data, length); 1073 io_mapping_unmap_atomic(vaddr); 1074 if (unwritten) { 1075 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1076 unwritten = copy_from_user((void __force *)vaddr + offset, 1077 user_data, length); 1078 io_mapping_unmap(vaddr); 1079 } 1080 1081 return unwritten; 1082 } 1083 1084 /** 1085 * This is the fast pwrite path, where we copy the data directly from the 1086 * user into the GTT, uncached. 1087 * @obj: i915 GEM object 1088 * @args: pwrite arguments structure 1089 */ 1090 static int 1091 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1092 const struct drm_i915_gem_pwrite *args) 1093 { 1094 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1095 struct i915_ggtt *ggtt = &i915->ggtt; 1096 intel_wakeref_t wakeref; 1097 struct drm_mm_node node; 1098 struct i915_vma *vma; 1099 u64 remain, offset; 1100 void __user *user_data; 1101 int ret; 1102 1103 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1104 if (ret) 1105 return ret; 1106 1107 if (i915_gem_object_has_struct_page(obj)) { 1108 /* 1109 * Avoid waking the device up if we can fallback, as 1110 * waking/resuming is very slow (worst-case 10-100 ms 1111 * depending on PCI sleeps and our own resume time). 1112 * This easily dwarfs any performance advantage from 1113 * using the cache bypass of indirect GGTT access. 
1114 */ 1115 wakeref = intel_runtime_pm_get_if_in_use(i915); 1116 if (!wakeref) { 1117 ret = -EFAULT; 1118 goto out_unlock; 1119 } 1120 } else { 1121 /* No backing pages, no fallback, we must force GGTT access */ 1122 wakeref = intel_runtime_pm_get(i915); 1123 } 1124 1125 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1126 PIN_MAPPABLE | 1127 PIN_NONFAULT | 1128 PIN_NONBLOCK); 1129 if (!IS_ERR(vma)) { 1130 node.start = i915_ggtt_offset(vma); 1131 node.allocated = false; 1132 ret = i915_vma_put_fence(vma); 1133 if (ret) { 1134 i915_vma_unpin(vma); 1135 vma = ERR_PTR(ret); 1136 } 1137 } 1138 if (IS_ERR(vma)) { 1139 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1140 if (ret) 1141 goto out_rpm; 1142 GEM_BUG_ON(!node.allocated); 1143 } 1144 1145 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1146 if (ret) 1147 goto out_unpin; 1148 1149 mutex_unlock(&i915->drm.struct_mutex); 1150 1151 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1152 1153 user_data = u64_to_user_ptr(args->data_ptr); 1154 offset = args->offset; 1155 remain = args->size; 1156 while (remain) { 1157 /* Operation in this page 1158 * 1159 * page_base = page offset within aperture 1160 * page_offset = offset within page 1161 * page_length = bytes to copy for this page 1162 */ 1163 u32 page_base = node.start; 1164 unsigned int page_offset = offset_in_page(offset); 1165 unsigned int page_length = PAGE_SIZE - page_offset; 1166 page_length = remain < page_length ? remain : page_length; 1167 if (node.allocated) { 1168 wmb(); /* flush the write before we modify the GGTT */ 1169 ggtt->vm.insert_page(&ggtt->vm, 1170 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1171 node.start, I915_CACHE_NONE, 0); 1172 wmb(); /* flush modifications to the GGTT (insert_page) */ 1173 } else { 1174 page_base += offset & PAGE_MASK; 1175 } 1176 /* If we get a fault while copying data, then (presumably) our 1177 * source page isn't available. Return the error and we'll 1178 * retry in the slow path. 1179 * If the object is non-shmem backed, we retry again with the 1180 * path that handles page fault. 1181 */ 1182 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1183 user_data, page_length)) { 1184 ret = -EFAULT; 1185 break; 1186 } 1187 1188 remain -= page_length; 1189 user_data += page_length; 1190 offset += page_length; 1191 } 1192 intel_fb_obj_flush(obj, ORIGIN_CPU); 1193 1194 mutex_lock(&i915->drm.struct_mutex); 1195 out_unpin: 1196 if (node.allocated) { 1197 wmb(); 1198 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1199 remove_mappable_node(&node); 1200 } else { 1201 i915_vma_unpin(vma); 1202 } 1203 out_rpm: 1204 intel_runtime_pm_put(i915, wakeref); 1205 out_unlock: 1206 mutex_unlock(&i915->drm.struct_mutex); 1207 return ret; 1208 } 1209 1210 /* Per-page copy function for the shmem pwrite fastpath. 1211 * Flushes invalid cachelines before writing to the target if 1212 * needs_clflush_before is set and flushes out any written cachelines after 1213 * writing if needs_clflush is set. 1214 */ 1215 static int 1216 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1217 bool needs_clflush_before, 1218 bool needs_clflush_after) 1219 { 1220 char *vaddr; 1221 int ret; 1222 1223 vaddr = kmap(page); 1224 1225 if (needs_clflush_before) 1226 drm_clflush_virt_range(vaddr + offset, len); 1227 1228 ret = __copy_from_user(vaddr + offset, user_data, len); 1229 if (!ret && needs_clflush_after) 1230 drm_clflush_virt_range(vaddr + offset, len); 1231 1232 kunmap(page); 1233 1234 return ret ? 
-EFAULT : 0; 1235 } 1236 1237 static int 1238 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1239 const struct drm_i915_gem_pwrite *args) 1240 { 1241 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1242 void __user *user_data; 1243 u64 remain; 1244 unsigned int partial_cacheline_write; 1245 unsigned int needs_clflush; 1246 unsigned int offset, idx; 1247 int ret; 1248 1249 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1250 if (ret) 1251 return ret; 1252 1253 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1254 mutex_unlock(&i915->drm.struct_mutex); 1255 if (ret) 1256 return ret; 1257 1258 /* If we don't overwrite a cacheline completely we need to be 1259 * careful to have up-to-date data by first clflushing. Don't 1260 * overcomplicate things and flush the entire patch. 1261 */ 1262 partial_cacheline_write = 0; 1263 if (needs_clflush & CLFLUSH_BEFORE) 1264 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1265 1266 user_data = u64_to_user_ptr(args->data_ptr); 1267 remain = args->size; 1268 offset = offset_in_page(args->offset); 1269 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1270 struct page *page = i915_gem_object_get_page(obj, idx); 1271 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1272 1273 ret = shmem_pwrite(page, offset, length, user_data, 1274 (offset | length) & partial_cacheline_write, 1275 needs_clflush & CLFLUSH_AFTER); 1276 if (ret) 1277 break; 1278 1279 remain -= length; 1280 user_data += length; 1281 offset = 0; 1282 } 1283 1284 intel_fb_obj_flush(obj, ORIGIN_CPU); 1285 i915_gem_obj_finish_shmem_access(obj); 1286 return ret; 1287 } 1288 1289 /** 1290 * Writes data to the object referenced by handle. 1291 * @dev: drm device 1292 * @data: ioctl data blob 1293 * @file: drm file 1294 * 1295 * On error, the contents of the buffer that were to be modified are undefined. 1296 */ 1297 int 1298 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1299 struct drm_file *file) 1300 { 1301 struct drm_i915_gem_pwrite *args = data; 1302 struct drm_i915_gem_object *obj; 1303 int ret; 1304 1305 if (args->size == 0) 1306 return 0; 1307 1308 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) 1309 return -EFAULT; 1310 1311 obj = i915_gem_object_lookup(file, args->handle); 1312 if (!obj) 1313 return -ENOENT; 1314 1315 /* Bounds check destination. */ 1316 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1317 ret = -EINVAL; 1318 goto err; 1319 } 1320 1321 /* Writes not allowed into this read-only object */ 1322 if (i915_gem_object_is_readonly(obj)) { 1323 ret = -EINVAL; 1324 goto err; 1325 } 1326 1327 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1328 1329 ret = -ENODEV; 1330 if (obj->ops->pwrite) 1331 ret = obj->ops->pwrite(obj, args); 1332 if (ret != -ENODEV) 1333 goto err; 1334 1335 ret = i915_gem_object_wait(obj, 1336 I915_WAIT_INTERRUPTIBLE | 1337 I915_WAIT_ALL, 1338 MAX_SCHEDULE_TIMEOUT); 1339 if (ret) 1340 goto err; 1341 1342 ret = i915_gem_object_pin_pages(obj); 1343 if (ret) 1344 goto err; 1345 1346 ret = -EFAULT; 1347 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1348 * it would end up going through the fenced access, and we'll get 1349 * different detiling behavior between reading and writing. 1350 * pread/pwrite currently are reading and writing from the CPU 1351 * perspective, requiring manual detiling by the client. 
1352 */ 1353 if (!i915_gem_object_has_struct_page(obj) || 1354 cpu_write_needs_clflush(obj)) 1355 /* Note that the gtt paths might fail with non-page-backed user 1356 * pointers (e.g. gtt mappings when moving data between 1357 * textures). Fallback to the shmem path in that case. 1358 */ 1359 ret = i915_gem_gtt_pwrite_fast(obj, args); 1360 1361 if (ret == -EFAULT || ret == -ENOSPC) { 1362 if (obj->phys_handle) 1363 ret = i915_gem_phys_pwrite(obj, args, file); 1364 else 1365 ret = i915_gem_shmem_pwrite(obj, args); 1366 } 1367 1368 i915_gem_object_unpin_pages(obj); 1369 err: 1370 i915_gem_object_put(obj); 1371 return ret; 1372 } 1373 1374 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1375 { 1376 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1377 struct list_head *list; 1378 struct i915_vma *vma; 1379 1380 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1381 1382 mutex_lock(&i915->ggtt.vm.mutex); 1383 for_each_ggtt_vma(vma, obj) { 1384 if (!drm_mm_node_allocated(&vma->node)) 1385 continue; 1386 1387 list_move_tail(&vma->vm_link, &vma->vm->bound_list); 1388 } 1389 mutex_unlock(&i915->ggtt.vm.mutex); 1390 1391 spin_lock(&i915->mm.obj_lock); 1392 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1393 list_move_tail(&obj->mm.link, list); 1394 spin_unlock(&i915->mm.obj_lock); 1395 } 1396 1397 /** 1398 * Called when user space prepares to use an object with the CPU, either 1399 * through the mmap ioctl's mapping or a GTT mapping. 1400 * @dev: drm device 1401 * @data: ioctl data blob 1402 * @file: drm file 1403 */ 1404 int 1405 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1406 struct drm_file *file) 1407 { 1408 struct drm_i915_gem_set_domain *args = data; 1409 struct drm_i915_gem_object *obj; 1410 u32 read_domains = args->read_domains; 1411 u32 write_domain = args->write_domain; 1412 int err; 1413 1414 /* Only handle setting domains to types used by the CPU. */ 1415 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1416 return -EINVAL; 1417 1418 /* 1419 * Having something in the write domain implies it's in the read 1420 * domain, and only that read domain. Enforce that in the request. 1421 */ 1422 if (write_domain && read_domains != write_domain) 1423 return -EINVAL; 1424 1425 if (!read_domains) 1426 return 0; 1427 1428 obj = i915_gem_object_lookup(file, args->handle); 1429 if (!obj) 1430 return -ENOENT; 1431 1432 /* 1433 * Already in the desired write domain? Nothing for us to do! 1434 * 1435 * We apply a little bit of cunning here to catch a broader set of 1436 * no-ops. If obj->write_domain is set, we must be in the same 1437 * obj->read_domains, and only that domain. Therefore, if that 1438 * obj->write_domain matches the request read_domains, we are 1439 * already in the same read/write domain and can skip the operation, 1440 * without having to further check the requested write_domain. 1441 */ 1442 if (READ_ONCE(obj->write_domain) == read_domains) { 1443 err = 0; 1444 goto out; 1445 } 1446 1447 /* 1448 * Try to flush the object off the GPU without holding the lock. 1449 * We will repeat the flush holding the lock in the normal manner 1450 * to catch cases where we are gazumped. 1451 */ 1452 err = i915_gem_object_wait(obj, 1453 I915_WAIT_INTERRUPTIBLE | 1454 I915_WAIT_PRIORITY | 1455 (write_domain ? 
I915_WAIT_ALL : 0), 1456 MAX_SCHEDULE_TIMEOUT); 1457 if (err) 1458 goto out; 1459 1460 /* 1461 * Proxy objects do not control access to the backing storage, ergo 1462 * they cannot be used as a means to manipulate the cache domain 1463 * tracking for that backing storage. The proxy object is always 1464 * considered to be outside of any cache domain. 1465 */ 1466 if (i915_gem_object_is_proxy(obj)) { 1467 err = -ENXIO; 1468 goto out; 1469 } 1470 1471 /* 1472 * Flush and acquire obj->pages so that we are coherent through 1473 * direct access in memory with previous cached writes through 1474 * shmemfs and that our cache domain tracking remains valid. 1475 * For example, if the obj->filp was moved to swap without us 1476 * being notified and releasing the pages, we would mistakenly 1477 * continue to assume that the obj remained out of the CPU cached 1478 * domain. 1479 */ 1480 err = i915_gem_object_pin_pages(obj); 1481 if (err) 1482 goto out; 1483 1484 err = i915_mutex_lock_interruptible(dev); 1485 if (err) 1486 goto out_unpin; 1487 1488 if (read_domains & I915_GEM_DOMAIN_WC) 1489 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1490 else if (read_domains & I915_GEM_DOMAIN_GTT) 1491 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1492 else 1493 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1494 1495 /* And bump the LRU for this access */ 1496 i915_gem_object_bump_inactive_ggtt(obj); 1497 1498 mutex_unlock(&dev->struct_mutex); 1499 1500 if (write_domain != 0) 1501 intel_fb_obj_invalidate(obj, 1502 fb_write_origin(obj, write_domain)); 1503 1504 out_unpin: 1505 i915_gem_object_unpin_pages(obj); 1506 out: 1507 i915_gem_object_put(obj); 1508 return err; 1509 } 1510 1511 /** 1512 * Called when user space has done writes to this buffer 1513 * @dev: drm device 1514 * @data: ioctl data blob 1515 * @file: drm file 1516 */ 1517 int 1518 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1519 struct drm_file *file) 1520 { 1521 struct drm_i915_gem_sw_finish *args = data; 1522 struct drm_i915_gem_object *obj; 1523 1524 obj = i915_gem_object_lookup(file, args->handle); 1525 if (!obj) 1526 return -ENOENT; 1527 1528 /* 1529 * Proxy objects are barred from CPU access, so there is no 1530 * need to ban sw_finish as it is a nop. 1531 */ 1532 1533 /* Pinned buffers may be scanout, so flush the cache */ 1534 i915_gem_object_flush_if_display(obj); 1535 i915_gem_object_put(obj); 1536 1537 return 0; 1538 } 1539 1540 static inline bool 1541 __vma_matches(struct vm_area_struct *vma, struct file *filp, 1542 unsigned long addr, unsigned long size) 1543 { 1544 if (vma->vm_file != filp) 1545 return false; 1546 1547 return vma->vm_start == addr && 1548 (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size); 1549 } 1550 1551 /** 1552 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1553 * it is mapped to. 1554 * @dev: drm device 1555 * @data: ioctl data blob 1556 * @file: drm file 1557 * 1558 * While the mapping holds a reference on the contents of the object, it doesn't 1559 * imply a ref on the object itself. 1560 * 1561 * IMPORTANT: 1562 * 1563 * DRM driver writers who look a this function as an example for how to do GEM 1564 * mmap support, please don't implement mmap support like here. The modern way 1565 * to implement DRM mmap support is with an mmap offset ioctl (like 1566 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 
1567 * That way debug tooling like valgrind will understand what's going on, hiding 1568 * the mmap call in a driver private ioctl will break that. The i915 driver only 1569 * does cpu mmaps this way because we didn't know better. 1570 */ 1571 int 1572 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1573 struct drm_file *file) 1574 { 1575 struct drm_i915_gem_mmap *args = data; 1576 struct drm_i915_gem_object *obj; 1577 unsigned long addr; 1578 1579 if (args->flags & ~(I915_MMAP_WC)) 1580 return -EINVAL; 1581 1582 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1583 return -ENODEV; 1584 1585 obj = i915_gem_object_lookup(file, args->handle); 1586 if (!obj) 1587 return -ENOENT; 1588 1589 /* prime objects have no backing filp to GEM mmap 1590 * pages from. 1591 */ 1592 if (!obj->base.filp) { 1593 addr = -ENXIO; 1594 goto err; 1595 } 1596 1597 if (range_overflows(args->offset, args->size, (u64)obj->base.size)) { 1598 addr = -EINVAL; 1599 goto err; 1600 } 1601 1602 addr = vm_mmap(obj->base.filp, 0, args->size, 1603 PROT_READ | PROT_WRITE, MAP_SHARED, 1604 args->offset); 1605 if (IS_ERR_VALUE(addr)) 1606 goto err; 1607 1608 if (args->flags & I915_MMAP_WC) { 1609 struct mm_struct *mm = current->mm; 1610 struct vm_area_struct *vma; 1611 1612 if (down_write_killable(&mm->mmap_sem)) { 1613 addr = -EINTR; 1614 goto err; 1615 } 1616 vma = find_vma(mm, addr); 1617 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) 1618 vma->vm_page_prot = 1619 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1620 else 1621 addr = -ENOMEM; 1622 up_write(&mm->mmap_sem); 1623 if (IS_ERR_VALUE(addr)) 1624 goto err; 1625 1626 /* This may race, but that's ok, it only gets set */ 1627 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); 1628 } 1629 i915_gem_object_put(obj); 1630 1631 args->addr_ptr = (u64)addr; 1632 return 0; 1633 1634 err: 1635 i915_gem_object_put(obj); 1636 return addr; 1637 } 1638 1639 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) 1640 { 1641 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; 1642 } 1643 1644 /** 1645 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps 1646 * 1647 * A history of the GTT mmap interface: 1648 * 1649 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to 1650 * aligned and suitable for fencing, and still fit into the available 1651 * mappable space left by the pinned display objects. A classic problem 1652 * we called the page-fault-of-doom where we would ping-pong between 1653 * two objects that could not fit inside the GTT and so the memcpy 1654 * would page one object in at the expense of the other between every 1655 * single byte. 1656 * 1657 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none 1658 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the 1659 * object is too large for the available space (or simply too large 1660 * for the mappable aperture!), a view is created instead and faulted 1661 * into userspace. (This view is aligned and sized appropriately for 1662 * fenced access.) 1663 * 1664 * 2 - Recognise WC as a separate cache domain so that we can flush the 1665 * delayed writes via GTT before performing direct access via WC. 1666 * 1667 * 3 - Remove implicit set-domain(GTT) and synchronisation on initial 1668 * pagefault; swapin remains transparent. 1669 * 1670 * Restrictions: 1671 * 1672 * * snoopable objects cannot be accessed via the GTT. 
It can cause machine 1673 * hangs on some architectures, corruption on others. An attempt to service 1674 * a GTT page fault from a snoopable object will generate a SIGBUS. 1675 * 1676 * * the object must be able to fit into RAM (physical memory, though no 1677 * limited to the mappable aperture). 1678 * 1679 * 1680 * Caveats: 1681 * 1682 * * a new GTT page fault will synchronize rendering from the GPU and flush 1683 * all data to system memory. Subsequent access will not be synchronized. 1684 * 1685 * * all mappings are revoked on runtime device suspend. 1686 * 1687 * * there are only 8, 16 or 32 fence registers to share between all users 1688 * (older machines require fence register for display and blitter access 1689 * as well). Contention of the fence registers will cause the previous users 1690 * to be unmapped and any new access will generate new page faults. 1691 * 1692 * * running out of memory while servicing a fault may generate a SIGBUS, 1693 * rather than the expected SIGSEGV. 1694 */ 1695 int i915_gem_mmap_gtt_version(void) 1696 { 1697 return 3; 1698 } 1699 1700 static inline struct i915_ggtt_view 1701 compute_partial_view(const struct drm_i915_gem_object *obj, 1702 pgoff_t page_offset, 1703 unsigned int chunk) 1704 { 1705 struct i915_ggtt_view view; 1706 1707 if (i915_gem_object_is_tiled(obj)) 1708 chunk = roundup(chunk, tile_row_pages(obj)); 1709 1710 view.type = I915_GGTT_VIEW_PARTIAL; 1711 view.partial.offset = rounddown(page_offset, chunk); 1712 view.partial.size = 1713 min_t(unsigned int, chunk, 1714 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 1715 1716 /* If the partial covers the entire object, just create a normal VMA. */ 1717 if (chunk >= obj->base.size >> PAGE_SHIFT) 1718 view.type = I915_GGTT_VIEW_NORMAL; 1719 1720 return view; 1721 } 1722 1723 /** 1724 * i915_gem_fault - fault a page into the GTT 1725 * @vmf: fault info 1726 * 1727 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1728 * from userspace. The fault handler takes care of binding the object to 1729 * the GTT (if needed), allocating and programming a fence register (again, 1730 * only if needed based on whether the old reg is still valid or the object 1731 * is tiled) and inserting a new PTE into the faulting process. 1732 * 1733 * Note that the faulting process may involve evicting existing objects 1734 * from the GTT and/or fence registers to make room. So performance may 1735 * suffer if the GTT working set is large or there are few fence registers 1736 * left. 1737 * 1738 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 1739 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 
1740 */ 1741 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 1742 { 1743 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 1744 struct vm_area_struct *area = vmf->vma; 1745 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 1746 struct drm_device *dev = obj->base.dev; 1747 struct drm_i915_private *dev_priv = to_i915(dev); 1748 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1749 bool write = area->vm_flags & VM_WRITE; 1750 intel_wakeref_t wakeref; 1751 struct i915_vma *vma; 1752 pgoff_t page_offset; 1753 int srcu; 1754 int ret; 1755 1756 /* Sanity check that we allow writing into this object */ 1757 if (i915_gem_object_is_readonly(obj) && write) 1758 return VM_FAULT_SIGBUS; 1759 1760 /* We don't use vmf->pgoff since that has the fake offset */ 1761 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 1762 1763 trace_i915_gem_object_fault(obj, page_offset, true, write); 1764 1765 ret = i915_gem_object_pin_pages(obj); 1766 if (ret) 1767 goto err; 1768 1769 wakeref = intel_runtime_pm_get(dev_priv); 1770 1771 srcu = i915_reset_trylock(dev_priv); 1772 if (srcu < 0) { 1773 ret = srcu; 1774 goto err_rpm; 1775 } 1776 1777 ret = i915_mutex_lock_interruptible(dev); 1778 if (ret) 1779 goto err_reset; 1780 1781 /* Access to snoopable pages through the GTT is incoherent. */ 1782 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 1783 ret = -EFAULT; 1784 goto err_unlock; 1785 } 1786 1787 /* Now pin it into the GTT as needed */ 1788 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1789 PIN_MAPPABLE | 1790 PIN_NONBLOCK | 1791 PIN_NONFAULT); 1792 if (IS_ERR(vma)) { 1793 /* Use a partial view if it is bigger than available space */ 1794 struct i915_ggtt_view view = 1795 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 1796 unsigned int flags; 1797 1798 flags = PIN_MAPPABLE; 1799 if (view.type == I915_GGTT_VIEW_NORMAL) 1800 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 1801 1802 /* 1803 * Userspace is now writing through an untracked VMA, abandon 1804 * all hope that the hardware is able to track future writes. 
1805 */ 1806 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 1807 1808 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 1809 if (IS_ERR(vma) && !view.type) { 1810 flags = PIN_MAPPABLE; 1811 view.type = I915_GGTT_VIEW_PARTIAL; 1812 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 1813 } 1814 } 1815 if (IS_ERR(vma)) { 1816 ret = PTR_ERR(vma); 1817 goto err_unlock; 1818 } 1819 1820 ret = i915_vma_pin_fence(vma); 1821 if (ret) 1822 goto err_unpin; 1823 1824 /* Finally, remap it using the new GTT offset */ 1825 ret = remap_io_mapping(area, 1826 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 1827 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 1828 min_t(u64, vma->size, area->vm_end - area->vm_start), 1829 &ggtt->iomap); 1830 if (ret) 1831 goto err_fence; 1832 1833 /* Mark as being mmapped into userspace for later revocation */ 1834 assert_rpm_wakelock_held(dev_priv); 1835 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 1836 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 1837 GEM_BUG_ON(!obj->userfault_count); 1838 1839 i915_vma_set_ggtt_write(vma); 1840 1841 err_fence: 1842 i915_vma_unpin_fence(vma); 1843 err_unpin: 1844 __i915_vma_unpin(vma); 1845 err_unlock: 1846 mutex_unlock(&dev->struct_mutex); 1847 err_reset: 1848 i915_reset_unlock(dev_priv, srcu); 1849 err_rpm: 1850 intel_runtime_pm_put(dev_priv, wakeref); 1851 i915_gem_object_unpin_pages(obj); 1852 err: 1853 switch (ret) { 1854 case -EIO: 1855 /* 1856 * We eat errors when the gpu is terminally wedged to avoid 1857 * userspace unduly crashing (gl has no provisions for mmaps to 1858 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1859 * and so needs to be reported. 1860 */ 1861 if (!i915_terminally_wedged(dev_priv)) 1862 return VM_FAULT_SIGBUS; 1863 /* else: fall through */ 1864 case -EAGAIN: 1865 /* 1866 * EAGAIN means the gpu is hung and we'll wait for the error 1867 * handler to reset everything when re-faulting in 1868 * i915_mutex_lock_interruptible. 1869 */ 1870 case 0: 1871 case -ERESTARTSYS: 1872 case -EINTR: 1873 case -EBUSY: 1874 /* 1875 * EBUSY is ok: this just means that another thread 1876 * already did the job. 1877 */ 1878 return VM_FAULT_NOPAGE; 1879 case -ENOMEM: 1880 return VM_FAULT_OOM; 1881 case -ENOSPC: 1882 case -EFAULT: 1883 return VM_FAULT_SIGBUS; 1884 default: 1885 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1886 return VM_FAULT_SIGBUS; 1887 } 1888 } 1889 1890 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 1891 { 1892 struct i915_vma *vma; 1893 1894 GEM_BUG_ON(!obj->userfault_count); 1895 1896 obj->userfault_count = 0; 1897 list_del(&obj->userfault_link); 1898 drm_vma_node_unmap(&obj->base.vma_node, 1899 obj->base.dev->anon_inode->i_mapping); 1900 1901 for_each_ggtt_vma(vma, obj) 1902 i915_vma_unset_userfault(vma); 1903 } 1904 1905 /** 1906 * i915_gem_release_mmap - remove physical page mappings 1907 * @obj: obj in question 1908 * 1909 * Preserve the reservation of the mmapping with the DRM core code, but 1910 * relinquish ownership of the pages back to the system. 1911 * 1912 * It is vital that we remove the page mapping if we have mapped a tiled 1913 * object through the GTT and then lose the fence register due to 1914 * resource pressure. Similarly if the object has been moved out of the 1915 * aperture, than pages mapped into userspace must be revoked. Removing the 1916 * mapping will then trigger a page fault on the next user access, allowing 1917 * fixup by i915_gem_fault(). 
1918 */ 1919 void 1920 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1921 { 1922 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1923 intel_wakeref_t wakeref; 1924 1925 /* Serialisation between user GTT access and our code depends upon 1926 * revoking the CPU's PTE whilst the mutex is held. The next user 1927 * pagefault then has to wait until we release the mutex. 1928 * 1929 * Note that RPM complicates somewhat by adding an additional 1930 * requirement that operations to the GGTT be made holding the RPM 1931 * wakeref. 1932 */ 1933 lockdep_assert_held(&i915->drm.struct_mutex); 1934 wakeref = intel_runtime_pm_get(i915); 1935 1936 if (!obj->userfault_count) 1937 goto out; 1938 1939 __i915_gem_object_release_mmap(obj); 1940 1941 /* Ensure that the CPU's PTE are revoked and there are not outstanding 1942 * memory transactions from userspace before we return. The TLB 1943 * flushing implied above by changing the PTE above *should* be 1944 * sufficient, an extra barrier here just provides us with a bit 1945 * of paranoid documentation about our requirement to serialise 1946 * memory writes before touching registers / GSM. 1947 */ 1948 wmb(); 1949 1950 out: 1951 intel_runtime_pm_put(i915, wakeref); 1952 } 1953 1954 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 1955 { 1956 struct drm_i915_gem_object *obj, *on; 1957 int i; 1958 1959 /* 1960 * Only called during RPM suspend. All users of the userfault_list 1961 * must be holding an RPM wakeref to ensure that this can not 1962 * run concurrently with themselves (and use the struct_mutex for 1963 * protection between themselves). 1964 */ 1965 1966 list_for_each_entry_safe(obj, on, 1967 &dev_priv->mm.userfault_list, userfault_link) 1968 __i915_gem_object_release_mmap(obj); 1969 1970 /* The fence will be lost when the device powers down. If any were 1971 * in use by hardware (i.e. they are pinned), we should not be powering 1972 * down! All other fences will be reacquired by the user upon waking. 1973 */ 1974 for (i = 0; i < dev_priv->num_fence_regs; i++) { 1975 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 1976 1977 /* Ideally we want to assert that the fence register is not 1978 * live at this point (i.e. that no piece of code will be 1979 * trying to write through fence + GTT, as that both violates 1980 * our tracking of activity and associated locking/barriers, 1981 * but also is illegal given that the hw is powered down). 1982 * 1983 * Previously we used reg->pin_count as a "liveness" indicator. 1984 * That is not sufficient, and we need a more fine-grained 1985 * tool if we want to have a sanity check here. 
1986 */ 1987 1988 if (!reg->vma) 1989 continue; 1990 1991 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 1992 reg->dirty = true; 1993 } 1994 } 1995 1996 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1997 { 1998 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 1999 int err; 2000 2001 err = drm_gem_create_mmap_offset(&obj->base); 2002 if (likely(!err)) 2003 return 0; 2004 2005 /* Attempt to reap some mmap space from dead objects */ 2006 do { 2007 err = i915_gem_wait_for_idle(dev_priv, 2008 I915_WAIT_INTERRUPTIBLE, 2009 MAX_SCHEDULE_TIMEOUT); 2010 if (err) 2011 break; 2012 2013 i915_gem_drain_freed_objects(dev_priv); 2014 err = drm_gem_create_mmap_offset(&obj->base); 2015 if (!err) 2016 break; 2017 2018 } while (flush_delayed_work(&dev_priv->gem.retire_work)); 2019 2020 return err; 2021 } 2022 2023 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2024 { 2025 drm_gem_free_mmap_offset(&obj->base); 2026 } 2027 2028 int 2029 i915_gem_mmap_gtt(struct drm_file *file, 2030 struct drm_device *dev, 2031 u32 handle, 2032 u64 *offset) 2033 { 2034 struct drm_i915_gem_object *obj; 2035 int ret; 2036 2037 obj = i915_gem_object_lookup(file, handle); 2038 if (!obj) 2039 return -ENOENT; 2040 2041 ret = i915_gem_object_create_mmap_offset(obj); 2042 if (ret == 0) 2043 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2044 2045 i915_gem_object_put(obj); 2046 return ret; 2047 } 2048 2049 /** 2050 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2051 * @dev: DRM device 2052 * @data: GTT mapping ioctl data 2053 * @file: GEM object info 2054 * 2055 * Simply returns the fake offset to userspace so it can mmap it. 2056 * The mmap call will end up in drm_gem_mmap(), which will set things 2057 * up so we can get faults in the handler above. 2058 * 2059 * The fault handler will take care of binding the object into the GTT 2060 * (since it may have been evicted to make room for something), allocating 2061 * a fence register, and mapping the appropriate aperture address into 2062 * userspace. 2063 */ 2064 int 2065 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2066 struct drm_file *file) 2067 { 2068 struct drm_i915_gem_mmap_gtt *args = data; 2069 2070 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2071 } 2072 2073 /* Immediately discard the backing storage */ 2074 void __i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2075 { 2076 i915_gem_object_free_mmap_offset(obj); 2077 2078 if (obj->base.filp == NULL) 2079 return; 2080 2081 /* Our goal here is to return as much of the memory as 2082 * is possible back to the system as we are called from OOM. 2083 * To do this we must instruct the shmfs to drop all of its 2084 * backing pages, *now*. 2085 */ 2086 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2087 obj->mm.madv = __I915_MADV_PURGED; 2088 obj->mm.pages = ERR_PTR(-EFAULT); 2089 } 2090 2091 /* 2092 * Move pages to appropriate lru and release the pagevec, decrementing the 2093 * ref count of those pages. 
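 * The pages were marked unevictable while the object owned them;
 * check_move_unevictable_pages() returns them to the regular LRUs so
 * that the VM can reclaim them once the reference is dropped.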
2094 */ 2095 static void check_release_pagevec(struct pagevec *pvec) 2096 { 2097 check_move_unevictable_pages(pvec); 2098 __pagevec_release(pvec); 2099 cond_resched(); 2100 } 2101 2102 static void 2103 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2104 struct sg_table *pages) 2105 { 2106 struct sgt_iter sgt_iter; 2107 struct pagevec pvec; 2108 struct page *page; 2109 2110 __i915_gem_object_release_shmem(obj, pages, true); 2111 i915_gem_gtt_finish_pages(obj, pages); 2112 2113 if (i915_gem_object_needs_bit17_swizzle(obj)) 2114 i915_gem_object_save_bit_17_swizzle(obj, pages); 2115 2116 mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); 2117 2118 pagevec_init(&pvec); 2119 for_each_sgt_page(page, sgt_iter, pages) { 2120 if (obj->mm.dirty) 2121 set_page_dirty(page); 2122 2123 if (obj->mm.madv == I915_MADV_WILLNEED) 2124 mark_page_accessed(page); 2125 2126 if (!pagevec_add(&pvec, page)) 2127 check_release_pagevec(&pvec); 2128 } 2129 if (pagevec_count(&pvec)) 2130 check_release_pagevec(&pvec); 2131 obj->mm.dirty = false; 2132 2133 sg_free_table(pages); 2134 kfree(pages); 2135 } 2136 2137 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2138 { 2139 struct radix_tree_iter iter; 2140 void __rcu **slot; 2141 2142 rcu_read_lock(); 2143 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2144 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2145 rcu_read_unlock(); 2146 } 2147 2148 static struct sg_table * 2149 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2150 { 2151 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2152 struct sg_table *pages; 2153 2154 pages = fetch_and_zero(&obj->mm.pages); 2155 if (IS_ERR_OR_NULL(pages)) 2156 return pages; 2157 2158 spin_lock(&i915->mm.obj_lock); 2159 list_del(&obj->mm.link); 2160 spin_unlock(&i915->mm.obj_lock); 2161 2162 if (obj->mm.mapping) { 2163 void *ptr; 2164 2165 ptr = page_mask_bits(obj->mm.mapping); 2166 if (is_vmalloc_addr(ptr)) 2167 vunmap(ptr); 2168 else 2169 kunmap(kmap_to_page(ptr)); 2170 2171 obj->mm.mapping = NULL; 2172 } 2173 2174 __i915_gem_object_reset_page_iter(obj); 2175 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2176 2177 return pages; 2178 } 2179 2180 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2181 enum i915_mm_subclass subclass) 2182 { 2183 struct sg_table *pages; 2184 int ret; 2185 2186 if (i915_gem_object_has_pinned_pages(obj)) 2187 return -EBUSY; 2188 2189 GEM_BUG_ON(obj->bind_count); 2190 2191 /* May be called by shrinker from within get_pages() (on another bo) */ 2192 mutex_lock_nested(&obj->mm.lock, subclass); 2193 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { 2194 ret = -EBUSY; 2195 goto unlock; 2196 } 2197 2198 /* 2199 * ->put_pages might need to allocate memory for the bit17 swizzle 2200 * array, hence protect them from being reaped by removing them from gtt 2201 * lists early. 2202 */ 2203 pages = __i915_gem_object_unset_pages(obj); 2204 2205 /* 2206 * XXX Temporary hijinx to avoid updating all backends to handle 2207 * NULL pages. In the future, when we have more asynchronous 2208 * get_pages backends we should be better able to handle the 2209 * cancellation of the async task in a more uniform manner. 
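 * Backends that do implement asynchronous get_pages are still handed
 * the NULL so that they can cancel any work in flight; for everyone
 * else the ->put_pages() call below is simply skipped.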
2210 */ 2211 if (!pages && !i915_gem_object_needs_async_cancel(obj)) 2212 pages = ERR_PTR(-EINVAL); 2213 2214 if (!IS_ERR(pages)) 2215 obj->ops->put_pages(obj, pages); 2216 2217 ret = 0; 2218 unlock: 2219 mutex_unlock(&obj->mm.lock); 2220 2221 return ret; 2222 } 2223 2224 bool i915_sg_trim(struct sg_table *orig_st) 2225 { 2226 struct sg_table new_st; 2227 struct scatterlist *sg, *new_sg; 2228 unsigned int i; 2229 2230 if (orig_st->nents == orig_st->orig_nents) 2231 return false; 2232 2233 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2234 return false; 2235 2236 new_sg = new_st.sgl; 2237 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2238 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2239 sg_dma_address(new_sg) = sg_dma_address(sg); 2240 sg_dma_len(new_sg) = sg_dma_len(sg); 2241 2242 new_sg = sg_next(new_sg); 2243 } 2244 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2245 2246 sg_free_table(orig_st); 2247 2248 *orig_st = new_st; 2249 return true; 2250 } 2251 2252 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2253 { 2254 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2255 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2256 unsigned long i; 2257 struct address_space *mapping; 2258 struct sg_table *st; 2259 struct scatterlist *sg; 2260 struct sgt_iter sgt_iter; 2261 struct page *page; 2262 unsigned long last_pfn = 0; /* suppress gcc warning */ 2263 unsigned int max_segment = i915_sg_segment_size(); 2264 unsigned int sg_page_sizes; 2265 struct pagevec pvec; 2266 gfp_t noreclaim; 2267 int ret; 2268 2269 /* 2270 * Assert that the object is not currently in any GPU domain. As it 2271 * wasn't in the GTT, there shouldn't be any way it could have been in 2272 * a GPU cache 2273 */ 2274 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2275 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2276 2277 /* 2278 * If there's no chance of allocating enough pages for the whole 2279 * object, bail early. 2280 */ 2281 if (page_count > totalram_pages()) 2282 return -ENOMEM; 2283 2284 st = kmalloc(sizeof(*st), GFP_KERNEL); 2285 if (st == NULL) 2286 return -ENOMEM; 2287 2288 rebuild_st: 2289 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2290 kfree(st); 2291 return -ENOMEM; 2292 } 2293 2294 /* 2295 * Get the list of pages out of our struct file. They'll be pinned 2296 * at this point until we release them. 2297 * 2298 * Fail silently without starting the shrinker 2299 */ 2300 mapping = obj->base.filp->f_mapping; 2301 mapping_set_unevictable(mapping); 2302 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2303 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2304 2305 sg = st->sgl; 2306 st->nents = 0; 2307 sg_page_sizes = 0; 2308 for (i = 0; i < page_count; i++) { 2309 const unsigned int shrink[] = { 2310 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2311 0, 2312 }, *s = shrink; 2313 gfp_t gfp = noreclaim; 2314 2315 do { 2316 cond_resched(); 2317 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2318 if (!IS_ERR(page)) 2319 break; 2320 2321 if (!*s) { 2322 ret = PTR_ERR(page); 2323 goto err_sg; 2324 } 2325 2326 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2327 2328 /* 2329 * We've tried hard to allocate the memory by reaping 2330 * our own buffer, now let the real VM do its job and 2331 * go down in flames if truly OOM. 2332 * 2333 * However, since graphics tend to be disposable, 2334 * defer the oom here by reporting the ENOMEM back 2335 * to userspace. 
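 *
 * The shrink[] array above encodes this policy: first reap our own
 * purgeable buffers, then retry the allocation with reclaim re-enabled
 * (see the gfp adjustment below) before giving up and returning the
 * error to userspace.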
2336 */ 2337 if (!*s) { 2338 /* reclaim and warn, but no oom */ 2339 gfp = mapping_gfp_mask(mapping); 2340 2341 /* 2342 * Our bo are always dirty and so we require 2343 * kswapd to reclaim our pages (direct reclaim 2344 * does not effectively begin pageout of our 2345 * buffers on its own). However, direct reclaim 2346 * only waits for kswapd when under allocation 2347 * congestion. So as a result __GFP_RECLAIM is 2348 * unreliable and fails to actually reclaim our 2349 * dirty pages -- unless you try over and over 2350 * again with !__GFP_NORETRY. However, we still 2351 * want to fail this allocation rather than 2352 * trigger the out-of-memory killer and for 2353 * this we want __GFP_RETRY_MAYFAIL. 2354 */ 2355 gfp |= __GFP_RETRY_MAYFAIL; 2356 } 2357 } while (1); 2358 2359 if (!i || 2360 sg->length >= max_segment || 2361 page_to_pfn(page) != last_pfn + 1) { 2362 if (i) { 2363 sg_page_sizes |= sg->length; 2364 sg = sg_next(sg); 2365 } 2366 st->nents++; 2367 sg_set_page(sg, page, PAGE_SIZE, 0); 2368 } else { 2369 sg->length += PAGE_SIZE; 2370 } 2371 last_pfn = page_to_pfn(page); 2372 2373 /* Check that the i965g/gm workaround works. */ 2374 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2375 } 2376 if (sg) { /* loop terminated early; short sg table */ 2377 sg_page_sizes |= sg->length; 2378 sg_mark_end(sg); 2379 } 2380 2381 /* Trim unused sg entries to avoid wasting memory. */ 2382 i915_sg_trim(st); 2383 2384 ret = i915_gem_gtt_prepare_pages(obj, st); 2385 if (ret) { 2386 /* 2387 * DMA remapping failed? One possible cause is that 2388 * it could not reserve enough large entries, asking 2389 * for PAGE_SIZE chunks instead may be helpful. 2390 */ 2391 if (max_segment > PAGE_SIZE) { 2392 for_each_sgt_page(page, sgt_iter, st) 2393 put_page(page); 2394 sg_free_table(st); 2395 2396 max_segment = PAGE_SIZE; 2397 goto rebuild_st; 2398 } else { 2399 dev_warn(&dev_priv->drm.pdev->dev, 2400 "Failed to DMA remap %lu pages\n", 2401 page_count); 2402 goto err_pages; 2403 } 2404 } 2405 2406 if (i915_gem_object_needs_bit17_swizzle(obj)) 2407 i915_gem_object_do_bit_17_swizzle(obj, st); 2408 2409 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2410 2411 return 0; 2412 2413 err_sg: 2414 sg_mark_end(sg); 2415 err_pages: 2416 mapping_clear_unevictable(mapping); 2417 pagevec_init(&pvec); 2418 for_each_sgt_page(page, sgt_iter, st) { 2419 if (!pagevec_add(&pvec, page)) 2420 check_release_pagevec(&pvec); 2421 } 2422 if (pagevec_count(&pvec)) 2423 check_release_pagevec(&pvec); 2424 sg_free_table(st); 2425 kfree(st); 2426 2427 /* 2428 * shmemfs first checks if there is enough memory to allocate the page 2429 * and reports ENOSPC should there be insufficient, along with the usual 2430 * ENOMEM for a genuine allocation failure. 2431 * 2432 * We use ENOSPC in our driver to mean that we have run out of aperture 2433 * space and so want to translate the error from shmemfs back to our 2434 * usual understanding of ENOMEM. 2435 */ 2436 if (ret == -ENOSPC) 2437 ret = -ENOMEM; 2438 2439 return ret; 2440 } 2441 2442 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2443 struct sg_table *pages, 2444 unsigned int sg_page_sizes) 2445 { 2446 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2447 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2448 int i; 2449 2450 lockdep_assert_held(&obj->mm.lock); 2451 2452 /* Make the pages coherent with the GPU (flushing any swapin). 
*/ 2453 if (obj->cache_dirty) { 2454 obj->write_domain = 0; 2455 if (i915_gem_object_has_struct_page(obj)) 2456 drm_clflush_sg(pages); 2457 obj->cache_dirty = false; 2458 } 2459 2460 obj->mm.get_page.sg_pos = pages->sgl; 2461 obj->mm.get_page.sg_idx = 0; 2462 2463 obj->mm.pages = pages; 2464 2465 if (i915_gem_object_is_tiled(obj) && 2466 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2467 GEM_BUG_ON(obj->mm.quirked); 2468 __i915_gem_object_pin_pages(obj); 2469 obj->mm.quirked = true; 2470 } 2471 2472 GEM_BUG_ON(!sg_page_sizes); 2473 obj->mm.page_sizes.phys = sg_page_sizes; 2474 2475 /* 2476 * Calculate the supported page-sizes which fit into the given 2477 * sg_page_sizes. This will give us the page-sizes which we may be able 2478 * to use opportunistically when later inserting into the GTT. For 2479 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2480 * 64K or 4K pages, although in practice this will depend on a number of 2481 * other factors. 2482 */ 2483 obj->mm.page_sizes.sg = 0; 2484 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2485 if (obj->mm.page_sizes.phys & ~0u << i) 2486 obj->mm.page_sizes.sg |= BIT(i); 2487 } 2488 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2489 2490 spin_lock(&i915->mm.obj_lock); 2491 list_add(&obj->mm.link, &i915->mm.unbound_list); 2492 spin_unlock(&i915->mm.obj_lock); 2493 } 2494 2495 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2496 { 2497 int err; 2498 2499 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2500 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2501 return -EFAULT; 2502 } 2503 2504 err = obj->ops->get_pages(obj); 2505 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2506 2507 return err; 2508 } 2509 2510 /* Ensure that the associated pages are gathered from the backing storage 2511 * and pinned into our object. i915_gem_object_pin_pages() may be called 2512 * multiple times before they are released by a single call to 2513 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2514 * either as a result of memory pressure (reaping pages under the shrinker) 2515 * or as the object is itself released. 
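 *
 * An illustrative call pattern (not tied to any particular caller) is:
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *	... access obj->mm.pages ...
 *	i915_gem_object_unpin_pages(obj);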
2516 */ 2517 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2518 { 2519 int err; 2520 2521 err = mutex_lock_interruptible(&obj->mm.lock); 2522 if (err) 2523 return err; 2524 2525 if (unlikely(!i915_gem_object_has_pages(obj))) { 2526 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2527 2528 err = ____i915_gem_object_get_pages(obj); 2529 if (err) 2530 goto unlock; 2531 2532 smp_mb__before_atomic(); 2533 } 2534 atomic_inc(&obj->mm.pages_pin_count); 2535 2536 unlock: 2537 mutex_unlock(&obj->mm.lock); 2538 return err; 2539 } 2540 2541 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2542 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2543 enum i915_map_type type) 2544 { 2545 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2546 struct sg_table *sgt = obj->mm.pages; 2547 struct sgt_iter sgt_iter; 2548 struct page *page; 2549 struct page *stack_pages[32]; 2550 struct page **pages = stack_pages; 2551 unsigned long i = 0; 2552 pgprot_t pgprot; 2553 void *addr; 2554 2555 /* A single page can always be kmapped */ 2556 if (n_pages == 1 && type == I915_MAP_WB) 2557 return kmap(sg_page(sgt->sgl)); 2558 2559 if (n_pages > ARRAY_SIZE(stack_pages)) { 2560 /* Too big for stack -- allocate temporary array instead */ 2561 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2562 if (!pages) 2563 return NULL; 2564 } 2565 2566 for_each_sgt_page(page, sgt_iter, sgt) 2567 pages[i++] = page; 2568 2569 /* Check that we have the expected number of pages */ 2570 GEM_BUG_ON(i != n_pages); 2571 2572 switch (type) { 2573 default: 2574 MISSING_CASE(type); 2575 /* fallthrough to use PAGE_KERNEL anyway */ 2576 case I915_MAP_WB: 2577 pgprot = PAGE_KERNEL; 2578 break; 2579 case I915_MAP_WC: 2580 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2581 break; 2582 } 2583 addr = vmap(pages, n_pages, 0, pgprot); 2584 2585 if (pages != stack_pages) 2586 kvfree(pages); 2587 2588 return addr; 2589 } 2590 2591 /* get, pin, and map the pages of the object into kernel space */ 2592 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2593 enum i915_map_type type) 2594 { 2595 enum i915_map_type has_type; 2596 bool pinned; 2597 void *ptr; 2598 int ret; 2599 2600 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2601 return ERR_PTR(-ENXIO); 2602 2603 ret = mutex_lock_interruptible(&obj->mm.lock); 2604 if (ret) 2605 return ERR_PTR(ret); 2606 2607 pinned = !(type & I915_MAP_OVERRIDE); 2608 type &= ~I915_MAP_OVERRIDE; 2609 2610 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2611 if (unlikely(!i915_gem_object_has_pages(obj))) { 2612 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2613 2614 ret = ____i915_gem_object_get_pages(obj); 2615 if (ret) 2616 goto err_unlock; 2617 2618 smp_mb__before_atomic(); 2619 } 2620 atomic_inc(&obj->mm.pages_pin_count); 2621 pinned = false; 2622 } 2623 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2624 2625 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2626 if (ptr && has_type != type) { 2627 if (pinned) { 2628 ret = -EBUSY; 2629 goto err_unpin; 2630 } 2631 2632 if (is_vmalloc_addr(ptr)) 2633 vunmap(ptr); 2634 else 2635 kunmap(kmap_to_page(ptr)); 2636 2637 ptr = obj->mm.mapping = NULL; 2638 } 2639 2640 if (!ptr) { 2641 ptr = i915_gem_object_map(obj, type); 2642 if (!ptr) { 2643 ret = -ENOMEM; 2644 goto err_unpin; 2645 } 2646 2647 obj->mm.mapping = page_pack_bits(ptr, type); 2648 } 2649 2650 out_unlock: 2651 mutex_unlock(&obj->mm.lock); 2652 return ptr; 2653 2654 err_unpin: 2655 atomic_dec(&obj->mm.pages_pin_count); 2656 
err_unlock: 2657 ptr = ERR_PTR(ret); 2658 goto out_unlock; 2659 } 2660 2661 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, 2662 unsigned long offset, 2663 unsigned long size) 2664 { 2665 enum i915_map_type has_type; 2666 void *ptr; 2667 2668 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 2669 GEM_BUG_ON(range_overflows_t(typeof(obj->base.size), 2670 offset, size, obj->base.size)); 2671 2672 obj->mm.dirty = true; 2673 2674 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) 2675 return; 2676 2677 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2678 if (has_type == I915_MAP_WC) 2679 return; 2680 2681 drm_clflush_virt_range(ptr + offset, size); 2682 if (size == obj->base.size) { 2683 obj->write_domain &= ~I915_GEM_DOMAIN_CPU; 2684 obj->cache_dirty = false; 2685 } 2686 } 2687 2688 static int 2689 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2690 const struct drm_i915_gem_pwrite *arg) 2691 { 2692 struct address_space *mapping = obj->base.filp->f_mapping; 2693 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2694 u64 remain, offset; 2695 unsigned int pg; 2696 2697 /* Caller already validated user args */ 2698 GEM_BUG_ON(!access_ok(user_data, arg->size)); 2699 2700 /* 2701 * Before we instantiate/pin the backing store for our use, we 2702 * can prepopulate the shmemfs filp efficiently using a write into 2703 * the pagecache. We avoid the penalty of instantiating all the 2704 * pages, important if the user is just writing to a few and never 2705 * uses the object on the GPU, and using a direct write into shmemfs 2706 * allows it to avoid the cost of retrieving a page (either swapin 2707 * or clearing-before-use) before it is overwritten. 2708 */ 2709 if (i915_gem_object_has_pages(obj)) 2710 return -ENODEV; 2711 2712 if (obj->mm.madv != I915_MADV_WILLNEED) 2713 return -EFAULT; 2714 2715 /* 2716 * Before the pages are instantiated the object is treated as being 2717 * in the CPU domain. The pages will be clflushed as required before 2718 * use, and we can freely write into the pages directly. If userspace 2719 * races pwrite with any other operation; corruption will ensue - 2720 * that is userspace's prerogative! 
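 *
 * The loop below therefore writes the user data one page at a time via
 * pagecache_write_begin()/pagecache_write_end(), prefaulting each user
 * page beforehand so that the atomic copy into the pagecache is
 * unlikely to fault.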
2721 */ 2722 2723 remain = arg->size; 2724 offset = arg->offset; 2725 pg = offset_in_page(offset); 2726 2727 do { 2728 unsigned int len, unwritten; 2729 struct page *page; 2730 void *data, *vaddr; 2731 int err; 2732 char c; 2733 2734 len = PAGE_SIZE - pg; 2735 if (len > remain) 2736 len = remain; 2737 2738 /* Prefault the user page to reduce potential recursion */ 2739 err = __get_user(c, user_data); 2740 if (err) 2741 return err; 2742 2743 err = __get_user(c, user_data + len - 1); 2744 if (err) 2745 return err; 2746 2747 err = pagecache_write_begin(obj->base.filp, mapping, 2748 offset, len, 0, 2749 &page, &data); 2750 if (err < 0) 2751 return err; 2752 2753 vaddr = kmap_atomic(page); 2754 unwritten = __copy_from_user_inatomic(vaddr + pg, 2755 user_data, 2756 len); 2757 kunmap_atomic(vaddr); 2758 2759 err = pagecache_write_end(obj->base.filp, mapping, 2760 offset, len, len - unwritten, 2761 page, data); 2762 if (err < 0) 2763 return err; 2764 2765 /* We don't handle -EFAULT, leave it to the caller to check */ 2766 if (unwritten) 2767 return -ENODEV; 2768 2769 remain -= len; 2770 user_data += len; 2771 offset += len; 2772 pg = 0; 2773 } while (remain); 2774 2775 return 0; 2776 } 2777 2778 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 2779 { 2780 struct drm_i915_private *i915 = to_i915(gem->dev); 2781 struct drm_i915_gem_object *obj = to_intel_bo(gem); 2782 struct drm_i915_file_private *fpriv = file->driver_priv; 2783 struct i915_lut_handle *lut, *ln; 2784 2785 mutex_lock(&i915->drm.struct_mutex); 2786 2787 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 2788 struct i915_gem_context *ctx = lut->ctx; 2789 struct i915_vma *vma; 2790 2791 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 2792 if (ctx->file_priv != fpriv) 2793 continue; 2794 2795 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 2796 GEM_BUG_ON(vma->obj != obj); 2797 2798 /* We allow the process to have multiple handles to the same 2799 * vma, in the same fd namespace, by virtue of flink/open. 2800 */ 2801 GEM_BUG_ON(!vma->open_count); 2802 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 2803 i915_vma_close(vma); 2804 2805 list_del(&lut->obj_link); 2806 list_del(&lut->ctx_link); 2807 2808 i915_lut_handle_free(lut); 2809 __i915_gem_object_release_unless_active(obj); 2810 } 2811 2812 mutex_unlock(&i915->drm.struct_mutex); 2813 } 2814 2815 static unsigned long to_wait_timeout(s64 timeout_ns) 2816 { 2817 if (timeout_ns < 0) 2818 return MAX_SCHEDULE_TIMEOUT; 2819 2820 if (timeout_ns == 0) 2821 return 0; 2822 2823 return nsecs_to_jiffies_timeout(timeout_ns); 2824 } 2825 2826 /** 2827 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2828 * @dev: drm device pointer 2829 * @data: ioctl data blob 2830 * @file: drm file pointer 2831 * 2832 * Returns 0 if successful, else an error is returned with the remaining time in 2833 * the timeout parameter. 2834 * -ETIME: object is still busy after timeout 2835 * -ERESTARTSYS: signal interrupted the wait 2836 * -ENONENT: object doesn't exist 2837 * Also possible, but rare: 2838 * -EAGAIN: incomplete, restart syscall 2839 * -ENOMEM: damn 2840 * -ENODEV: Internal IRQ fail 2841 * -E?: The add request failed 2842 * 2843 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2844 * non-zero timeout parameter the wait ioctl will wait for the given number of 2845 * nanoseconds on an object becoming unbusy. 
Since the wait itself does so 2846 * without holding struct_mutex the object may become re-busied before this 2847 * function completes. A similar but shorter race condition exists in the busy 2848 * ioctl. 2849 */ 2850 int 2851 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2852 { 2853 struct drm_i915_gem_wait *args = data; 2854 struct drm_i915_gem_object *obj; 2855 ktime_t start; 2856 long ret; 2857 2858 if (args->flags != 0) 2859 return -EINVAL; 2860 2861 obj = i915_gem_object_lookup(file, args->bo_handle); 2862 if (!obj) 2863 return -ENOENT; 2864 2865 start = ktime_get(); 2866 2867 ret = i915_gem_object_wait(obj, 2868 I915_WAIT_INTERRUPTIBLE | 2869 I915_WAIT_PRIORITY | 2870 I915_WAIT_ALL, 2871 to_wait_timeout(args->timeout_ns)); 2872 2873 if (args->timeout_ns > 0) { 2874 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 2875 if (args->timeout_ns < 0) 2876 args->timeout_ns = 0; 2877 2878 /* 2879 * Apparently ktime isn't accurate enough and occasionally has a 2880 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 2881 * things up to make the test happy. We allow up to 1 jiffy. 2882 * 2883 * This is a regression from the timespec->ktime conversion. 2884 */ 2885 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 2886 args->timeout_ns = 0; 2887 2888 /* Asked to wait beyond the jiffie/scheduler precision? */ 2889 if (ret == -ETIME && args->timeout_ns) 2890 ret = -EAGAIN; 2891 } 2892 2893 i915_gem_object_put(obj); 2894 return ret; 2895 } 2896 2897 static int wait_for_engines(struct drm_i915_private *i915) 2898 { 2899 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 2900 dev_err(i915->drm.dev, 2901 "Failed to idle engines, declaring wedged!\n"); 2902 GEM_TRACE_DUMP(); 2903 i915_gem_set_wedged(i915); 2904 return -EIO; 2905 } 2906 2907 return 0; 2908 } 2909 2910 static long 2911 wait_for_timelines(struct drm_i915_private *i915, 2912 unsigned int flags, long timeout) 2913 { 2914 struct i915_gt_timelines *gt = &i915->gt.timelines; 2915 struct i915_timeline *tl; 2916 2917 mutex_lock(&gt->mutex); 2918 list_for_each_entry(tl, &gt->active_list, link) { 2919 struct i915_request *rq; 2920 2921 rq = i915_active_request_get_unlocked(&tl->last_request); 2922 if (!rq) 2923 continue; 2924 2925 mutex_unlock(&gt->mutex); 2926 2927 /* 2928 * "Race-to-idle". 2929 * 2930 * Switching to the kernel context is often used as a synchronous 2931 * step prior to idling, e.g. in suspend for flushing all 2932 * current operations to memory before sleeping. These we 2933 * want to complete as quickly as possible to avoid prolonged 2934 * stalls, so allow the gpu to boost to maximum clocks. 2935 */ 2936 if (flags & I915_WAIT_FOR_IDLE_BOOST) 2937 gen6_rps_boost(rq); 2938 2939 timeout = i915_request_wait(rq, flags, timeout); 2940 i915_request_put(rq); 2941 if (timeout < 0) 2942 return timeout; 2943 2944 /* restart after reacquiring the lock */ 2945 mutex_lock(&gt->mutex); 2946 tl = list_entry(&gt->active_list, typeof(*tl), link); 2947 } 2948 mutex_unlock(&gt->mutex); 2949 2950 return timeout; 2951 } 2952 2953 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 2954 unsigned int flags, long timeout) 2955 { 2956 GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n", 2957 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 2958 timeout, timeout == MAX_SCHEDULE_TIMEOUT ?
" (forever)" : "", 2959 yesno(i915->gt.awake)); 2960 2961 /* If the device is asleep, we have no requests outstanding */ 2962 if (!READ_ONCE(i915->gt.awake)) 2963 return 0; 2964 2965 timeout = wait_for_timelines(i915, flags, timeout); 2966 if (timeout < 0) 2967 return timeout; 2968 2969 if (flags & I915_WAIT_LOCKED) { 2970 int err; 2971 2972 lockdep_assert_held(&i915->drm.struct_mutex); 2973 2974 err = wait_for_engines(i915); 2975 if (err) 2976 return err; 2977 2978 i915_retire_requests(i915); 2979 } 2980 2981 return 0; 2982 } 2983 2984 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 2985 { 2986 /* 2987 * We manually flush the CPU domain so that we can override and 2988 * force the flush for the display, and perform it asynchronously. 2989 */ 2990 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 2991 if (obj->cache_dirty) 2992 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 2993 obj->write_domain = 0; 2994 } 2995 2996 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 2997 { 2998 if (!READ_ONCE(obj->pin_global)) 2999 return; 3000 3001 mutex_lock(&obj->base.dev->struct_mutex); 3002 __i915_gem_object_flush_for_display(obj); 3003 mutex_unlock(&obj->base.dev->struct_mutex); 3004 } 3005 3006 /** 3007 * Moves a single object to the WC read, and possibly write domain. 3008 * @obj: object to act on 3009 * @write: ask for write access or read only 3010 * 3011 * This function returns when the move is complete, including waiting on 3012 * flushes to occur. 3013 */ 3014 int 3015 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3016 { 3017 int ret; 3018 3019 lockdep_assert_held(&obj->base.dev->struct_mutex); 3020 3021 ret = i915_gem_object_wait(obj, 3022 I915_WAIT_INTERRUPTIBLE | 3023 I915_WAIT_LOCKED | 3024 (write ? I915_WAIT_ALL : 0), 3025 MAX_SCHEDULE_TIMEOUT); 3026 if (ret) 3027 return ret; 3028 3029 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3030 return 0; 3031 3032 /* Flush and acquire obj->pages so that we are coherent through 3033 * direct access in memory with previous cached writes through 3034 * shmemfs and that our cache domain tracking remains valid. 3035 * For example, if the obj->filp was moved to swap without us 3036 * being notified and releasing the pages, we would mistakenly 3037 * continue to assume that the obj remained out of the CPU cached 3038 * domain. 3039 */ 3040 ret = i915_gem_object_pin_pages(obj); 3041 if (ret) 3042 return ret; 3043 3044 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3045 3046 /* Serialise direct access to this object with the barriers for 3047 * coherent writes from the GPU, by effectively invalidating the 3048 * WC domain upon first access. 3049 */ 3050 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3051 mb(); 3052 3053 /* It should now be out of any other write domains, and we can update 3054 * the domain values for our changes. 3055 */ 3056 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3057 obj->read_domains |= I915_GEM_DOMAIN_WC; 3058 if (write) { 3059 obj->read_domains = I915_GEM_DOMAIN_WC; 3060 obj->write_domain = I915_GEM_DOMAIN_WC; 3061 obj->mm.dirty = true; 3062 } 3063 3064 i915_gem_object_unpin_pages(obj); 3065 return 0; 3066 } 3067 3068 /** 3069 * Moves a single object to the GTT read, and possibly write domain. 3070 * @obj: object to act on 3071 * @write: ask for write access or read only 3072 * 3073 * This function returns when the move is complete, including waiting on 3074 * flushes to occur.
3075 */ 3076 int 3077 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3078 { 3079 int ret; 3080 3081 lockdep_assert_held(&obj->base.dev->struct_mutex); 3082 3083 ret = i915_gem_object_wait(obj, 3084 I915_WAIT_INTERRUPTIBLE | 3085 I915_WAIT_LOCKED | 3086 (write ? I915_WAIT_ALL : 0), 3087 MAX_SCHEDULE_TIMEOUT); 3088 if (ret) 3089 return ret; 3090 3091 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3092 return 0; 3093 3094 /* Flush and acquire obj->pages so that we are coherent through 3095 * direct access in memory with previous cached writes through 3096 * shmemfs and that our cache domain tracking remains valid. 3097 * For example, if the obj->filp was moved to swap without us 3098 * being notified and releasing the pages, we would mistakenly 3099 * continue to assume that the obj remained out of the CPU cached 3100 * domain. 3101 */ 3102 ret = i915_gem_object_pin_pages(obj); 3103 if (ret) 3104 return ret; 3105 3106 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3107 3108 /* Serialise direct access to this object with the barriers for 3109 * coherent writes from the GPU, by effectively invalidating the 3110 * GTT domain upon first access. 3111 */ 3112 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3113 mb(); 3114 3115 /* It should now be out of any other write domains, and we can update 3116 * the domain values for our changes. 3117 */ 3118 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3119 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3120 if (write) { 3121 obj->read_domains = I915_GEM_DOMAIN_GTT; 3122 obj->write_domain = I915_GEM_DOMAIN_GTT; 3123 obj->mm.dirty = true; 3124 } 3125 3126 i915_gem_object_unpin_pages(obj); 3127 return 0; 3128 } 3129 3130 /** 3131 * Changes the cache-level of an object across all VMA. 3132 * @obj: object to act on 3133 * @cache_level: new cache level to set for the object 3134 * 3135 * After this function returns, the object will be in the new cache-level 3136 * across all GTT and the contents of the backing storage will be coherent, 3137 * with respect to the new cache-level. In order to keep the backing storage 3138 * coherent for all users, we only allow a single cache level to be set 3139 * globally on the object and prevent it from being changed whilst the 3140 * hardware is reading from the object. That is if the object is currently 3141 * on the scanout it will be set to uncached (or equivalent display 3142 * cache coherency) and all non-MOCS GPU access will also be uncached so 3143 * that all direct access to the scanout remains coherent. 3144 */ 3145 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3146 enum i915_cache_level cache_level) 3147 { 3148 struct i915_vma *vma; 3149 int ret; 3150 3151 lockdep_assert_held(&obj->base.dev->struct_mutex); 3152 3153 if (obj->cache_level == cache_level) 3154 return 0; 3155 3156 /* Inspect the list of currently bound VMA and unbind any that would 3157 * be invalid given the new cache-level. This is principally to 3158 * catch the issue of the CS prefetch crossing page boundaries and 3159 * reading an invalid PTE on older architectures. 
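 * (i915_gem_valid_gtt_space() checks the cache-level colouring of the
 * neighbouring GTT nodes for exactly this reason.)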
3160 */ 3161 restart: 3162 list_for_each_entry(vma, &obj->vma.list, obj_link) { 3163 if (!drm_mm_node_allocated(&vma->node)) 3164 continue; 3165 3166 if (i915_vma_is_pinned(vma)) { 3167 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3168 return -EBUSY; 3169 } 3170 3171 if (!i915_vma_is_closed(vma) && 3172 i915_gem_valid_gtt_space(vma, cache_level)) 3173 continue; 3174 3175 ret = i915_vma_unbind(vma); 3176 if (ret) 3177 return ret; 3178 3179 /* As unbinding may affect other elements in the 3180 * obj->vma_list (due to side-effects from retiring 3181 * an active vma), play safe and restart the iterator. 3182 */ 3183 goto restart; 3184 } 3185 3186 /* We can reuse the existing drm_mm nodes but need to change the 3187 * cache-level on the PTE. We could simply unbind them all and 3188 * rebind with the correct cache-level on next use. However since 3189 * we already have a valid slot, dma mapping, pages etc, we may as well 3190 * rewrite the PTE in the belief that doing so tramples upon less 3191 * state and so involves less work. 3192 */ 3193 if (obj->bind_count) { 3194 /* Before we change the PTE, the GPU must not be accessing it. 3195 * If we wait upon the object, we know that all the bound 3196 * VMA are no longer active. 3197 */ 3198 ret = i915_gem_object_wait(obj, 3199 I915_WAIT_INTERRUPTIBLE | 3200 I915_WAIT_LOCKED | 3201 I915_WAIT_ALL, 3202 MAX_SCHEDULE_TIMEOUT); 3203 if (ret) 3204 return ret; 3205 3206 if (!HAS_LLC(to_i915(obj->base.dev)) && 3207 cache_level != I915_CACHE_NONE) { 3208 /* Access to snoopable pages through the GTT is 3209 * incoherent and on some machines causes a hard 3210 * lockup. Relinquish the CPU mmapping to force 3211 * userspace to refault in the pages and we can 3212 * then double check if the GTT mapping is still 3213 * valid for that pointer access. 3214 */ 3215 i915_gem_release_mmap(obj); 3216 3217 /* As we no longer need a fence for GTT access, 3218 * we can relinquish it now (and so prevent having 3219 * to steal a fence from someone else on the next 3220 * fence request). Note GPU activity would have 3221 * dropped the fence as all snoopable access is 3222 * supposed to be linear. 3223 */ 3224 for_each_ggtt_vma(vma, obj) { 3225 ret = i915_vma_put_fence(vma); 3226 if (ret) 3227 return ret; 3228 } 3229 } else { 3230 /* We either have incoherent backing store and 3231 * so no GTT access or the architecture is fully 3232 * coherent. In such cases, existing GTT mmaps 3233 * ignore the cache bit in the PTE and we can 3234 * rewrite it without confusing the GPU or having 3235 * to force userspace to fault back in its mmaps.
3236 */ 3237 } 3238 3239 list_for_each_entry(vma, &obj->vma.list, obj_link) { 3240 if (!drm_mm_node_allocated(&vma->node)) 3241 continue; 3242 3243 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3244 if (ret) 3245 return ret; 3246 } 3247 } 3248 3249 list_for_each_entry(vma, &obj->vma.list, obj_link) 3250 vma->node.color = cache_level; 3251 i915_gem_object_set_cache_coherency(obj, cache_level); 3252 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 3253 3254 return 0; 3255 } 3256 3257 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3258 struct drm_file *file) 3259 { 3260 struct drm_i915_gem_caching *args = data; 3261 struct drm_i915_gem_object *obj; 3262 int err = 0; 3263 3264 rcu_read_lock(); 3265 obj = i915_gem_object_lookup_rcu(file, args->handle); 3266 if (!obj) { 3267 err = -ENOENT; 3268 goto out; 3269 } 3270 3271 switch (obj->cache_level) { 3272 case I915_CACHE_LLC: 3273 case I915_CACHE_L3_LLC: 3274 args->caching = I915_CACHING_CACHED; 3275 break; 3276 3277 case I915_CACHE_WT: 3278 args->caching = I915_CACHING_DISPLAY; 3279 break; 3280 3281 default: 3282 args->caching = I915_CACHING_NONE; 3283 break; 3284 } 3285 out: 3286 rcu_read_unlock(); 3287 return err; 3288 } 3289 3290 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3291 struct drm_file *file) 3292 { 3293 struct drm_i915_private *i915 = to_i915(dev); 3294 struct drm_i915_gem_caching *args = data; 3295 struct drm_i915_gem_object *obj; 3296 enum i915_cache_level level; 3297 int ret = 0; 3298 3299 switch (args->caching) { 3300 case I915_CACHING_NONE: 3301 level = I915_CACHE_NONE; 3302 break; 3303 case I915_CACHING_CACHED: 3304 /* 3305 * Due to a HW issue on BXT A stepping, GPU stores via a 3306 * snooped mapping may leave stale data in a corresponding CPU 3307 * cacheline, whereas normally such cachelines would get 3308 * invalidated. 3309 */ 3310 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 3311 return -ENODEV; 3312 3313 level = I915_CACHE_LLC; 3314 break; 3315 case I915_CACHING_DISPLAY: 3316 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 3317 break; 3318 default: 3319 return -EINVAL; 3320 } 3321 3322 obj = i915_gem_object_lookup(file, args->handle); 3323 if (!obj) 3324 return -ENOENT; 3325 3326 /* 3327 * The caching mode of proxy object is handled by its generator, and 3328 * not allowed to be changed by userspace. 3329 */ 3330 if (i915_gem_object_is_proxy(obj)) { 3331 ret = -ENXIO; 3332 goto out; 3333 } 3334 3335 if (obj->cache_level == level) 3336 goto out; 3337 3338 ret = i915_gem_object_wait(obj, 3339 I915_WAIT_INTERRUPTIBLE, 3340 MAX_SCHEDULE_TIMEOUT); 3341 if (ret) 3342 goto out; 3343 3344 ret = i915_mutex_lock_interruptible(dev); 3345 if (ret) 3346 goto out; 3347 3348 ret = i915_gem_object_set_cache_level(obj, level); 3349 mutex_unlock(&dev->struct_mutex); 3350 3351 out: 3352 i915_gem_object_put(obj); 3353 return ret; 3354 } 3355 3356 /* 3357 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 3358 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 3359 * (for pageflips). We only flush the caches while preparing the buffer for 3360 * display, the callers are responsible for frontbuffer flush. 
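 *
 * On success the object's pin_global count is raised; the pin must be
 * released again via i915_gem_object_unpin_from_display_plane().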
3361 */ 3362 struct i915_vma * 3363 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3364 u32 alignment, 3365 const struct i915_ggtt_view *view, 3366 unsigned int flags) 3367 { 3368 struct i915_vma *vma; 3369 int ret; 3370 3371 lockdep_assert_held(&obj->base.dev->struct_mutex); 3372 3373 /* Mark the global pin early so that we account for the 3374 * display coherency whilst setting up the cache domains. 3375 */ 3376 obj->pin_global++; 3377 3378 /* The display engine is not coherent with the LLC cache on gen6. As 3379 * a result, we make sure that the pinning that is about to occur is 3380 * done with uncached PTEs. This is lowest common denominator for all 3381 * chipsets. 3382 * 3383 * However for gen6+, we could do better by using the GFDT bit instead 3384 * of uncaching, which would allow us to flush all the LLC-cached data 3385 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3386 */ 3387 ret = i915_gem_object_set_cache_level(obj, 3388 HAS_WT(to_i915(obj->base.dev)) ? 3389 I915_CACHE_WT : I915_CACHE_NONE); 3390 if (ret) { 3391 vma = ERR_PTR(ret); 3392 goto err_unpin_global; 3393 } 3394 3395 /* As the user may map the buffer once pinned in the display plane 3396 * (e.g. libkms for the bootup splash), we have to ensure that we 3397 * always use map_and_fenceable for all scanout buffers. However, 3398 * it may simply be too big to fit into mappable, in which case 3399 * put it anyway and hope that userspace can cope (but always first 3400 * try to preserve the existing ABI). 3401 */ 3402 vma = ERR_PTR(-ENOSPC); 3403 if ((flags & PIN_MAPPABLE) == 0 && 3404 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 3405 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 3406 flags | 3407 PIN_MAPPABLE | 3408 PIN_NONBLOCK); 3409 if (IS_ERR(vma)) 3410 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 3411 if (IS_ERR(vma)) 3412 goto err_unpin_global; 3413 3414 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 3415 3416 __i915_gem_object_flush_for_display(obj); 3417 3418 /* It should now be out of any other write domains, and we can update 3419 * the domain values for our changes. 3420 */ 3421 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3422 3423 return vma; 3424 3425 err_unpin_global: 3426 obj->pin_global--; 3427 return vma; 3428 } 3429 3430 void 3431 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 3432 { 3433 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 3434 3435 if (WARN_ON(vma->obj->pin_global == 0)) 3436 return; 3437 3438 if (--vma->obj->pin_global == 0) 3439 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 3440 3441 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 3442 i915_gem_object_bump_inactive_ggtt(vma->obj); 3443 3444 i915_vma_unpin(vma); 3445 } 3446 3447 /** 3448 * Moves a single object to the CPU read, and possibly write domain. 3449 * @obj: object to act on 3450 * @write: requesting write or read-only access 3451 * 3452 * This function returns when the move is complete, including waiting on 3453 * flushes to occur. 3454 */ 3455 int 3456 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3457 { 3458 int ret; 3459 3460 lockdep_assert_held(&obj->base.dev->struct_mutex); 3461 3462 ret = i915_gem_object_wait(obj, 3463 I915_WAIT_INTERRUPTIBLE | 3464 I915_WAIT_LOCKED | 3465 (write ? 
I915_WAIT_ALL : 0), 3466 MAX_SCHEDULE_TIMEOUT); 3467 if (ret) 3468 return ret; 3469 3470 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3471 3472 /* Flush the CPU cache if it's still invalid. */ 3473 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3474 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 3475 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3476 } 3477 3478 /* It should now be out of any other write domains, and we can update 3479 * the domain values for our changes. 3480 */ 3481 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 3482 3483 /* If we're writing through the CPU, then the GPU read domains will 3484 * need to be invalidated at next use. 3485 */ 3486 if (write) 3487 __start_cpu_write(obj); 3488 3489 return 0; 3490 } 3491 3492 /* Throttle our rendering by waiting until the ring has completed our requests 3493 * emitted over 20 msec ago. 3494 * 3495 * Note that if we were to use the current jiffies each time around the loop, 3496 * we wouldn't escape the function with any frames outstanding if the time to 3497 * render a frame was over 20ms. 3498 * 3499 * This should get us reasonable parallelism between CPU and GPU but also 3500 * relatively low latency when blocking on a particular request to finish. 3501 */ 3502 static int 3503 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3504 { 3505 struct drm_i915_private *dev_priv = to_i915(dev); 3506 struct drm_i915_file_private *file_priv = file->driver_priv; 3507 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 3508 struct i915_request *request, *target = NULL; 3509 long ret; 3510 3511 /* ABI: return -EIO if already wedged */ 3512 ret = i915_terminally_wedged(dev_priv); 3513 if (ret) 3514 return ret; 3515 3516 spin_lock(&file_priv->mm.lock); 3517 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 3518 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3519 break; 3520 3521 if (target) { 3522 list_del(&target->client_link); 3523 target->file_priv = NULL; 3524 } 3525 3526 target = request; 3527 } 3528 if (target) 3529 i915_request_get(target); 3530 spin_unlock(&file_priv->mm.lock); 3531 3532 if (target == NULL) 3533 return 0; 3534 3535 ret = i915_request_wait(target, 3536 I915_WAIT_INTERRUPTIBLE, 3537 MAX_SCHEDULE_TIMEOUT); 3538 i915_request_put(target); 3539 3540 return ret < 0 ? ret : 0; 3541 } 3542 3543 struct i915_vma * 3544 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3545 const struct i915_ggtt_view *view, 3546 u64 size, 3547 u64 alignment, 3548 u64 flags) 3549 { 3550 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3551 struct i915_address_space *vm = &dev_priv->ggtt.vm; 3552 struct i915_vma *vma; 3553 int ret; 3554 3555 lockdep_assert_held(&obj->base.dev->struct_mutex); 3556 3557 if (flags & PIN_MAPPABLE && 3558 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 3559 /* If the required space is larger than the available 3560 * aperture, we will not able to find a slot for the 3561 * object and unbinding the object now will be in 3562 * vain. Worse, doing so may cause us to ping-pong 3563 * the object in and out of the Global GTT and 3564 * waste a lot of cycles under the mutex. 3565 */ 3566 if (obj->base.size > dev_priv->ggtt.mappable_end) 3567 return ERR_PTR(-E2BIG); 3568 3569 /* If NONBLOCK is set the caller is optimistically 3570 * trying to cache the full object within the mappable 3571 * aperture, and *must* have a fallback in place for 3572 * situations where we cannot bind the object. 
We 3573 * can be a little more lax here and use the fallback 3574 * more often to avoid costly migrations of ourselves 3575 * and other objects within the aperture. 3576 * 3577 * Half-the-aperture is used as a simple heuristic. 3578 * More interesting would to do search for a free 3579 * block prior to making the commitment to unbind. 3580 * That caters for the self-harm case, and with a 3581 * little more heuristics (e.g. NOFAULT, NOEVICT) 3582 * we could try to minimise harm to others. 3583 */ 3584 if (flags & PIN_NONBLOCK && 3585 obj->base.size > dev_priv->ggtt.mappable_end / 2) 3586 return ERR_PTR(-ENOSPC); 3587 } 3588 3589 vma = i915_vma_instance(obj, vm, view); 3590 if (IS_ERR(vma)) 3591 return vma; 3592 3593 if (i915_vma_misplaced(vma, size, alignment, flags)) { 3594 if (flags & PIN_NONBLOCK) { 3595 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 3596 return ERR_PTR(-ENOSPC); 3597 3598 if (flags & PIN_MAPPABLE && 3599 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 3600 return ERR_PTR(-ENOSPC); 3601 } 3602 3603 WARN(i915_vma_is_pinned(vma), 3604 "bo is already pinned in ggtt with incorrect alignment:" 3605 " offset=%08x, req.alignment=%llx," 3606 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 3607 i915_ggtt_offset(vma), alignment, 3608 !!(flags & PIN_MAPPABLE), 3609 i915_vma_is_map_and_fenceable(vma)); 3610 ret = i915_vma_unbind(vma); 3611 if (ret) 3612 return ERR_PTR(ret); 3613 } 3614 3615 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 3616 if (ret) 3617 return ERR_PTR(ret); 3618 3619 return vma; 3620 } 3621 3622 static __always_inline u32 __busy_read_flag(u8 id) 3623 { 3624 if (id == (u8)I915_ENGINE_CLASS_INVALID) 3625 return 0xffff0000u; 3626 3627 GEM_BUG_ON(id >= 16); 3628 return 0x10000u << id; 3629 } 3630 3631 static __always_inline u32 __busy_write_id(u8 id) 3632 { 3633 /* 3634 * The uABI guarantees an active writer is also amongst the read 3635 * engines. This would be true if we accessed the activity tracking 3636 * under the lock, but as we perform the lookup of the object and 3637 * its activity locklessly we can not guarantee that the last_write 3638 * being active implies that we have set the same engine flag from 3639 * last_read - hence we always set both read and write busy for 3640 * last_write. 3641 */ 3642 if (id == (u8)I915_ENGINE_CLASS_INVALID) 3643 return 0xffffffffu; 3644 3645 return (id + 1) | __busy_read_flag(id); 3646 } 3647 3648 static __always_inline unsigned int 3649 __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id)) 3650 { 3651 const struct i915_request *rq; 3652 3653 /* 3654 * We have to check the current hw status of the fence as the uABI 3655 * guarantees forward progress. We could rely on the idle worker 3656 * to eventually flush us, but to minimise latency just ask the 3657 * hardware. 3658 * 3659 * Note we only report on the status of native fences. 3660 */ 3661 if (!dma_fence_is_i915(fence)) 3662 return 0; 3663 3664 /* opencode to_request() in order to avoid const warnings */ 3665 rq = container_of(fence, const struct i915_request, fence); 3666 if (i915_request_completed(rq)) 3667 return 0; 3668 3669 /* Beware type-expansion follies! 
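 * The BUILD_BUG_ON below pins rq->engine->uabi_class to u8 so that the
 * comparisons and shifts in __busy_read_flag()/__busy_write_id() above
 * behave as intended.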
*/ 3670 BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class)); 3671 return flag(rq->engine->uabi_class); 3672 } 3673 3674 static __always_inline unsigned int 3675 busy_check_reader(const struct dma_fence *fence) 3676 { 3677 return __busy_set_if_active(fence, __busy_read_flag); 3678 } 3679 3680 static __always_inline unsigned int 3681 busy_check_writer(const struct dma_fence *fence) 3682 { 3683 if (!fence) 3684 return 0; 3685 3686 return __busy_set_if_active(fence, __busy_write_id); 3687 } 3688 3689 int 3690 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3691 struct drm_file *file) 3692 { 3693 struct drm_i915_gem_busy *args = data; 3694 struct drm_i915_gem_object *obj; 3695 struct reservation_object_list *list; 3696 unsigned int seq; 3697 int err; 3698 3699 err = -ENOENT; 3700 rcu_read_lock(); 3701 obj = i915_gem_object_lookup_rcu(file, args->handle); 3702 if (!obj) 3703 goto out; 3704 3705 /* 3706 * A discrepancy here is that we do not report the status of 3707 * non-i915 fences, i.e. even though we may report the object as idle, 3708 * a call to set-domain may still stall waiting for foreign rendering. 3709 * This also means that wait-ioctl may report an object as busy, 3710 * where busy-ioctl considers it idle. 3711 * 3712 * We trade the ability to warn of foreign fences to report on which 3713 * i915 engines are active for the object. 3714 * 3715 * Alternatively, we can trade that extra information on read/write 3716 * activity with 3717 * args->busy = 3718 * !reservation_object_test_signaled_rcu(obj->resv, true); 3719 * to report the overall busyness. This is what the wait-ioctl does. 3720 * 3721 */ 3722 retry: 3723 seq = raw_read_seqcount(&obj->resv->seq); 3724 3725 /* Translate the exclusive fence to the READ *and* WRITE engine */ 3726 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 3727 3728 /* Translate shared fences to READ set of engines */ 3729 list = rcu_dereference(obj->resv->fence); 3730 if (list) { 3731 unsigned int shared_count = list->shared_count, i; 3732 3733 for (i = 0; i < shared_count; ++i) { 3734 struct dma_fence *fence = 3735 rcu_dereference(list->shared[i]); 3736 3737 args->busy |= busy_check_reader(fence); 3738 } 3739 } 3740 3741 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 3742 goto retry; 3743 3744 err = 0; 3745 out: 3746 rcu_read_unlock(); 3747 return err; 3748 } 3749 3750 int 3751 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3752 struct drm_file *file_priv) 3753 { 3754 return i915_gem_ring_throttle(dev, file_priv); 3755 } 3756 3757 int 3758 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3759 struct drm_file *file_priv) 3760 { 3761 struct drm_i915_private *dev_priv = to_i915(dev); 3762 struct drm_i915_gem_madvise *args = data; 3763 struct drm_i915_gem_object *obj; 3764 int err; 3765 3766 switch (args->madv) { 3767 case I915_MADV_DONTNEED: 3768 case I915_MADV_WILLNEED: 3769 break; 3770 default: 3771 return -EINVAL; 3772 } 3773 3774 obj = i915_gem_object_lookup(file_priv, args->handle); 3775 if (!obj) 3776 return -ENOENT; 3777 3778 err = mutex_lock_interruptible(&obj->mm.lock); 3779 if (err) 3780 goto out; 3781 3782 if (i915_gem_object_has_pages(obj) && 3783 i915_gem_object_is_tiled(obj) && 3784 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 3785 if (obj->mm.madv == I915_MADV_WILLNEED) { 3786 GEM_BUG_ON(!obj->mm.quirked); 3787 __i915_gem_object_unpin_pages(obj); 3788 obj->mm.quirked = false; 3789 } 3790 if (args->madv == I915_MADV_WILLNEED) { 3791 GEM_BUG_ON(obj->mm.quirked); 3792 
__i915_gem_object_pin_pages(obj); 3793 obj->mm.quirked = true; 3794 } 3795 } 3796 3797 if (obj->mm.madv != __I915_MADV_PURGED) 3798 obj->mm.madv = args->madv; 3799 3800 /* if the object is no longer attached, discard its backing storage */ 3801 if (obj->mm.madv == I915_MADV_DONTNEED && 3802 !i915_gem_object_has_pages(obj)) 3803 __i915_gem_object_truncate(obj); 3804 3805 args->retained = obj->mm.madv != __I915_MADV_PURGED; 3806 mutex_unlock(&obj->mm.lock); 3807 3808 out: 3809 i915_gem_object_put(obj); 3810 return err; 3811 } 3812 3813 static void 3814 frontbuffer_retire(struct i915_active_request *active, 3815 struct i915_request *request) 3816 { 3817 struct drm_i915_gem_object *obj = 3818 container_of(active, typeof(*obj), frontbuffer_write); 3819 3820 intel_fb_obj_flush(obj, ORIGIN_CS); 3821 } 3822 3823 void i915_gem_object_init(struct drm_i915_gem_object *obj, 3824 const struct drm_i915_gem_object_ops *ops) 3825 { 3826 mutex_init(&obj->mm.lock); 3827 3828 spin_lock_init(&obj->vma.lock); 3829 INIT_LIST_HEAD(&obj->vma.list); 3830 3831 INIT_LIST_HEAD(&obj->lut_list); 3832 INIT_LIST_HEAD(&obj->batch_pool_link); 3833 3834 init_rcu_head(&obj->rcu); 3835 3836 obj->ops = ops; 3837 3838 reservation_object_init(&obj->__builtin_resv); 3839 obj->resv = &obj->__builtin_resv; 3840 3841 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 3842 i915_active_request_init(&obj->frontbuffer_write, 3843 NULL, frontbuffer_retire); 3844 3845 obj->mm.madv = I915_MADV_WILLNEED; 3846 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 3847 mutex_init(&obj->mm.get_page.lock); 3848 3849 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 3850 } 3851 3852 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 3853 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 3854 I915_GEM_OBJECT_IS_SHRINKABLE, 3855 3856 .get_pages = i915_gem_object_get_pages_gtt, 3857 .put_pages = i915_gem_object_put_pages_gtt, 3858 3859 .pwrite = i915_gem_object_pwrite_gtt, 3860 }; 3861 3862 static int i915_gem_object_create_shmem(struct drm_device *dev, 3863 struct drm_gem_object *obj, 3864 size_t size) 3865 { 3866 struct drm_i915_private *i915 = to_i915(dev); 3867 unsigned long flags = VM_NORESERVE; 3868 struct file *filp; 3869 3870 drm_gem_private_object_init(dev, obj, size); 3871 3872 if (i915->mm.gemfs) 3873 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 3874 flags); 3875 else 3876 filp = shmem_file_setup("i915", size, flags); 3877 3878 if (IS_ERR(filp)) 3879 return PTR_ERR(filp); 3880 3881 obj->filp = filp; 3882 3883 return 0; 3884 } 3885 3886 struct drm_i915_gem_object * 3887 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 3888 { 3889 struct drm_i915_gem_object *obj; 3890 struct address_space *mapping; 3891 unsigned int cache_level; 3892 gfp_t mask; 3893 int ret; 3894 3895 /* There is a prevalence of the assumption that we fit the object's 3896 * page count inside a 32bit _signed_ variable. Let's document this and 3897 * catch if we ever need to fix it. In the meantime, if you do spot 3898 * such a local variable, please consider fixing! 
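 *
 * Hence the two checks below: the page count must fit in a signed int,
 * and the size itself must fit in obj->base.size.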
3899 */ 3900 if (size >> PAGE_SHIFT > INT_MAX) 3901 return ERR_PTR(-E2BIG); 3902 3903 if (overflows_type(size, obj->base.size)) 3904 return ERR_PTR(-E2BIG); 3905 3906 obj = i915_gem_object_alloc(); 3907 if (obj == NULL) 3908 return ERR_PTR(-ENOMEM); 3909 3910 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 3911 if (ret) 3912 goto fail; 3913 3914 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 3915 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 3916 /* 965gm cannot relocate objects above 4GiB. */ 3917 mask &= ~__GFP_HIGHMEM; 3918 mask |= __GFP_DMA32; 3919 } 3920 3921 mapping = obj->base.filp->f_mapping; 3922 mapping_set_gfp_mask(mapping, mask); 3923 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 3924 3925 i915_gem_object_init(obj, &i915_gem_object_ops); 3926 3927 obj->write_domain = I915_GEM_DOMAIN_CPU; 3928 obj->read_domains = I915_GEM_DOMAIN_CPU; 3929 3930 if (HAS_LLC(dev_priv)) 3931 /* On some devices, we can have the GPU use the LLC (the CPU 3932 * cache) for about a 10% performance improvement 3933 * compared to uncached. Graphics requests other than 3934 * display scanout are coherent with the CPU in 3935 * accessing this cache. This means in this mode we 3936 * don't need to clflush on the CPU side, and on the 3937 * GPU side we only need to flush internal caches to 3938 * get data visible to the CPU. 3939 * 3940 * However, we maintain the display planes as UC, and so 3941 * need to rebind when first used as such. 3942 */ 3943 cache_level = I915_CACHE_LLC; 3944 else 3945 cache_level = I915_CACHE_NONE; 3946 3947 i915_gem_object_set_cache_coherency(obj, cache_level); 3948 3949 trace_i915_gem_object_create(obj); 3950 3951 return obj; 3952 3953 fail: 3954 i915_gem_object_free(obj); 3955 return ERR_PTR(ret); 3956 } 3957 3958 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 3959 { 3960 /* If we are the last user of the backing storage (be it shmemfs 3961 * pages or stolen etc), we know that the pages are going to be 3962 * immediately released. In this case, we can then skip copying 3963 * back the contents from the GPU. 3964 */ 3965 3966 if (obj->mm.madv != I915_MADV_WILLNEED) 3967 return false; 3968 3969 if (obj->base.filp == NULL) 3970 return true; 3971 3972 /* At first glance, this looks racy, but then again so would be 3973 * userspace racing mmap against close. However, the first external 3974 * reference to the filp can only be obtained through the 3975 * i915_gem_mmap_ioctl() which safeguards us against the user 3976 * acquiring such a reference whilst we are in the middle of 3977 * freeing the object. 3978 */ 3979 return file_count(obj->base.filp) == 1; 3980 } 3981 3982 static void __i915_gem_free_objects(struct drm_i915_private *i915, 3983 struct llist_node *freed) 3984 { 3985 struct drm_i915_gem_object *obj, *on; 3986 intel_wakeref_t wakeref; 3987 3988 wakeref = intel_runtime_pm_get(i915); 3989 llist_for_each_entry_safe(obj, on, freed, freed) { 3990 struct i915_vma *vma, *vn; 3991 3992 trace_i915_gem_object_destroy(obj); 3993 3994 mutex_lock(&i915->drm.struct_mutex); 3995 3996 GEM_BUG_ON(i915_gem_object_is_active(obj)); 3997 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { 3998 GEM_BUG_ON(i915_vma_is_active(vma)); 3999 vma->flags &= ~I915_VMA_PIN_MASK; 4000 i915_vma_destroy(vma); 4001 } 4002 GEM_BUG_ON(!list_empty(&obj->vma.list)); 4003 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); 4004 4005 /* This serializes freeing with the shrinker. 
Since the free 4006 * is delayed, first by RCU then by the workqueue, we want the 4007 * shrinker to be able to free pages of unreferenced objects, 4008 * or else we may oom whilst there are plenty of deferred 4009 * freed objects. 4010 */ 4011 if (i915_gem_object_has_pages(obj)) { 4012 spin_lock(&i915->mm.obj_lock); 4013 list_del_init(&obj->mm.link); 4014 spin_unlock(&i915->mm.obj_lock); 4015 } 4016 4017 mutex_unlock(&i915->drm.struct_mutex); 4018 4019 GEM_BUG_ON(obj->bind_count); 4020 GEM_BUG_ON(obj->userfault_count); 4021 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4022 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4023 4024 if (obj->ops->release) 4025 obj->ops->release(obj); 4026 4027 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4028 atomic_set(&obj->mm.pages_pin_count, 0); 4029 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4030 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4031 4032 if (obj->base.import_attach) 4033 drm_prime_gem_destroy(&obj->base, NULL); 4034 4035 reservation_object_fini(&obj->__builtin_resv); 4036 drm_gem_object_release(&obj->base); 4037 i915_gem_info_remove_obj(i915, obj->base.size); 4038 4039 bitmap_free(obj->bit_17); 4040 i915_gem_object_free(obj); 4041 4042 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4043 atomic_dec(&i915->mm.free_count); 4044 4045 if (on) 4046 cond_resched(); 4047 } 4048 intel_runtime_pm_put(i915, wakeref); 4049 } 4050 4051 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4052 { 4053 struct llist_node *freed; 4054 4055 /* Free the oldest, most stale object to keep the free_list short */ 4056 freed = NULL; 4057 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4058 /* Only one consumer of llist_del_first() allowed */ 4059 spin_lock(&i915->mm.free_lock); 4060 freed = llist_del_first(&i915->mm.free_list); 4061 spin_unlock(&i915->mm.free_lock); 4062 } 4063 if (unlikely(freed)) { 4064 freed->next = NULL; 4065 __i915_gem_free_objects(i915, freed); 4066 } 4067 } 4068 4069 static void __i915_gem_free_work(struct work_struct *work) 4070 { 4071 struct drm_i915_private *i915 = 4072 container_of(work, struct drm_i915_private, mm.free_work); 4073 struct llist_node *freed; 4074 4075 /* 4076 * All file-owned VMA should have been released by this point through 4077 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4078 * However, the object may also be bound into the global GTT (e.g. 4079 * older GPUs without per-process support, or for direct access through 4080 * the GTT either for the user or for scanout). Those VMA still need to 4081 * unbound now. 4082 */ 4083 4084 spin_lock(&i915->mm.free_lock); 4085 while ((freed = llist_del_all(&i915->mm.free_list))) { 4086 spin_unlock(&i915->mm.free_lock); 4087 4088 __i915_gem_free_objects(i915, freed); 4089 if (need_resched()) 4090 return; 4091 4092 spin_lock(&i915->mm.free_lock); 4093 } 4094 spin_unlock(&i915->mm.free_lock); 4095 } 4096 4097 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4098 { 4099 struct drm_i915_gem_object *obj = 4100 container_of(head, typeof(*obj), rcu); 4101 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4102 4103 /* 4104 * We reuse obj->rcu for the freed list, so we had better not treat 4105 * it like a rcu_head from this point forwards. And we expect all 4106 * objects to be freed via this path. 
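 * destroy_rcu_head() below is the debug-objects annotation pairing the
 * init_rcu_head() in i915_gem_object_init(); past this point the
 * rcu_head's storage is reused purely as the freed-list node.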
4107 */ 4108 destroy_rcu_head(&obj->rcu); 4109 4110 /* 4111 * Since we require blocking on struct_mutex to unbind the freed 4112 * object from the GPU before releasing resources back to the 4113 * system, we can not do that directly from the RCU callback (which may 4114 * be a softirq context), but must instead then defer that work onto a 4115 * kthread. We use the RCU callback rather than move the freed object 4116 * directly onto the work queue so that we can mix between using the 4117 * worker and performing frees directly from subsequent allocations for 4118 * crude but effective memory throttling. 4119 */ 4120 if (llist_add(&obj->freed, &i915->mm.free_list)) 4121 queue_work(i915->wq, &i915->mm.free_work); 4122 } 4123 4124 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4125 { 4126 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4127 4128 if (obj->mm.quirked) 4129 __i915_gem_object_unpin_pages(obj); 4130 4131 if (discard_backing_storage(obj)) 4132 obj->mm.madv = I915_MADV_DONTNEED; 4133 4134 /* 4135 * Before we free the object, make sure any pure RCU-only 4136 * read-side critical sections are complete, e.g. 4137 * i915_gem_busy_ioctl(). For the corresponding synchronized 4138 * lookup see i915_gem_object_lookup_rcu(). 4139 */ 4140 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 4141 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4142 } 4143 4144 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4145 { 4146 lockdep_assert_held(&obj->base.dev->struct_mutex); 4147 4148 if (!i915_gem_object_has_active_reference(obj) && 4149 i915_gem_object_is_active(obj)) 4150 i915_gem_object_set_active_reference(obj); 4151 else 4152 i915_gem_object_put(obj); 4153 } 4154 4155 void i915_gem_sanitize(struct drm_i915_private *i915) 4156 { 4157 intel_wakeref_t wakeref; 4158 4159 GEM_TRACE("\n"); 4160 4161 wakeref = intel_runtime_pm_get(i915); 4162 intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); 4163 4164 /* 4165 * As we have just resumed the machine and woken the device up from 4166 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 4167 * back to defaults, recovering from whatever wedged state we left it 4168 * in and so worth trying to use the device once more. 4169 */ 4170 if (i915_terminally_wedged(i915)) 4171 i915_gem_unset_wedged(i915); 4172 4173 /* 4174 * If we inherit context state from the BIOS or earlier occupants 4175 * of the GPU, the GPU may be in an inconsistent state when we 4176 * try to take over. The only way to remove the earlier state 4177 * is by resetting. However, resetting on earlier gen is tricky as 4178 * it may impact the display and we are uncertain about the stability 4179 * of the reset, so this could be applied to even earlier gen. 
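 * The reset itself, when it is needed, is issued by intel_gt_sanitize()
 * just below.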
4180 */ 4181 intel_gt_sanitize(i915, false); 4182 4183 intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); 4184 intel_runtime_pm_put(i915, wakeref); 4185 4186 mutex_lock(&i915->drm.struct_mutex); 4187 i915_gem_contexts_lost(i915); 4188 mutex_unlock(&i915->drm.struct_mutex); 4189 } 4190 4191 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 4192 { 4193 if (INTEL_GEN(dev_priv) < 5 || 4194 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4195 return; 4196 4197 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4198 DISP_TILE_SURFACE_SWIZZLING); 4199 4200 if (IS_GEN(dev_priv, 5)) 4201 return; 4202 4203 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4204 if (IS_GEN(dev_priv, 6)) 4205 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4206 else if (IS_GEN(dev_priv, 7)) 4207 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4208 else if (IS_GEN(dev_priv, 8)) 4209 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4210 else 4211 BUG(); 4212 } 4213 4214 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 4215 { 4216 I915_WRITE(RING_CTL(base), 0); 4217 I915_WRITE(RING_HEAD(base), 0); 4218 I915_WRITE(RING_TAIL(base), 0); 4219 I915_WRITE(RING_START(base), 0); 4220 } 4221 4222 static void init_unused_rings(struct drm_i915_private *dev_priv) 4223 { 4224 if (IS_I830(dev_priv)) { 4225 init_unused_ring(dev_priv, PRB1_BASE); 4226 init_unused_ring(dev_priv, SRB0_BASE); 4227 init_unused_ring(dev_priv, SRB1_BASE); 4228 init_unused_ring(dev_priv, SRB2_BASE); 4229 init_unused_ring(dev_priv, SRB3_BASE); 4230 } else if (IS_GEN(dev_priv, 2)) { 4231 init_unused_ring(dev_priv, SRB0_BASE); 4232 init_unused_ring(dev_priv, SRB1_BASE); 4233 } else if (IS_GEN(dev_priv, 3)) { 4234 init_unused_ring(dev_priv, PRB1_BASE); 4235 init_unused_ring(dev_priv, PRB2_BASE); 4236 } 4237 } 4238 4239 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 4240 { 4241 int ret; 4242 4243 dev_priv->gt.last_init_time = ktime_get(); 4244 4245 /* Double layer security blanket, see i915_gem_init() */ 4246 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); 4247 4248 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 4249 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4250 4251 if (IS_HASWELL(dev_priv)) 4252 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 4253 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4254 4255 /* Apply the GT workarounds... */ 4256 intel_gt_apply_workarounds(dev_priv); 4257 /* ...and determine whether they are sticking. */ 4258 intel_gt_verify_workarounds(dev_priv, "init"); 4259 4260 i915_gem_init_swizzling(dev_priv); 4261 4262 /* 4263 * At least 830 can leave some of the unused rings 4264 * "active" (i.e. head != tail) after resume which 4265 * will prevent C3 entry. Make sure all unused rings 4266 * are totally idle.
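 * ("Idle" here simply means head == tail; init_unused_rings() below just
 * zeroes RING_CTL, RING_HEAD, RING_TAIL and RING_START for every ring we
 * never use on these old platforms.)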
4267 */ 4268 init_unused_rings(dev_priv); 4269 4270 BUG_ON(!dev_priv->kernel_context); 4271 ret = i915_terminally_wedged(dev_priv); 4272 if (ret) 4273 goto out; 4274 4275 ret = i915_ppgtt_init_hw(dev_priv); 4276 if (ret) { 4277 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 4278 goto out; 4279 } 4280 4281 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 4282 if (ret) { 4283 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 4284 goto out; 4285 } 4286 4287 /* We can't enable contexts until all firmware is loaded */ 4288 ret = intel_uc_init_hw(dev_priv); 4289 if (ret) { 4290 DRM_ERROR("Enabling uc failed (%d)\n", ret); 4291 goto out; 4292 } 4293 4294 intel_mocs_init_l3cc_table(dev_priv); 4295 4296 /* Only when the HW is re-initialised, can we replay the requests */ 4297 ret = intel_engines_resume(dev_priv); 4298 if (ret) 4299 goto cleanup_uc; 4300 4301 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 4302 4303 intel_engines_set_scheduler_caps(dev_priv); 4304 return 0; 4305 4306 cleanup_uc: 4307 intel_uc_fini_hw(dev_priv); 4308 out: 4309 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 4310 4311 return ret; 4312 } 4313 4314 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 4315 { 4316 struct intel_engine_cs *engine; 4317 struct i915_gem_context *ctx; 4318 struct i915_gem_engines *e; 4319 enum intel_engine_id id; 4320 int err = 0; 4321 4322 /* 4323 * As we reset the gpu during very early sanitisation, the current 4324 * register state on the GPU should reflect its defaults values. 4325 * We load a context onto the hw (with restore-inhibit), then switch 4326 * over to a second context to save that default register state. We 4327 * can then prime every new context with that state so they all start 4328 * from the same default HW values. 4329 */ 4330 4331 ctx = i915_gem_context_create_kernel(i915, 0); 4332 if (IS_ERR(ctx)) 4333 return PTR_ERR(ctx); 4334 4335 e = i915_gem_context_lock_engines(ctx); 4336 4337 for_each_engine(engine, i915, id) { 4338 struct intel_context *ce = e->engines[id]; 4339 struct i915_request *rq; 4340 4341 rq = intel_context_create_request(ce); 4342 if (IS_ERR(rq)) { 4343 err = PTR_ERR(rq); 4344 goto err_active; 4345 } 4346 4347 err = 0; 4348 if (rq->engine->init_context) 4349 err = rq->engine->init_context(rq); 4350 4351 i915_request_add(rq); 4352 if (err) 4353 goto err_active; 4354 } 4355 4356 /* Flush the default context image to memory, and enable powersaving. */ 4357 if (!i915_gem_load_power_context(i915)) { 4358 err = -EIO; 4359 goto err_active; 4360 } 4361 4362 for_each_engine(engine, i915, id) { 4363 struct intel_context *ce = e->engines[id]; 4364 struct i915_vma *state = ce->state; 4365 void *vaddr; 4366 4367 if (!state) 4368 continue; 4369 4370 GEM_BUG_ON(intel_context_is_pinned(ce)); 4371 4372 /* 4373 * As we will hold a reference to the logical state, it will 4374 * not be torn down with the context, and importantly the 4375 * object will hold onto its vma (making it possible for a 4376 * stray GTT write to corrupt our defaults). Unmap the vma 4377 * from the GTT to prevent such accidents and reclaim the 4378 * space. 
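 * A failure to unbind here is treated like any other error while
 * recording the defaults: we bail out through err_active and declare
 * the device wedged.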
4379 */ 4380 err = i915_vma_unbind(state); 4381 if (err) 4382 goto err_active; 4383 4384 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 4385 if (err) 4386 goto err_active; 4387 4388 engine->default_state = i915_gem_object_get(state->obj); 4389 i915_gem_object_set_cache_coherency(engine->default_state, 4390 I915_CACHE_LLC); 4391 4392 /* Check we can acquire the image of the context state */ 4393 vaddr = i915_gem_object_pin_map(engine->default_state, 4394 I915_MAP_FORCE_WB); 4395 if (IS_ERR(vaddr)) { 4396 err = PTR_ERR(vaddr); 4397 goto err_active; 4398 } 4399 4400 i915_gem_object_unpin_map(engine->default_state); 4401 } 4402 4403 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 4404 unsigned int found = intel_engines_has_context_isolation(i915); 4405 4406 /* 4407 * Make sure that classes with multiple engine instances all 4408 * share the same basic configuration. 4409 */ 4410 for_each_engine(engine, i915, id) { 4411 unsigned int bit = BIT(engine->uabi_class); 4412 unsigned int expected = engine->default_state ? bit : 0; 4413 4414 if ((found & bit) != expected) { 4415 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 4416 engine->uabi_class, engine->name); 4417 } 4418 } 4419 } 4420 4421 out_ctx: 4422 i915_gem_context_unlock_engines(ctx); 4423 i915_gem_context_set_closed(ctx); 4424 i915_gem_context_put(ctx); 4425 return err; 4426 4427 err_active: 4428 /* 4429 * If we have to abandon now, we expect the engines to be idle 4430 * and ready to be torn-down. The quickest way we can accomplish 4431 * this is by declaring ourselves wedged. 4432 */ 4433 i915_gem_set_wedged(i915); 4434 goto out_ctx; 4435 } 4436 4437 static int 4438 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 4439 { 4440 struct drm_i915_gem_object *obj; 4441 struct i915_vma *vma; 4442 int ret; 4443 4444 obj = i915_gem_object_create_stolen(i915, size); 4445 if (!obj) 4446 obj = i915_gem_object_create_internal(i915, size); 4447 if (IS_ERR(obj)) { 4448 DRM_ERROR("Failed to allocate scratch page\n"); 4449 return PTR_ERR(obj); 4450 } 4451 4452 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 4453 if (IS_ERR(vma)) { 4454 ret = PTR_ERR(vma); 4455 goto err_unref; 4456 } 4457 4458 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 4459 if (ret) 4460 goto err_unref; 4461 4462 i915->gt.scratch = vma; 4463 return 0; 4464 4465 err_unref: 4466 i915_gem_object_put(obj); 4467 return ret; 4468 } 4469 4470 static void i915_gem_fini_scratch(struct drm_i915_private *i915) 4471 { 4472 i915_vma_unpin_and_release(&i915->gt.scratch, 0); 4473 } 4474 4475 static int intel_engines_verify_workarounds(struct drm_i915_private *i915) 4476 { 4477 struct intel_engine_cs *engine; 4478 enum intel_engine_id id; 4479 int err = 0; 4480 4481 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4482 return 0; 4483 4484 for_each_engine(engine, i915, id) { 4485 if (intel_engine_verify_workarounds(engine, "load")) 4486 err = -EIO; 4487 } 4488 4489 return err; 4490 } 4491 4492 int i915_gem_init(struct drm_i915_private *dev_priv) 4493 { 4494 int ret; 4495 4496 /* We need to fallback to 4K pages if host doesn't support huge gtt. 
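 * (In other words, when running as a vGPU guest whose host lacks huge
 * GTT support, the advertised page sizes are clamped to
 * I915_GTT_PAGE_SIZE_4K below.)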
*/ 4497 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 4498 mkwrite_device_info(dev_priv)->page_sizes = 4499 I915_GTT_PAGE_SIZE_4K; 4500 4501 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 4502 4503 i915_timelines_init(dev_priv); 4504 4505 ret = i915_gem_init_userptr(dev_priv); 4506 if (ret) 4507 return ret; 4508 4509 ret = intel_uc_init_misc(dev_priv); 4510 if (ret) 4511 return ret; 4512 4513 ret = intel_wopcm_init(&dev_priv->wopcm); 4514 if (ret) 4515 goto err_uc_misc; 4516 4517 /* This is just a security blanket to placate dragons. 4518 * On some systems, we very sporadically observe that the first TLBs 4519 * used by the CS may be stale, despite us poking the TLB reset. If 4520 * we hold the forcewake during initialisation these problems 4521 * just magically go away. 4522 */ 4523 mutex_lock(&dev_priv->drm.struct_mutex); 4524 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); 4525 4526 ret = i915_gem_init_ggtt(dev_priv); 4527 if (ret) { 4528 GEM_BUG_ON(ret == -EIO); 4529 goto err_unlock; 4530 } 4531 4532 ret = i915_gem_init_scratch(dev_priv, 4533 IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE); 4534 if (ret) { 4535 GEM_BUG_ON(ret == -EIO); 4536 goto err_ggtt; 4537 } 4538 4539 ret = intel_engines_setup(dev_priv); 4540 if (ret) { 4541 GEM_BUG_ON(ret == -EIO); 4542 goto err_unlock; 4543 } 4544 4545 ret = i915_gem_contexts_init(dev_priv); 4546 if (ret) { 4547 GEM_BUG_ON(ret == -EIO); 4548 goto err_scratch; 4549 } 4550 4551 ret = intel_engines_init(dev_priv); 4552 if (ret) { 4553 GEM_BUG_ON(ret == -EIO); 4554 goto err_context; 4555 } 4556 4557 intel_init_gt_powersave(dev_priv); 4558 4559 ret = intel_uc_init(dev_priv); 4560 if (ret) 4561 goto err_pm; 4562 4563 ret = i915_gem_init_hw(dev_priv); 4564 if (ret) 4565 goto err_uc_init; 4566 4567 /* 4568 * Despite its name intel_init_clock_gating applies both display 4569 * clock gating workarounds; GT mmio workarounds and the occasional 4570 * GT power context workaround. Worse, sometimes it includes a context 4571 * register workaround which we need to apply before we record the 4572 * default HW state for all contexts. 4573 * 4574 * FIXME: break up the workarounds and apply them at the right time! 4575 */ 4576 intel_init_clock_gating(dev_priv); 4577 4578 ret = intel_engines_verify_workarounds(dev_priv); 4579 if (ret) 4580 goto err_init_hw; 4581 4582 ret = __intel_engines_record_defaults(dev_priv); 4583 if (ret) 4584 goto err_init_hw; 4585 4586 if (i915_inject_load_failure()) { 4587 ret = -ENODEV; 4588 goto err_init_hw; 4589 } 4590 4591 if (i915_inject_load_failure()) { 4592 ret = -EIO; 4593 goto err_init_hw; 4594 } 4595 4596 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 4597 mutex_unlock(&dev_priv->drm.struct_mutex); 4598 4599 return 0; 4600 4601 /* 4602 * Unwinding is complicated by that we want to handle -EIO to mean 4603 * disable GPU submission but keep KMS alive. We want to mark the 4604 * HW as irrevisibly wedged, but keep enough state around that the 4605 * driver doesn't explode during runtime. 
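 * Hence the error labels below come in two flavours: ordinary errors
 * tear every step down again, while -EIO skips the destructive steps
 * and falls through to the minimal KMS recovery further down
 * (re-enable the GGTT, restore the GTT mappings and fences, and declare
 * the GPU wedged if it is not already).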
4606 */ 4607 err_init_hw: 4608 mutex_unlock(&dev_priv->drm.struct_mutex); 4609 4610 i915_gem_set_wedged(dev_priv); 4611 i915_gem_suspend(dev_priv); 4612 i915_gem_suspend_late(dev_priv); 4613 4614 i915_gem_drain_workqueue(dev_priv); 4615 4616 mutex_lock(&dev_priv->drm.struct_mutex); 4617 intel_uc_fini_hw(dev_priv); 4618 err_uc_init: 4619 intel_uc_fini(dev_priv); 4620 err_pm: 4621 if (ret != -EIO) { 4622 intel_cleanup_gt_powersave(dev_priv); 4623 intel_engines_cleanup(dev_priv); 4624 } 4625 err_context: 4626 if (ret != -EIO) 4627 i915_gem_contexts_fini(dev_priv); 4628 err_scratch: 4629 i915_gem_fini_scratch(dev_priv); 4630 err_ggtt: 4631 err_unlock: 4632 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 4633 mutex_unlock(&dev_priv->drm.struct_mutex); 4634 4635 err_uc_misc: 4636 intel_uc_fini_misc(dev_priv); 4637 4638 if (ret != -EIO) { 4639 i915_gem_cleanup_userptr(dev_priv); 4640 i915_timelines_fini(dev_priv); 4641 } 4642 4643 if (ret == -EIO) { 4644 mutex_lock(&dev_priv->drm.struct_mutex); 4645 4646 /* 4647 * Allow engine initialisation to fail by marking the GPU as 4648 * wedged. But we only want to do this where the GPU is angry, 4649 * for all other failure, such as an allocation failure, bail. 4650 */ 4651 if (!i915_reset_failed(dev_priv)) { 4652 i915_load_error(dev_priv, 4653 "Failed to initialize GPU, declaring it wedged!\n"); 4654 i915_gem_set_wedged(dev_priv); 4655 } 4656 4657 /* Minimal basic recovery for KMS */ 4658 ret = i915_ggtt_enable_hw(dev_priv); 4659 i915_gem_restore_gtt_mappings(dev_priv); 4660 i915_gem_restore_fences(dev_priv); 4661 intel_init_clock_gating(dev_priv); 4662 4663 mutex_unlock(&dev_priv->drm.struct_mutex); 4664 } 4665 4666 i915_gem_drain_freed_objects(dev_priv); 4667 return ret; 4668 } 4669 4670 void i915_gem_fini(struct drm_i915_private *dev_priv) 4671 { 4672 GEM_BUG_ON(dev_priv->gt.awake); 4673 4674 i915_gem_suspend_late(dev_priv); 4675 intel_disable_gt_powersave(dev_priv); 4676 4677 /* Flush any outstanding unpin_work. 
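 * (this also catches the deferred object frees that
 * __i915_gem_free_object_rcu() queues on i915->wq)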
*/ 4678 i915_gem_drain_workqueue(dev_priv); 4679 4680 mutex_lock(&dev_priv->drm.struct_mutex); 4681 intel_uc_fini_hw(dev_priv); 4682 intel_uc_fini(dev_priv); 4683 intel_engines_cleanup(dev_priv); 4684 i915_gem_contexts_fini(dev_priv); 4685 i915_gem_fini_scratch(dev_priv); 4686 mutex_unlock(&dev_priv->drm.struct_mutex); 4687 4688 intel_wa_list_free(&dev_priv->gt_wa_list); 4689 4690 intel_cleanup_gt_powersave(dev_priv); 4691 4692 intel_uc_fini_misc(dev_priv); 4693 i915_gem_cleanup_userptr(dev_priv); 4694 i915_timelines_fini(dev_priv); 4695 4696 i915_gem_drain_freed_objects(dev_priv); 4697 4698 WARN_ON(!list_empty(&dev_priv->contexts.list)); 4699 } 4700 4701 void i915_gem_init_mmio(struct drm_i915_private *i915) 4702 { 4703 i915_gem_sanitize(i915); 4704 } 4705 4706 void 4707 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 4708 { 4709 int i; 4710 4711 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 4712 !IS_CHERRYVIEW(dev_priv)) 4713 dev_priv->num_fence_regs = 32; 4714 else if (INTEL_GEN(dev_priv) >= 4 || 4715 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 4716 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 4717 dev_priv->num_fence_regs = 16; 4718 else 4719 dev_priv->num_fence_regs = 8; 4720 4721 if (intel_vgpu_active(dev_priv)) 4722 dev_priv->num_fence_regs = 4723 I915_READ(vgtif_reg(avail_rs.fence_num)); 4724 4725 /* Initialize fence registers to zero */ 4726 for (i = 0; i < dev_priv->num_fence_regs; i++) { 4727 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 4728 4729 fence->i915 = dev_priv; 4730 fence->id = i; 4731 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 4732 } 4733 i915_gem_restore_fences(dev_priv); 4734 4735 i915_gem_detect_bit_6_swizzle(dev_priv); 4736 } 4737 4738 static void i915_gem_init__mm(struct drm_i915_private *i915) 4739 { 4740 spin_lock_init(&i915->mm.object_stat_lock); 4741 spin_lock_init(&i915->mm.obj_lock); 4742 spin_lock_init(&i915->mm.free_lock); 4743 4744 init_llist_head(&i915->mm.free_list); 4745 4746 INIT_LIST_HEAD(&i915->mm.unbound_list); 4747 INIT_LIST_HEAD(&i915->mm.bound_list); 4748 INIT_LIST_HEAD(&i915->mm.fence_list); 4749 INIT_LIST_HEAD(&i915->mm.userfault_list); 4750 4751 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 4752 } 4753 4754 int i915_gem_init_early(struct drm_i915_private *dev_priv) 4755 { 4756 int err; 4757 4758 intel_gt_pm_init(dev_priv); 4759 4760 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 4761 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 4762 4763 i915_gem_init__mm(dev_priv); 4764 i915_gem_init__pm(dev_priv); 4765 4766 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 4767 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 4768 mutex_init(&dev_priv->gpu_error.wedge_mutex); 4769 init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); 4770 4771 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 4772 4773 spin_lock_init(&dev_priv->fb_tracking.lock); 4774 4775 err = i915_gemfs_init(dev_priv); 4776 if (err) 4777 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 4778 4779 return 0; 4780 } 4781 4782 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 4783 { 4784 i915_gem_drain_freed_objects(dev_priv); 4785 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 4786 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 4787 WARN_ON(dev_priv->mm.object_count); 4788 4789 cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); 4790 4791 i915_gemfs_fini(dev_priv); 4792 } 4793 4794 int i915_gem_freeze(struct drm_i915_private *dev_priv) 
4795 { 4796 /* Discard all purgeable objects, let userspace recover those as 4797 * required after resuming. 4798 */ 4799 i915_gem_shrink_all(dev_priv); 4800 4801 return 0; 4802 } 4803 4804 int i915_gem_freeze_late(struct drm_i915_private *i915) 4805 { 4806 struct drm_i915_gem_object *obj; 4807 struct list_head *phases[] = { 4808 &i915->mm.unbound_list, 4809 &i915->mm.bound_list, 4810 NULL 4811 }, **phase; 4812 4813 /* 4814 * Called just before we write the hibernation image. 4815 * 4816 * We need to update the domain tracking to reflect that the CPU 4817 * will be accessing all the pages to create and restore from the 4818 * hibernation, and so upon restoration those pages will be in the 4819 * CPU domain. 4820 * 4821 * To make sure the hibernation image contains the latest state, 4822 * we update that state just before writing out the image. 4823 * 4824 * To try and reduce the hibernation image, we manually shrink 4825 * the objects as well, see i915_gem_freeze() 4826 */ 4827 4828 i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND); 4829 i915_gem_drain_freed_objects(i915); 4830 4831 mutex_lock(&i915->drm.struct_mutex); 4832 for (phase = phases; *phase; phase++) { 4833 list_for_each_entry(obj, *phase, mm.link) 4834 WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); 4835 } 4836 mutex_unlock(&i915->drm.struct_mutex); 4837 4838 return 0; 4839 } 4840 4841 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4842 { 4843 struct drm_i915_file_private *file_priv = file->driver_priv; 4844 struct i915_request *request; 4845 4846 /* Clean up our request list when the client is going away, so that 4847 * later retire_requests won't dereference our soon-to-be-gone 4848 * file_priv. 4849 */ 4850 spin_lock(&file_priv->mm.lock); 4851 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 4852 request->file_priv = NULL; 4853 spin_unlock(&file_priv->mm.lock); 4854 } 4855 4856 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 4857 { 4858 struct drm_i915_file_private *file_priv; 4859 int ret; 4860 4861 DRM_DEBUG("\n"); 4862 4863 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 4864 if (!file_priv) 4865 return -ENOMEM; 4866 4867 file->driver_priv = file_priv; 4868 file_priv->dev_priv = i915; 4869 file_priv->file = file; 4870 4871 spin_lock_init(&file_priv->mm.lock); 4872 INIT_LIST_HEAD(&file_priv->mm.request_list); 4873 4874 file_priv->bsd_engine = -1; 4875 file_priv->hang_timestamp = jiffies; 4876 4877 ret = i915_gem_context_open(i915, file); 4878 if (ret) 4879 kfree(file_priv); 4880 4881 return ret; 4882 } 4883 4884 /** 4885 * i915_gem_track_fb - update frontbuffer tracking 4886 * @old: current GEM buffer for the frontbuffer slots 4887 * @new: new GEM buffer for the frontbuffer slots 4888 * @frontbuffer_bits: bitmask of frontbuffer slots 4889 * 4890 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 4891 * from @old and setting them in @new. Both @old and @new can be NULL. 4892 */ 4893 void i915_gem_track_fb(struct drm_i915_gem_object *old, 4894 struct drm_i915_gem_object *new, 4895 unsigned frontbuffer_bits) 4896 { 4897 /* Control of individual bits within the mask are guarded by 4898 * the owning plane->mutex, i.e. we can never see concurrent 4899 * manipulation of individual bits. But since the bitfield as a whole 4900 * is updated using RMW, we need to use atomics in order to update 4901 * the bits. 
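 * (Each pipe owns INTEL_FRONTBUFFER_BITS_PER_PIPE consecutive bits of
 * that mask, which is what the BUILD_BUG_ON below checks still fits
 * inside an atomic_t.)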
4902 */ 4903 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 4904 BITS_PER_TYPE(atomic_t)); 4905 4906 if (old) { 4907 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 4908 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 4909 } 4910 4911 if (new) { 4912 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 4913 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 4914 } 4915 } 4916 4917 /* Allocate a new GEM object and fill it with the supplied data */ 4918 struct drm_i915_gem_object * 4919 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, 4920 const void *data, size_t size) 4921 { 4922 struct drm_i915_gem_object *obj; 4923 struct file *file; 4924 size_t offset; 4925 int err; 4926 4927 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); 4928 if (IS_ERR(obj)) 4929 return obj; 4930 4931 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 4932 4933 file = obj->base.filp; 4934 offset = 0; 4935 do { 4936 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 4937 struct page *page; 4938 void *pgdata, *vaddr; 4939 4940 err = pagecache_write_begin(file, file->f_mapping, 4941 offset, len, 0, 4942 &page, &pgdata); 4943 if (err < 0) 4944 goto fail; 4945 4946 vaddr = kmap(page); 4947 memcpy(vaddr, data, len); 4948 kunmap(page); 4949 4950 err = pagecache_write_end(file, file->f_mapping, 4951 offset, len, len, 4952 page, pgdata); 4953 if (err < 0) 4954 goto fail; 4955 4956 size -= len; 4957 data += len; 4958 offset += len; 4959 } while (size); 4960 4961 return obj; 4962 4963 fail: 4964 i915_gem_object_put(obj); 4965 return ERR_PTR(err); 4966 } 4967 4968 struct scatterlist * 4969 i915_gem_object_get_sg(struct drm_i915_gem_object *obj, 4970 unsigned int n, 4971 unsigned int *offset) 4972 { 4973 struct i915_gem_object_page_iter *iter = &obj->mm.get_page; 4974 struct scatterlist *sg; 4975 unsigned int idx, count; 4976 4977 might_sleep(); 4978 GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); 4979 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 4980 4981 /* As we iterate forward through the sg, we record each entry in a 4982 * radixtree for quick repeated (backwards) lookups. If we have seen 4983 * this index previously, we will have an entry for it. 4984 * 4985 * Initial lookup is O(N), but this is amortized to O(1) for 4986 * sequential page access (where each new request is consecutive 4987 * to the previous one). Repeated lookups are O(lg(obj->base.size)), 4988 * i.e. O(1) with a large constant! 4989 */ 4990 if (n < READ_ONCE(iter->sg_idx)) 4991 goto lookup; 4992 4993 mutex_lock(&iter->lock); 4994 4995 /* We prefer to reuse the last sg so that repeated lookup of this 4996 * (or the subsequent) sg are fast - comparing against the last 4997 * sg is faster than going through the radixtree. 4998 */ 4999 5000 sg = iter->sg_pos; 5001 idx = iter->sg_idx; 5002 count = __sg_page_count(sg); 5003 5004 while (idx + count <= n) { 5005 void *entry; 5006 unsigned long i; 5007 int ret; 5008 5009 /* If we cannot allocate and insert this entry, or the 5010 * individual pages from this range, cancel updating the 5011 * sg_idx so that on this lookup we are forced to linearly 5012 * scan onwards, but on future lookups we will try the 5013 * insertion again (in which case we need to be careful of 5014 * the error return reporting that we have already inserted 5015 * this index). 
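 * The tree maps page index to scatterlist entry: the first page of each
 * sg entry stores the sg pointer itself, while the remaining pages of a
 * multi-page entry store xa_mk_value(index of the first page) so that
 * the lookup path below can hop back to the base entry and compute the
 * offset within it.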
5016 */ 5017 ret = radix_tree_insert(&iter->radix, idx, sg); 5018 if (ret && ret != -EEXIST) 5019 goto scan; 5020 5021 entry = xa_mk_value(idx); 5022 for (i = 1; i < count; i++) { 5023 ret = radix_tree_insert(&iter->radix, idx + i, entry); 5024 if (ret && ret != -EEXIST) 5025 goto scan; 5026 } 5027 5028 idx += count; 5029 sg = ____sg_next(sg); 5030 count = __sg_page_count(sg); 5031 } 5032 5033 scan: 5034 iter->sg_pos = sg; 5035 iter->sg_idx = idx; 5036 5037 mutex_unlock(&iter->lock); 5038 5039 if (unlikely(n < idx)) /* insertion completed by another thread */ 5040 goto lookup; 5041 5042 /* In case we failed to insert the entry into the radixtree, we need 5043 * to look beyond the current sg. 5044 */ 5045 while (idx + count <= n) { 5046 idx += count; 5047 sg = ____sg_next(sg); 5048 count = __sg_page_count(sg); 5049 } 5050 5051 *offset = n - idx; 5052 return sg; 5053 5054 lookup: 5055 rcu_read_lock(); 5056 5057 sg = radix_tree_lookup(&iter->radix, n); 5058 GEM_BUG_ON(!sg); 5059 5060 /* If this index is in the middle of multi-page sg entry, 5061 * the radix tree will contain a value entry that points 5062 * to the start of that range. We will return the pointer to 5063 * the base page and the offset of this page within the 5064 * sg entry's range. 5065 */ 5066 *offset = 0; 5067 if (unlikely(xa_is_value(sg))) { 5068 unsigned long base = xa_to_value(sg); 5069 5070 sg = radix_tree_lookup(&iter->radix, base); 5071 GEM_BUG_ON(!sg); 5072 5073 *offset = n - base; 5074 } 5075 5076 rcu_read_unlock(); 5077 5078 return sg; 5079 } 5080 5081 struct page * 5082 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) 5083 { 5084 struct scatterlist *sg; 5085 unsigned int offset; 5086 5087 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); 5088 5089 sg = i915_gem_object_get_sg(obj, n, &offset); 5090 return nth_page(sg_page(sg), offset); 5091 } 5092 5093 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5094 struct page * 5095 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, 5096 unsigned int n) 5097 { 5098 struct page *page; 5099 5100 page = i915_gem_object_get_page(obj, n); 5101 if (!obj->mm.dirty) 5102 set_page_dirty(page); 5103 5104 return page; 5105 } 5106 5107 dma_addr_t 5108 i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, 5109 unsigned long n, 5110 unsigned int *len) 5111 { 5112 struct scatterlist *sg; 5113 unsigned int offset; 5114 5115 sg = i915_gem_object_get_sg(obj, n, &offset); 5116 5117 if (len) 5118 *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); 5119 5120 return sg_dma_address(sg) + (offset << PAGE_SHIFT); 5121 } 5122 5123 dma_addr_t 5124 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, 5125 unsigned long n) 5126 { 5127 return i915_gem_object_get_dma_address_len(obj, n, NULL); 5128 } 5129 5130 5131 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) 5132 { 5133 struct sg_table *pages; 5134 int err; 5135 5136 if (align > obj->base.size) 5137 return -EINVAL; 5138 5139 if (obj->ops == &i915_gem_phys_ops) 5140 return 0; 5141 5142 if (obj->ops != &i915_gem_object_ops) 5143 return -EINVAL; 5144 5145 err = i915_gem_object_unbind(obj); 5146 if (err) 5147 return err; 5148 5149 mutex_lock(&obj->mm.lock); 5150 5151 if (obj->mm.madv != I915_MADV_WILLNEED) { 5152 err = -EFAULT; 5153 goto err_unlock; 5154 } 5155 5156 if (obj->mm.quirked) { 5157 err = -EFAULT; 5158 goto err_unlock; 5159 } 5160 5161 if (obj->mm.mapping) { 5162 err = -EBUSY; 5163 goto err_unlock; 5164 } 5165 5166 pages = 
__i915_gem_object_unset_pages(obj); 5167 5168 obj->ops = &i915_gem_phys_ops; 5169 5170 err = ____i915_gem_object_get_pages(obj); 5171 if (err) 5172 goto err_xfer; 5173 5174 /* Perma-pin (until release) the physical set of pages */ 5175 __i915_gem_object_pin_pages(obj); 5176 5177 if (!IS_ERR_OR_NULL(pages)) 5178 i915_gem_object_ops.put_pages(obj, pages); 5179 mutex_unlock(&obj->mm.lock); 5180 return 0; 5181 5182 err_xfer: 5183 obj->ops = &i915_gem_object_ops; 5184 if (!IS_ERR_OR_NULL(pages)) { 5185 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl); 5186 5187 __i915_gem_object_set_pages(obj, pages, sg_page_sizes); 5188 } 5189 err_unlock: 5190 mutex_unlock(&obj->mm.lock); 5191 return err; 5192 } 5193 5194 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5195 #include "selftests/scatterlist.c" 5196 #include "selftests/mock_gem_device.c" 5197 #include "selftests/huge_gem_object.c" 5198 #include "selftests/huge_pages.c" 5199 #include "selftests/i915_gem_object.c" 5200 #include "selftests/i915_gem_coherency.c" 5201 #include "selftests/i915_gem.c" 5202 #endif 5203