1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include <linux/oom.h> 35 #include <linux/shmem_fs.h> 36 #include <linux/slab.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 #include <linux/dma-buf.h> 40 41 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 42 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 43 bool force); 44 static __must_check int 45 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 46 bool readonly); 47 static void 48 i915_gem_object_retire(struct drm_i915_gem_object *obj); 49 50 static void i915_gem_write_fence(struct drm_device *dev, int reg, 51 struct drm_i915_gem_object *obj); 52 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 53 struct drm_i915_fence_reg *fence, 54 bool enable); 55 56 static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, 57 struct shrink_control *sc); 58 static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker, 59 struct shrink_control *sc); 60 static int i915_gem_shrinker_oom(struct notifier_block *nb, 61 unsigned long event, 62 void *ptr); 63 static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); 64 65 static bool cpu_cache_is_coherent(struct drm_device *dev, 66 enum i915_cache_level level) 67 { 68 return HAS_LLC(dev) || level != I915_CACHE_NONE; 69 } 70 71 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 72 { 73 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 74 return true; 75 76 return obj->pin_display; 77 } 78 79 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 80 { 81 if (obj->tiling_mode) 82 i915_gem_release_mmap(obj); 83 84 /* As we do not have an associated fence register, we will force 85 * a tiling change if we ever need to acquire one. 
86 */ 87 obj->fence_dirty = false; 88 obj->fence_reg = I915_FENCE_REG_NONE; 89 } 90 91 /* some bookkeeping */ 92 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 93 size_t size) 94 { 95 spin_lock(&dev_priv->mm.object_stat_lock); 96 dev_priv->mm.object_count++; 97 dev_priv->mm.object_memory += size; 98 spin_unlock(&dev_priv->mm.object_stat_lock); 99 } 100 101 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 102 size_t size) 103 { 104 spin_lock(&dev_priv->mm.object_stat_lock); 105 dev_priv->mm.object_count--; 106 dev_priv->mm.object_memory -= size; 107 spin_unlock(&dev_priv->mm.object_stat_lock); 108 } 109 110 static int 111 i915_gem_wait_for_error(struct i915_gpu_error *error) 112 { 113 int ret; 114 115 #define EXIT_COND (!i915_reset_in_progress(error) || \ 116 i915_terminally_wedged(error)) 117 if (EXIT_COND) 118 return 0; 119 120 /* 121 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 122 * userspace. If it takes that long something really bad is going on and 123 * we should simply try to bail out and fail as gracefully as possible. 124 */ 125 ret = wait_event_interruptible_timeout(error->reset_queue, 126 EXIT_COND, 127 10*HZ); 128 if (ret == 0) { 129 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 130 return -EIO; 131 } else if (ret < 0) { 132 return ret; 133 } 134 #undef EXIT_COND 135 136 return 0; 137 } 138 139 int i915_mutex_lock_interruptible(struct drm_device *dev) 140 { 141 struct drm_i915_private *dev_priv = dev->dev_private; 142 int ret; 143 144 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 145 if (ret) 146 return ret; 147 148 ret = mutex_lock_interruptible(&dev->struct_mutex); 149 if (ret) 150 return ret; 151 152 WARN_ON(i915_verify_lists(dev)); 153 return 0; 154 } 155 156 static inline bool 157 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 158 { 159 return i915_gem_obj_bound_any(obj) && !obj->active; 160 } 161 162 int 163 i915_gem_init_ioctl(struct drm_device *dev, void *data, 164 struct drm_file *file) 165 { 166 struct drm_i915_private *dev_priv = dev->dev_private; 167 struct drm_i915_gem_init *args = data; 168 169 if (drm_core_check_feature(dev, DRIVER_MODESET)) 170 return -ENODEV; 171 172 if (args->gtt_start >= args->gtt_end || 173 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 174 return -EINVAL; 175 176 /* GEM with user mode setting was never supported on ilk and later. 
*/ 177 if (INTEL_INFO(dev)->gen >= 5) 178 return -ENODEV; 179 180 mutex_lock(&dev->struct_mutex); 181 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, 182 args->gtt_end); 183 dev_priv->gtt.mappable_end = args->gtt_end; 184 mutex_unlock(&dev->struct_mutex); 185 186 return 0; 187 } 188 189 int 190 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 191 struct drm_file *file) 192 { 193 struct drm_i915_private *dev_priv = dev->dev_private; 194 struct drm_i915_gem_get_aperture *args = data; 195 struct drm_i915_gem_object *obj; 196 size_t pinned; 197 198 pinned = 0; 199 mutex_lock(&dev->struct_mutex); 200 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 201 if (i915_gem_obj_is_pinned(obj)) 202 pinned += i915_gem_obj_ggtt_size(obj); 203 mutex_unlock(&dev->struct_mutex); 204 205 args->aper_size = dev_priv->gtt.base.total; 206 args->aper_available_size = args->aper_size - pinned; 207 208 return 0; 209 } 210 211 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj) 212 { 213 drm_dma_handle_t *phys = obj->phys_handle; 214 215 if (!phys) 216 return; 217 218 if (obj->madv == I915_MADV_WILLNEED) { 219 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 220 char *vaddr = phys->vaddr; 221 int i; 222 223 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 224 struct page *page = shmem_read_mapping_page(mapping, i); 225 if (!IS_ERR(page)) { 226 char *dst = kmap_atomic(page); 227 memcpy(dst, vaddr, PAGE_SIZE); 228 drm_clflush_virt_range(dst, PAGE_SIZE); 229 kunmap_atomic(dst); 230 231 set_page_dirty(page); 232 mark_page_accessed(page); 233 page_cache_release(page); 234 } 235 vaddr += PAGE_SIZE; 236 } 237 i915_gem_chipset_flush(obj->base.dev); 238 } 239 240 #ifdef CONFIG_X86 241 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); 242 #endif 243 drm_pci_free(obj->base.dev, phys); 244 obj->phys_handle = NULL; 245 } 246 247 int 248 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 249 int align) 250 { 251 drm_dma_handle_t *phys; 252 struct address_space *mapping; 253 char *vaddr; 254 int i; 255 256 if (obj->phys_handle) { 257 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 258 return -EBUSY; 259 260 return 0; 261 } 262 263 if (obj->madv != I915_MADV_WILLNEED) 264 return -EFAULT; 265 266 if (obj->base.filp == NULL) 267 return -EINVAL; 268 269 /* create a new object */ 270 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 271 if (!phys) 272 return -ENOMEM; 273 274 vaddr = phys->vaddr; 275 #ifdef CONFIG_X86 276 set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE); 277 #endif 278 mapping = file_inode(obj->base.filp)->i_mapping; 279 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 280 struct page *page; 281 char *src; 282 283 page = shmem_read_mapping_page(mapping, i); 284 if (IS_ERR(page)) { 285 #ifdef CONFIG_X86 286 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); 287 #endif 288 drm_pci_free(obj->base.dev, phys); 289 return PTR_ERR(page); 290 } 291 292 src = kmap_atomic(page); 293 memcpy(vaddr, src, PAGE_SIZE); 294 kunmap_atomic(src); 295 296 mark_page_accessed(page); 297 page_cache_release(page); 298 299 vaddr += PAGE_SIZE; 300 } 301 302 obj->phys_handle = phys; 303 return 0; 304 } 305 306 static int 307 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 308 struct drm_i915_gem_pwrite *args, 309 struct drm_file *file_priv) 310 { 311 struct drm_device *dev = obj->base.dev; 312 void *vaddr = obj->phys_handle->vaddr + args->offset; 313 char __user *user_data = 
to_user_ptr(args->data_ptr); 314 315 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 316 unsigned long unwritten; 317 318 /* The physical object once assigned is fixed for the lifetime 319 * of the obj, so we can safely drop the lock and continue 320 * to access vaddr. 321 */ 322 mutex_unlock(&dev->struct_mutex); 323 unwritten = copy_from_user(vaddr, user_data, args->size); 324 mutex_lock(&dev->struct_mutex); 325 if (unwritten) 326 return -EFAULT; 327 } 328 329 i915_gem_chipset_flush(dev); 330 return 0; 331 } 332 333 void *i915_gem_object_alloc(struct drm_device *dev) 334 { 335 struct drm_i915_private *dev_priv = dev->dev_private; 336 return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL); 337 } 338 339 void i915_gem_object_free(struct drm_i915_gem_object *obj) 340 { 341 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 342 kmem_cache_free(dev_priv->slab, obj); 343 } 344 345 static int 346 i915_gem_create(struct drm_file *file, 347 struct drm_device *dev, 348 uint64_t size, 349 uint32_t *handle_p) 350 { 351 struct drm_i915_gem_object *obj; 352 int ret; 353 u32 handle; 354 355 size = roundup(size, PAGE_SIZE); 356 if (size == 0) 357 return -EINVAL; 358 359 /* Allocate the new object */ 360 obj = i915_gem_alloc_object(dev, size); 361 if (obj == NULL) 362 return -ENOMEM; 363 364 ret = drm_gem_handle_create(file, &obj->base, &handle); 365 /* drop reference from allocate - handle holds it now */ 366 drm_gem_object_unreference_unlocked(&obj->base); 367 if (ret) 368 return ret; 369 370 *handle_p = handle; 371 return 0; 372 } 373 374 int 375 i915_gem_dumb_create(struct drm_file *file, 376 struct drm_device *dev, 377 struct drm_mode_create_dumb *args) 378 { 379 /* have to work out size/pitch and return them */ 380 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 381 args->size = args->pitch * args->height; 382 return i915_gem_create(file, dev, 383 args->size, &args->handle); 384 } 385 386 /** 387 * Creates a new mm object and returns a handle to it. 
388 */ 389 int 390 i915_gem_create_ioctl(struct drm_device *dev, void *data, 391 struct drm_file *file) 392 { 393 struct drm_i915_gem_create *args = data; 394 395 return i915_gem_create(file, dev, 396 args->size, &args->handle); 397 } 398 399 static inline int 400 __copy_to_user_swizzled(char __user *cpu_vaddr, 401 const char *gpu_vaddr, int gpu_offset, 402 int length) 403 { 404 int ret, cpu_offset = 0; 405 406 while (length > 0) { 407 int cacheline_end = ALIGN(gpu_offset + 1, 64); 408 int this_length = min(cacheline_end - gpu_offset, length); 409 int swizzled_gpu_offset = gpu_offset ^ 64; 410 411 ret = __copy_to_user(cpu_vaddr + cpu_offset, 412 gpu_vaddr + swizzled_gpu_offset, 413 this_length); 414 if (ret) 415 return ret + length; 416 417 cpu_offset += this_length; 418 gpu_offset += this_length; 419 length -= this_length; 420 } 421 422 return 0; 423 } 424 425 static inline int 426 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 427 const char __user *cpu_vaddr, 428 int length) 429 { 430 int ret, cpu_offset = 0; 431 432 while (length > 0) { 433 int cacheline_end = ALIGN(gpu_offset + 1, 64); 434 int this_length = min(cacheline_end - gpu_offset, length); 435 int swizzled_gpu_offset = gpu_offset ^ 64; 436 437 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 438 cpu_vaddr + cpu_offset, 439 this_length); 440 if (ret) 441 return ret + length; 442 443 cpu_offset += this_length; 444 gpu_offset += this_length; 445 length -= this_length; 446 } 447 448 return 0; 449 } 450 451 /* 452 * Pins the specified object's pages and synchronizes the object with 453 * GPU accesses. Sets needs_clflush to non-zero if the caller should 454 * flush the object from the CPU cache. 455 */ 456 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 457 int *needs_clflush) 458 { 459 int ret; 460 461 *needs_clflush = 0; 462 463 if (!obj->base.filp) 464 return -EINVAL; 465 466 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 467 /* If we're not in the cpu read domain, set ourself into the gtt 468 * read domain and manually flush cachelines (if required). This 469 * optimizes for the case when the gpu will dirty the data 470 * anyway again before the next pread happens. */ 471 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 472 obj->cache_level); 473 ret = i915_gem_object_wait_rendering(obj, true); 474 if (ret) 475 return ret; 476 477 i915_gem_object_retire(obj); 478 } 479 480 ret = i915_gem_object_get_pages(obj); 481 if (ret) 482 return ret; 483 484 i915_gem_object_pin_pages(obj); 485 486 return ret; 487 } 488 489 /* Per-page copy function for the shmem pread fastpath. 490 * Flushes invalid cachelines before reading the target if 491 * needs_clflush is set. */ 492 static int 493 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 494 char __user *user_data, 495 bool page_do_bit17_swizzling, bool needs_clflush) 496 { 497 char *vaddr; 498 int ret; 499 500 if (unlikely(page_do_bit17_swizzling)) 501 return -EINVAL; 502 503 vaddr = kmap_atomic(page); 504 if (needs_clflush) 505 drm_clflush_virt_range(vaddr + shmem_page_offset, 506 page_length); 507 ret = __copy_to_user_inatomic(user_data, 508 vaddr + shmem_page_offset, 509 page_length); 510 kunmap_atomic(vaddr); 511 512 return ret ? 
-EFAULT : 0; 513 } 514 515 static void 516 shmem_clflush_swizzled_range(char *addr, unsigned long length, 517 bool swizzled) 518 { 519 if (unlikely(swizzled)) { 520 unsigned long start = (unsigned long) addr; 521 unsigned long end = (unsigned long) addr + length; 522 523 /* For swizzling simply ensure that we always flush both 524 * channels. Lame, but simple and it works. Swizzled 525 * pwrite/pread is far from a hotpath - current userspace 526 * doesn't use it at all. */ 527 start = round_down(start, 128); 528 end = round_up(end, 128); 529 530 drm_clflush_virt_range((void *)start, end - start); 531 } else { 532 drm_clflush_virt_range(addr, length); 533 } 534 535 } 536 537 /* Only difference to the fast-path function is that this can handle bit17 538 * and uses non-atomic copy and kmap functions. */ 539 static int 540 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 541 char __user *user_data, 542 bool page_do_bit17_swizzling, bool needs_clflush) 543 { 544 char *vaddr; 545 int ret; 546 547 vaddr = kmap(page); 548 if (needs_clflush) 549 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 550 page_length, 551 page_do_bit17_swizzling); 552 553 if (page_do_bit17_swizzling) 554 ret = __copy_to_user_swizzled(user_data, 555 vaddr, shmem_page_offset, 556 page_length); 557 else 558 ret = __copy_to_user(user_data, 559 vaddr + shmem_page_offset, 560 page_length); 561 kunmap(page); 562 563 return ret ? - EFAULT : 0; 564 } 565 566 static int 567 i915_gem_shmem_pread(struct drm_device *dev, 568 struct drm_i915_gem_object *obj, 569 struct drm_i915_gem_pread *args, 570 struct drm_file *file) 571 { 572 char __user *user_data; 573 ssize_t remain; 574 loff_t offset; 575 int shmem_page_offset, page_length, ret = 0; 576 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 577 int prefaulted = 0; 578 int needs_clflush = 0; 579 struct sg_page_iter sg_iter; 580 581 user_data = to_user_ptr(args->data_ptr); 582 remain = args->size; 583 584 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 585 586 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 587 if (ret) 588 return ret; 589 590 offset = args->offset; 591 592 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 593 offset >> PAGE_SHIFT) { 594 struct page *page = sg_page_iter_page(&sg_iter); 595 596 if (remain <= 0) 597 break; 598 599 /* Operation in this page 600 * 601 * shmem_page_offset = offset within page in shmem file 602 * page_length = bytes to copy for this page 603 */ 604 shmem_page_offset = offset_in_page(offset); 605 page_length = remain; 606 if ((shmem_page_offset + page_length) > PAGE_SIZE) 607 page_length = PAGE_SIZE - shmem_page_offset; 608 609 page_do_bit17_swizzling = obj_do_bit17_swizzling && 610 (page_to_phys(page) & (1 << 17)) != 0; 611 612 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 613 user_data, page_do_bit17_swizzling, 614 needs_clflush); 615 if (ret == 0) 616 goto next_page; 617 618 mutex_unlock(&dev->struct_mutex); 619 620 if (likely(!i915.prefault_disable) && !prefaulted) { 621 ret = fault_in_multipages_writeable(user_data, remain); 622 /* Userspace is tricking us, but we've already clobbered 623 * its pages with the prefault and promised to write the 624 * data up to the first fault. Hence ignore any errors 625 * and just continue. 
*/ 626 (void)ret; 627 prefaulted = 1; 628 } 629 630 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 631 user_data, page_do_bit17_swizzling, 632 needs_clflush); 633 634 mutex_lock(&dev->struct_mutex); 635 636 if (ret) 637 goto out; 638 639 next_page: 640 remain -= page_length; 641 user_data += page_length; 642 offset += page_length; 643 } 644 645 out: 646 i915_gem_object_unpin_pages(obj); 647 648 return ret; 649 } 650 651 /** 652 * Reads data from the object referenced by handle. 653 * 654 * On error, the contents of *data are undefined. 655 */ 656 int 657 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 658 struct drm_file *file) 659 { 660 struct drm_i915_gem_pread *args = data; 661 struct drm_i915_gem_object *obj; 662 int ret = 0; 663 664 if (args->size == 0) 665 return 0; 666 667 if (!access_ok(VERIFY_WRITE, 668 to_user_ptr(args->data_ptr), 669 args->size)) 670 return -EFAULT; 671 672 ret = i915_mutex_lock_interruptible(dev); 673 if (ret) 674 return ret; 675 676 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 677 if (&obj->base == NULL) { 678 ret = -ENOENT; 679 goto unlock; 680 } 681 682 /* Bounds check source. */ 683 if (args->offset > obj->base.size || 684 args->size > obj->base.size - args->offset) { 685 ret = -EINVAL; 686 goto out; 687 } 688 689 /* prime objects have no backing filp to GEM pread/pwrite 690 * pages from. 691 */ 692 if (!obj->base.filp) { 693 ret = -EINVAL; 694 goto out; 695 } 696 697 trace_i915_gem_object_pread(obj, args->offset, args->size); 698 699 ret = i915_gem_shmem_pread(dev, obj, args, file); 700 701 out: 702 drm_gem_object_unreference(&obj->base); 703 unlock: 704 mutex_unlock(&dev->struct_mutex); 705 return ret; 706 } 707 708 /* This is the fast write path which cannot handle 709 * page faults in the source data 710 */ 711 712 static inline int 713 fast_user_write(struct io_mapping *mapping, 714 loff_t page_base, int page_offset, 715 char __user *user_data, 716 int length) 717 { 718 void __iomem *vaddr_atomic; 719 void *vaddr; 720 unsigned long unwritten; 721 722 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 723 /* We can use the cpu mem copy function because this is X86. */ 724 vaddr = (void __force*)vaddr_atomic + page_offset; 725 unwritten = __copy_from_user_inatomic_nocache(vaddr, 726 user_data, length); 727 io_mapping_unmap_atomic(vaddr_atomic); 728 return unwritten; 729 } 730 731 /** 732 * This is the fast pwrite path, where we copy the data directly from the 733 * user into the GTT, uncached. 
734 */ 735 static int 736 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 737 struct drm_i915_gem_object *obj, 738 struct drm_i915_gem_pwrite *args, 739 struct drm_file *file) 740 { 741 struct drm_i915_private *dev_priv = dev->dev_private; 742 ssize_t remain; 743 loff_t offset, page_base; 744 char __user *user_data; 745 int page_offset, page_length, ret; 746 747 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 748 if (ret) 749 goto out; 750 751 ret = i915_gem_object_set_to_gtt_domain(obj, true); 752 if (ret) 753 goto out_unpin; 754 755 ret = i915_gem_object_put_fence(obj); 756 if (ret) 757 goto out_unpin; 758 759 user_data = to_user_ptr(args->data_ptr); 760 remain = args->size; 761 762 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 763 764 while (remain > 0) { 765 /* Operation in this page 766 * 767 * page_base = page offset within aperture 768 * page_offset = offset within page 769 * page_length = bytes to copy for this page 770 */ 771 page_base = offset & PAGE_MASK; 772 page_offset = offset_in_page(offset); 773 page_length = remain; 774 if ((page_offset + remain) > PAGE_SIZE) 775 page_length = PAGE_SIZE - page_offset; 776 777 /* If we get a fault while copying data, then (presumably) our 778 * source page isn't available. Return the error and we'll 779 * retry in the slow path. 780 */ 781 if (fast_user_write(dev_priv->gtt.mappable, page_base, 782 page_offset, user_data, page_length)) { 783 ret = -EFAULT; 784 goto out_unpin; 785 } 786 787 remain -= page_length; 788 user_data += page_length; 789 offset += page_length; 790 } 791 792 out_unpin: 793 i915_gem_object_ggtt_unpin(obj); 794 out: 795 return ret; 796 } 797 798 /* Per-page copy function for the shmem pwrite fastpath. 799 * Flushes invalid cachelines before writing to the target if 800 * needs_clflush_before is set and flushes out any written cachelines after 801 * writing if needs_clflush is set. */ 802 static int 803 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 804 char __user *user_data, 805 bool page_do_bit17_swizzling, 806 bool needs_clflush_before, 807 bool needs_clflush_after) 808 { 809 char *vaddr; 810 int ret; 811 812 if (unlikely(page_do_bit17_swizzling)) 813 return -EINVAL; 814 815 vaddr = kmap_atomic(page); 816 if (needs_clflush_before) 817 drm_clflush_virt_range(vaddr + shmem_page_offset, 818 page_length); 819 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 820 user_data, page_length); 821 if (needs_clflush_after) 822 drm_clflush_virt_range(vaddr + shmem_page_offset, 823 page_length); 824 kunmap_atomic(vaddr); 825 826 return ret ? -EFAULT : 0; 827 } 828 829 /* Only difference to the fast-path function is that this can handle bit17 830 * and uses non-atomic copy and kmap functions. 
*/ 831 static int 832 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 833 char __user *user_data, 834 bool page_do_bit17_swizzling, 835 bool needs_clflush_before, 836 bool needs_clflush_after) 837 { 838 char *vaddr; 839 int ret; 840 841 vaddr = kmap(page); 842 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 843 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 844 page_length, 845 page_do_bit17_swizzling); 846 if (page_do_bit17_swizzling) 847 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 848 user_data, 849 page_length); 850 else 851 ret = __copy_from_user(vaddr + shmem_page_offset, 852 user_data, 853 page_length); 854 if (needs_clflush_after) 855 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 856 page_length, 857 page_do_bit17_swizzling); 858 kunmap(page); 859 860 return ret ? -EFAULT : 0; 861 } 862 863 static int 864 i915_gem_shmem_pwrite(struct drm_device *dev, 865 struct drm_i915_gem_object *obj, 866 struct drm_i915_gem_pwrite *args, 867 struct drm_file *file) 868 { 869 ssize_t remain; 870 loff_t offset; 871 char __user *user_data; 872 int shmem_page_offset, page_length, ret = 0; 873 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 874 int hit_slowpath = 0; 875 int needs_clflush_after = 0; 876 int needs_clflush_before = 0; 877 struct sg_page_iter sg_iter; 878 879 user_data = to_user_ptr(args->data_ptr); 880 remain = args->size; 881 882 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 883 884 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 885 /* If we're not in the cpu write domain, set ourself into the gtt 886 * write domain and manually flush cachelines (if required). This 887 * optimizes for the case when the gpu will use the data 888 * right away and we therefore have to clflush anyway. */ 889 needs_clflush_after = cpu_write_needs_clflush(obj); 890 ret = i915_gem_object_wait_rendering(obj, false); 891 if (ret) 892 return ret; 893 894 i915_gem_object_retire(obj); 895 } 896 /* Same trick applies to invalidate partially written cachelines read 897 * before writing. */ 898 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 899 needs_clflush_before = 900 !cpu_cache_is_coherent(dev, obj->cache_level); 901 902 ret = i915_gem_object_get_pages(obj); 903 if (ret) 904 return ret; 905 906 i915_gem_object_pin_pages(obj); 907 908 offset = args->offset; 909 obj->dirty = 1; 910 911 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 912 offset >> PAGE_SHIFT) { 913 struct page *page = sg_page_iter_page(&sg_iter); 914 int partial_cacheline_write; 915 916 if (remain <= 0) 917 break; 918 919 /* Operation in this page 920 * 921 * shmem_page_offset = offset within page in shmem file 922 * page_length = bytes to copy for this page 923 */ 924 shmem_page_offset = offset_in_page(offset); 925 926 page_length = remain; 927 if ((shmem_page_offset + page_length) > PAGE_SIZE) 928 page_length = PAGE_SIZE - shmem_page_offset; 929 930 /* If we don't overwrite a cacheline completely we need to be 931 * careful to have up-to-date data by first clflushing. Don't 932 * overcomplicate things and flush the entire patch. 
*/ 933 partial_cacheline_write = needs_clflush_before && 934 ((shmem_page_offset | page_length) 935 & (boot_cpu_data.x86_clflush_size - 1)); 936 937 page_do_bit17_swizzling = obj_do_bit17_swizzling && 938 (page_to_phys(page) & (1 << 17)) != 0; 939 940 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 941 user_data, page_do_bit17_swizzling, 942 partial_cacheline_write, 943 needs_clflush_after); 944 if (ret == 0) 945 goto next_page; 946 947 hit_slowpath = 1; 948 mutex_unlock(&dev->struct_mutex); 949 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 950 user_data, page_do_bit17_swizzling, 951 partial_cacheline_write, 952 needs_clflush_after); 953 954 mutex_lock(&dev->struct_mutex); 955 956 if (ret) 957 goto out; 958 959 next_page: 960 remain -= page_length; 961 user_data += page_length; 962 offset += page_length; 963 } 964 965 out: 966 i915_gem_object_unpin_pages(obj); 967 968 if (hit_slowpath) { 969 /* 970 * Fixup: Flush cpu caches in case we didn't flush the dirty 971 * cachelines in-line while writing and the object moved 972 * out of the cpu write domain while we've dropped the lock. 973 */ 974 if (!needs_clflush_after && 975 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 976 if (i915_gem_clflush_object(obj, obj->pin_display)) 977 i915_gem_chipset_flush(dev); 978 } 979 } 980 981 if (needs_clflush_after) 982 i915_gem_chipset_flush(dev); 983 984 return ret; 985 } 986 987 /** 988 * Writes data to the object referenced by handle. 989 * 990 * On error, the contents of the buffer that were to be modified are undefined. 991 */ 992 int 993 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 994 struct drm_file *file) 995 { 996 struct drm_i915_gem_pwrite *args = data; 997 struct drm_i915_gem_object *obj; 998 int ret; 999 1000 if (args->size == 0) 1001 return 0; 1002 1003 if (!access_ok(VERIFY_READ, 1004 to_user_ptr(args->data_ptr), 1005 args->size)) 1006 return -EFAULT; 1007 1008 if (likely(!i915.prefault_disable)) { 1009 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1010 args->size); 1011 if (ret) 1012 return -EFAULT; 1013 } 1014 1015 ret = i915_mutex_lock_interruptible(dev); 1016 if (ret) 1017 return ret; 1018 1019 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1020 if (&obj->base == NULL) { 1021 ret = -ENOENT; 1022 goto unlock; 1023 } 1024 1025 /* Bounds check destination. */ 1026 if (args->offset > obj->base.size || 1027 args->size > obj->base.size - args->offset) { 1028 ret = -EINVAL; 1029 goto out; 1030 } 1031 1032 /* prime objects have no backing filp to GEM pread/pwrite 1033 * pages from. 1034 */ 1035 if (!obj->base.filp) { 1036 ret = -EINVAL; 1037 goto out; 1038 } 1039 1040 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1041 1042 ret = -EFAULT; 1043 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1044 * it would end up going through the fenced access, and we'll get 1045 * different detiling behavior between reading and writing. 1046 * pread/pwrite currently are reading and writing from the CPU 1047 * perspective, requiring manual detiling by the client. 1048 */ 1049 if (obj->phys_handle) { 1050 ret = i915_gem_phys_pwrite(obj, args, file); 1051 goto out; 1052 } 1053 1054 if (obj->tiling_mode == I915_TILING_NONE && 1055 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1056 cpu_write_needs_clflush(obj)) { 1057 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1058 /* Note that the gtt paths might fail with non-page-backed user 1059 * pointers (e.g. 
gtt mappings when moving data between 1060 * textures). Fallback to the shmem path in that case. */ 1061 } 1062 1063 if (ret == -EFAULT || ret == -ENOSPC) 1064 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1065 1066 out: 1067 drm_gem_object_unreference(&obj->base); 1068 unlock: 1069 mutex_unlock(&dev->struct_mutex); 1070 return ret; 1071 } 1072 1073 int 1074 i915_gem_check_wedge(struct i915_gpu_error *error, 1075 bool interruptible) 1076 { 1077 if (i915_reset_in_progress(error)) { 1078 /* Non-interruptible callers can't handle -EAGAIN, hence return 1079 * -EIO unconditionally for these. */ 1080 if (!interruptible) 1081 return -EIO; 1082 1083 /* Recovery complete, but the reset failed ... */ 1084 if (i915_terminally_wedged(error)) 1085 return -EIO; 1086 1087 /* 1088 * Check if GPU Reset is in progress - we need intel_ring_begin 1089 * to work properly to reinit the hw state while the gpu is 1090 * still marked as reset-in-progress. Handle this with a flag. 1091 */ 1092 if (!error->reload_in_reset) 1093 return -EAGAIN; 1094 } 1095 1096 return 0; 1097 } 1098 1099 /* 1100 * Compare seqno against outstanding lazy request. Emit a request if they are 1101 * equal. 1102 */ 1103 int 1104 i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno) 1105 { 1106 int ret; 1107 1108 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); 1109 1110 ret = 0; 1111 if (seqno == ring->outstanding_lazy_seqno) 1112 ret = i915_add_request(ring, NULL); 1113 1114 return ret; 1115 } 1116 1117 static void fake_irq(unsigned long data) 1118 { 1119 wake_up_process((struct task_struct *)data); 1120 } 1121 1122 static bool missed_irq(struct drm_i915_private *dev_priv, 1123 struct intel_engine_cs *ring) 1124 { 1125 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1126 } 1127 1128 static bool can_wait_boost(struct drm_i915_file_private *file_priv) 1129 { 1130 if (file_priv == NULL) 1131 return true; 1132 1133 return !atomic_xchg(&file_priv->rps_wait_boost, true); 1134 } 1135 1136 /** 1137 * __wait_seqno - wait until execution of seqno has finished 1138 * @ring: the ring expected to report seqno 1139 * @seqno: duh! 1140 * @reset_counter: reset sequence associated with the given seqno 1141 * @interruptible: do an interruptible wait (normally yes) 1142 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1143 * 1144 * Note: It is of utmost importance that the passed in seqno and reset_counter 1145 * values have been read by the caller in an smp safe manner. Where read-side 1146 * locks are involved, it is sufficient to read the reset_counter before 1147 * unlocking the lock that protects the seqno. For lockless tricks, the 1148 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1149 * inserted. 1150 * 1151 * Returns 0 if the seqno was found within the alloted time. Else returns the 1152 * errno with remaining time filled in timeout argument. 
1153 */ 1154 static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno, 1155 unsigned reset_counter, 1156 bool interruptible, 1157 s64 *timeout, 1158 struct drm_i915_file_private *file_priv) 1159 { 1160 struct drm_device *dev = ring->dev; 1161 struct drm_i915_private *dev_priv = dev->dev_private; 1162 const bool irq_test_in_progress = 1163 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1164 DEFINE_WAIT(wait); 1165 unsigned long timeout_expire; 1166 s64 before, now; 1167 int ret; 1168 1169 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1170 1171 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 1172 return 0; 1173 1174 timeout_expire = timeout ? jiffies + nsecs_to_jiffies((u64)*timeout) : 0; 1175 1176 if (INTEL_INFO(dev)->gen >= 6 && ring->id == RCS && can_wait_boost(file_priv)) { 1177 gen6_rps_boost(dev_priv); 1178 if (file_priv) 1179 mod_delayed_work(dev_priv->wq, 1180 &file_priv->mm.idle_work, 1181 msecs_to_jiffies(100)); 1182 } 1183 1184 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) 1185 return -ENODEV; 1186 1187 /* Record current time in case interrupted by signal, or wedged */ 1188 trace_i915_gem_request_wait_begin(ring, seqno); 1189 before = ktime_get_raw_ns(); 1190 for (;;) { 1191 struct timer_list timer; 1192 1193 prepare_to_wait(&ring->irq_queue, &wait, 1194 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 1195 1196 /* We need to check whether any gpu reset happened in between 1197 * the caller grabbing the seqno and now ... */ 1198 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1199 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1200 * is truely gone. */ 1201 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1202 if (ret == 0) 1203 ret = -EAGAIN; 1204 break; 1205 } 1206 1207 if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) { 1208 ret = 0; 1209 break; 1210 } 1211 1212 if (interruptible && signal_pending(current)) { 1213 ret = -ERESTARTSYS; 1214 break; 1215 } 1216 1217 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1218 ret = -ETIME; 1219 break; 1220 } 1221 1222 timer.function = NULL; 1223 if (timeout || missed_irq(dev_priv, ring)) { 1224 unsigned long expire; 1225 1226 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current); 1227 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1228 mod_timer(&timer, expire); 1229 } 1230 1231 io_schedule(); 1232 1233 if (timer.function) { 1234 del_singleshot_timer_sync(&timer); 1235 destroy_timer_on_stack(&timer); 1236 } 1237 } 1238 now = ktime_get_raw_ns(); 1239 trace_i915_gem_request_wait_end(ring, seqno); 1240 1241 if (!irq_test_in_progress) 1242 ring->irq_put(ring); 1243 1244 finish_wait(&ring->irq_queue, &wait); 1245 1246 if (timeout) { 1247 s64 tres = *timeout - (now - before); 1248 1249 *timeout = tres < 0 ? 0 : tres; 1250 } 1251 1252 return ret; 1253 } 1254 1255 /** 1256 * Waits for a sequence number to be signaled, and cleans up the 1257 * request and object lists appropriately for that event. 
1258 */ 1259 int 1260 i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno) 1261 { 1262 struct drm_device *dev = ring->dev; 1263 struct drm_i915_private *dev_priv = dev->dev_private; 1264 bool interruptible = dev_priv->mm.interruptible; 1265 int ret; 1266 1267 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1268 BUG_ON(seqno == 0); 1269 1270 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1271 if (ret) 1272 return ret; 1273 1274 ret = i915_gem_check_olr(ring, seqno); 1275 if (ret) 1276 return ret; 1277 1278 return __wait_seqno(ring, seqno, 1279 atomic_read(&dev_priv->gpu_error.reset_counter), 1280 interruptible, NULL, NULL); 1281 } 1282 1283 static int 1284 i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj, 1285 struct intel_engine_cs *ring) 1286 { 1287 if (!obj->active) 1288 return 0; 1289 1290 /* Manually manage the write flush as we may have not yet 1291 * retired the buffer. 1292 * 1293 * Note that the last_write_seqno is always the earlier of 1294 * the two (read/write) seqno, so if we haved successfully waited, 1295 * we know we have passed the last write. 1296 */ 1297 obj->last_write_seqno = 0; 1298 1299 return 0; 1300 } 1301 1302 /** 1303 * Ensures that all rendering to the object has completed and the object is 1304 * safe to unbind from the GTT or access from the CPU. 1305 */ 1306 static __must_check int 1307 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1308 bool readonly) 1309 { 1310 struct intel_engine_cs *ring = obj->ring; 1311 u32 seqno; 1312 int ret; 1313 1314 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1315 if (seqno == 0) 1316 return 0; 1317 1318 ret = i915_wait_seqno(ring, seqno); 1319 if (ret) 1320 return ret; 1321 1322 return i915_gem_object_wait_rendering__tail(obj, ring); 1323 } 1324 1325 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1326 * as the object state may change during this call. 1327 */ 1328 static __must_check int 1329 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1330 struct drm_i915_file_private *file_priv, 1331 bool readonly) 1332 { 1333 struct drm_device *dev = obj->base.dev; 1334 struct drm_i915_private *dev_priv = dev->dev_private; 1335 struct intel_engine_cs *ring = obj->ring; 1336 unsigned reset_counter; 1337 u32 seqno; 1338 int ret; 1339 1340 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1341 BUG_ON(!dev_priv->mm.interruptible); 1342 1343 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1344 if (seqno == 0) 1345 return 0; 1346 1347 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1348 if (ret) 1349 return ret; 1350 1351 ret = i915_gem_check_olr(ring, seqno); 1352 if (ret) 1353 return ret; 1354 1355 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1356 mutex_unlock(&dev->struct_mutex); 1357 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv); 1358 mutex_lock(&dev->struct_mutex); 1359 if (ret) 1360 return ret; 1361 1362 return i915_gem_object_wait_rendering__tail(obj, ring); 1363 } 1364 1365 /** 1366 * Called when user space prepares to use an object with the CPU, either 1367 * through the mmap ioctl's mapping or a GTT mapping. 
1368 */ 1369 int 1370 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1371 struct drm_file *file) 1372 { 1373 struct drm_i915_gem_set_domain *args = data; 1374 struct drm_i915_gem_object *obj; 1375 uint32_t read_domains = args->read_domains; 1376 uint32_t write_domain = args->write_domain; 1377 int ret; 1378 1379 /* Only handle setting domains to types used by the CPU. */ 1380 if (write_domain & I915_GEM_GPU_DOMAINS) 1381 return -EINVAL; 1382 1383 if (read_domains & I915_GEM_GPU_DOMAINS) 1384 return -EINVAL; 1385 1386 /* Having something in the write domain implies it's in the read 1387 * domain, and only that read domain. Enforce that in the request. 1388 */ 1389 if (write_domain != 0 && read_domains != write_domain) 1390 return -EINVAL; 1391 1392 ret = i915_mutex_lock_interruptible(dev); 1393 if (ret) 1394 return ret; 1395 1396 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1397 if (&obj->base == NULL) { 1398 ret = -ENOENT; 1399 goto unlock; 1400 } 1401 1402 /* Try to flush the object off the GPU without holding the lock. 1403 * We will repeat the flush holding the lock in the normal manner 1404 * to catch cases where we are gazumped. 1405 */ 1406 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1407 file->driver_priv, 1408 !write_domain); 1409 if (ret) 1410 goto unref; 1411 1412 if (read_domains & I915_GEM_DOMAIN_GTT) { 1413 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1414 1415 /* Silently promote "you're not bound, there was nothing to do" 1416 * to success, since the client was just asking us to 1417 * make sure everything was done. 1418 */ 1419 if (ret == -EINVAL) 1420 ret = 0; 1421 } else { 1422 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1423 } 1424 1425 unref: 1426 drm_gem_object_unreference(&obj->base); 1427 unlock: 1428 mutex_unlock(&dev->struct_mutex); 1429 return ret; 1430 } 1431 1432 /** 1433 * Called when user space has done writes to this buffer 1434 */ 1435 int 1436 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1437 struct drm_file *file) 1438 { 1439 struct drm_i915_gem_sw_finish *args = data; 1440 struct drm_i915_gem_object *obj; 1441 int ret = 0; 1442 1443 ret = i915_mutex_lock_interruptible(dev); 1444 if (ret) 1445 return ret; 1446 1447 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1448 if (&obj->base == NULL) { 1449 ret = -ENOENT; 1450 goto unlock; 1451 } 1452 1453 /* Pinned buffers may be scanout, so flush the cache */ 1454 if (obj->pin_display) 1455 i915_gem_object_flush_cpu_write_domain(obj, true); 1456 1457 drm_gem_object_unreference(&obj->base); 1458 unlock: 1459 mutex_unlock(&dev->struct_mutex); 1460 return ret; 1461 } 1462 1463 /** 1464 * Maps the contents of an object, returning the address it is mapped 1465 * into. 1466 * 1467 * While the mapping holds a reference on the contents of the object, it doesn't 1468 * imply a ref on the object itself. 1469 */ 1470 int 1471 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1472 struct drm_file *file) 1473 { 1474 struct drm_i915_gem_mmap *args = data; 1475 struct drm_gem_object *obj; 1476 unsigned long addr; 1477 1478 obj = drm_gem_object_lookup(dev, file, args->handle); 1479 if (obj == NULL) 1480 return -ENOENT; 1481 1482 /* prime objects have no backing filp to GEM mmap 1483 * pages from. 
1484 */ 1485 if (!obj->filp) { 1486 drm_gem_object_unreference_unlocked(obj); 1487 return -EINVAL; 1488 } 1489 1490 addr = vm_mmap(obj->filp, 0, args->size, 1491 PROT_READ | PROT_WRITE, MAP_SHARED, 1492 args->offset); 1493 drm_gem_object_unreference_unlocked(obj); 1494 if (IS_ERR((void *)addr)) 1495 return addr; 1496 1497 args->addr_ptr = (uint64_t) addr; 1498 1499 return 0; 1500 } 1501 1502 /** 1503 * i915_gem_fault - fault a page into the GTT 1504 * vma: VMA in question 1505 * vmf: fault info 1506 * 1507 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1508 * from userspace. The fault handler takes care of binding the object to 1509 * the GTT (if needed), allocating and programming a fence register (again, 1510 * only if needed based on whether the old reg is still valid or the object 1511 * is tiled) and inserting a new PTE into the faulting process. 1512 * 1513 * Note that the faulting process may involve evicting existing objects 1514 * from the GTT and/or fence registers to make room. So performance may 1515 * suffer if the GTT working set is large or there are few fence registers 1516 * left. 1517 */ 1518 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1519 { 1520 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1521 struct drm_device *dev = obj->base.dev; 1522 struct drm_i915_private *dev_priv = dev->dev_private; 1523 pgoff_t page_offset; 1524 unsigned long pfn; 1525 int ret = 0; 1526 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1527 1528 intel_runtime_pm_get(dev_priv); 1529 1530 /* We don't use vmf->pgoff since that has the fake offset */ 1531 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1532 PAGE_SHIFT; 1533 1534 ret = i915_mutex_lock_interruptible(dev); 1535 if (ret) 1536 goto out; 1537 1538 trace_i915_gem_object_fault(obj, page_offset, true, write); 1539 1540 /* Try to flush the object off the GPU first without holding the lock. 1541 * Upon reacquiring the lock, we will perform our sanity checks and then 1542 * repeat the flush holding the lock in the normal manner to catch cases 1543 * where we are gazumped. 1544 */ 1545 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1546 if (ret) 1547 goto unlock; 1548 1549 /* Access to snoopable pages through the GTT is incoherent. 
*/ 1550 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1551 ret = -EFAULT; 1552 goto unlock; 1553 } 1554 1555 /* Now bind it into the GTT if needed */ 1556 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); 1557 if (ret) 1558 goto unlock; 1559 1560 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1561 if (ret) 1562 goto unpin; 1563 1564 ret = i915_gem_object_get_fence(obj); 1565 if (ret) 1566 goto unpin; 1567 1568 /* Finally, remap it using the new GTT offset */ 1569 pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj); 1570 pfn >>= PAGE_SHIFT; 1571 1572 if (!obj->fault_mappable) { 1573 unsigned long size = min_t(unsigned long, 1574 vma->vm_end - vma->vm_start, 1575 obj->base.size); 1576 int i; 1577 1578 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1579 ret = vm_insert_pfn(vma, 1580 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1581 pfn + i); 1582 if (ret) 1583 break; 1584 } 1585 1586 obj->fault_mappable = true; 1587 } else 1588 ret = vm_insert_pfn(vma, 1589 (unsigned long)vmf->virtual_address, 1590 pfn + page_offset); 1591 unpin: 1592 i915_gem_object_ggtt_unpin(obj); 1593 unlock: 1594 mutex_unlock(&dev->struct_mutex); 1595 out: 1596 switch (ret) { 1597 case -EIO: 1598 /* 1599 * We eat errors when the gpu is terminally wedged to avoid 1600 * userspace unduly crashing (gl has no provisions for mmaps to 1601 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1602 * and so needs to be reported. 1603 */ 1604 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1605 ret = VM_FAULT_SIGBUS; 1606 break; 1607 } 1608 case -EAGAIN: 1609 /* 1610 * EAGAIN means the gpu is hung and we'll wait for the error 1611 * handler to reset everything when re-faulting in 1612 * i915_mutex_lock_interruptible. 1613 */ 1614 case 0: 1615 case -ERESTARTSYS: 1616 case -EINTR: 1617 case -EBUSY: 1618 /* 1619 * EBUSY is ok: this just means that another thread 1620 * already did the job. 1621 */ 1622 ret = VM_FAULT_NOPAGE; 1623 break; 1624 case -ENOMEM: 1625 ret = VM_FAULT_OOM; 1626 break; 1627 case -ENOSPC: 1628 case -EFAULT: 1629 ret = VM_FAULT_SIGBUS; 1630 break; 1631 default: 1632 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1633 ret = VM_FAULT_SIGBUS; 1634 break; 1635 } 1636 1637 intel_runtime_pm_put(dev_priv); 1638 return ret; 1639 } 1640 1641 /** 1642 * i915_gem_release_mmap - remove physical page mappings 1643 * @obj: obj in question 1644 * 1645 * Preserve the reservation of the mmapping with the DRM core code, but 1646 * relinquish ownership of the pages back to the system. 1647 * 1648 * It is vital that we remove the page mapping if we have mapped a tiled 1649 * object through the GTT and then lose the fence register due to 1650 * resource pressure. Similarly if the object has been moved out of the 1651 * aperture, than pages mapped into userspace must be revoked. Removing the 1652 * mapping will then trigger a page fault on the next user access, allowing 1653 * fixup by i915_gem_fault(). 
1654 */ 1655 void 1656 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1657 { 1658 if (!obj->fault_mappable) 1659 return; 1660 1661 drm_vma_node_unmap(&obj->base.vma_node, 1662 obj->base.dev->anon_inode->i_mapping); 1663 obj->fault_mappable = false; 1664 } 1665 1666 void 1667 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 1668 { 1669 struct drm_i915_gem_object *obj; 1670 1671 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 1672 i915_gem_release_mmap(obj); 1673 } 1674 1675 uint32_t 1676 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1677 { 1678 uint32_t gtt_size; 1679 1680 if (INTEL_INFO(dev)->gen >= 4 || 1681 tiling_mode == I915_TILING_NONE) 1682 return size; 1683 1684 /* Previous chips need a power-of-two fence region when tiling */ 1685 if (INTEL_INFO(dev)->gen == 3) 1686 gtt_size = 1024*1024; 1687 else 1688 gtt_size = 512*1024; 1689 1690 while (gtt_size < size) 1691 gtt_size <<= 1; 1692 1693 return gtt_size; 1694 } 1695 1696 /** 1697 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1698 * @obj: object to check 1699 * 1700 * Return the required GTT alignment for an object, taking into account 1701 * potential fence register mapping. 1702 */ 1703 uint32_t 1704 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1705 int tiling_mode, bool fenced) 1706 { 1707 /* 1708 * Minimum alignment is 4k (GTT page size), but might be greater 1709 * if a fence register is needed for the object. 1710 */ 1711 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1712 tiling_mode == I915_TILING_NONE) 1713 return 4096; 1714 1715 /* 1716 * Previous chips need to be aligned to the size of the smallest 1717 * fence register that can contain the object. 1718 */ 1719 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1720 } 1721 1722 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1723 { 1724 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1725 int ret; 1726 1727 if (drm_vma_node_has_offset(&obj->base.vma_node)) 1728 return 0; 1729 1730 dev_priv->mm.shrinker_no_lock_stealing = true; 1731 1732 ret = drm_gem_create_mmap_offset(&obj->base); 1733 if (ret != -ENOSPC) 1734 goto out; 1735 1736 /* Badly fragmented mmap space? The only way we can recover 1737 * space is by destroying unwanted objects. We can't randomly release 1738 * mmap_offsets as userspace expects them to be persistent for the 1739 * lifetime of the objects. The closest we can is to release the 1740 * offsets on purgeable objects by truncating it and marking it purged, 1741 * which prevents userspace from ever using that object again. 
1742 */ 1743 i915_gem_shrink(dev_priv, 1744 obj->base.size >> PAGE_SHIFT, 1745 I915_SHRINK_BOUND | 1746 I915_SHRINK_UNBOUND | 1747 I915_SHRINK_PURGEABLE); 1748 ret = drm_gem_create_mmap_offset(&obj->base); 1749 if (ret != -ENOSPC) 1750 goto out; 1751 1752 i915_gem_shrink_all(dev_priv); 1753 ret = drm_gem_create_mmap_offset(&obj->base); 1754 out: 1755 dev_priv->mm.shrinker_no_lock_stealing = false; 1756 1757 return ret; 1758 } 1759 1760 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1761 { 1762 drm_gem_free_mmap_offset(&obj->base); 1763 } 1764 1765 int 1766 i915_gem_mmap_gtt(struct drm_file *file, 1767 struct drm_device *dev, 1768 uint32_t handle, 1769 uint64_t *offset) 1770 { 1771 struct drm_i915_private *dev_priv = dev->dev_private; 1772 struct drm_i915_gem_object *obj; 1773 int ret; 1774 1775 ret = i915_mutex_lock_interruptible(dev); 1776 if (ret) 1777 return ret; 1778 1779 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1780 if (&obj->base == NULL) { 1781 ret = -ENOENT; 1782 goto unlock; 1783 } 1784 1785 if (obj->base.size > dev_priv->gtt.mappable_end) { 1786 ret = -E2BIG; 1787 goto out; 1788 } 1789 1790 if (obj->madv != I915_MADV_WILLNEED) { 1791 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 1792 ret = -EFAULT; 1793 goto out; 1794 } 1795 1796 ret = i915_gem_object_create_mmap_offset(obj); 1797 if (ret) 1798 goto out; 1799 1800 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 1801 1802 out: 1803 drm_gem_object_unreference(&obj->base); 1804 unlock: 1805 mutex_unlock(&dev->struct_mutex); 1806 return ret; 1807 } 1808 1809 /** 1810 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1811 * @dev: DRM device 1812 * @data: GTT mapping ioctl data 1813 * @file: GEM object info 1814 * 1815 * Simply returns the fake offset to userspace so it can mmap it. 1816 * The mmap call will end up in drm_gem_mmap(), which will set things 1817 * up so we can get faults in the handler above. 1818 * 1819 * The fault handler will take care of binding the object into the GTT 1820 * (since it may have been evicted to make room for something), allocating 1821 * a fence register, and mapping the appropriate aperture address into 1822 * userspace. 1823 */ 1824 int 1825 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1826 struct drm_file *file) 1827 { 1828 struct drm_i915_gem_mmap_gtt *args = data; 1829 1830 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1831 } 1832 1833 static inline int 1834 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1835 { 1836 return obj->madv == I915_MADV_DONTNEED; 1837 } 1838 1839 /* Immediately discard the backing storage */ 1840 static void 1841 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1842 { 1843 i915_gem_object_free_mmap_offset(obj); 1844 1845 if (obj->base.filp == NULL) 1846 return; 1847 1848 /* Our goal here is to return as much of the memory as 1849 * is possible back to the system as we are called from OOM. 1850 * To do this we must instruct the shmfs to drop all of its 1851 * backing pages, *now*. 
1852 */ 1853 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 1854 obj->madv = __I915_MADV_PURGED; 1855 } 1856 1857 /* Try to discard unwanted pages */ 1858 static void 1859 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 1860 { 1861 struct address_space *mapping; 1862 1863 switch (obj->madv) { 1864 case I915_MADV_DONTNEED: 1865 i915_gem_object_truncate(obj); 1866 case __I915_MADV_PURGED: 1867 return; 1868 } 1869 1870 if (obj->base.filp == NULL) 1871 return; 1872 1873 mapping = file_inode(obj->base.filp)->i_mapping, 1874 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 1875 } 1876 1877 static void 1878 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1879 { 1880 struct sg_page_iter sg_iter; 1881 int ret; 1882 1883 BUG_ON(obj->madv == __I915_MADV_PURGED); 1884 1885 ret = i915_gem_object_set_to_cpu_domain(obj, true); 1886 if (ret) { 1887 /* In the event of a disaster, abandon all caches and 1888 * hope for the best. 1889 */ 1890 WARN_ON(ret != -EIO); 1891 i915_gem_clflush_object(obj, true); 1892 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 1893 } 1894 1895 if (i915_gem_object_needs_bit17_swizzle(obj)) 1896 i915_gem_object_save_bit_17_swizzle(obj); 1897 1898 if (obj->madv == I915_MADV_DONTNEED) 1899 obj->dirty = 0; 1900 1901 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 1902 struct page *page = sg_page_iter_page(&sg_iter); 1903 1904 if (obj->dirty) 1905 set_page_dirty(page); 1906 1907 if (obj->madv == I915_MADV_WILLNEED) 1908 mark_page_accessed(page); 1909 1910 page_cache_release(page); 1911 } 1912 obj->dirty = 0; 1913 1914 sg_free_table(obj->pages); 1915 kfree(obj->pages); 1916 } 1917 1918 int 1919 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 1920 { 1921 const struct drm_i915_gem_object_ops *ops = obj->ops; 1922 1923 if (obj->pages == NULL) 1924 return 0; 1925 1926 if (obj->pages_pin_count) 1927 return -EBUSY; 1928 1929 BUG_ON(i915_gem_obj_bound_any(obj)); 1930 1931 /* ->put_pages might need to allocate memory for the bit17 swizzle 1932 * array, hence protect them from being reaped by removing them from gtt 1933 * lists early. */ 1934 list_del(&obj->global_list); 1935 1936 ops->put_pages(obj); 1937 obj->pages = NULL; 1938 1939 i915_gem_object_invalidate(obj); 1940 1941 return 0; 1942 } 1943 1944 unsigned long 1945 i915_gem_shrink(struct drm_i915_private *dev_priv, 1946 long target, unsigned flags) 1947 { 1948 const bool purgeable_only = flags & I915_SHRINK_PURGEABLE; 1949 unsigned long count = 0; 1950 1951 /* 1952 * As we may completely rewrite the (un)bound list whilst unbinding 1953 * (due to retiring requests) we have to strictly process only 1954 * one element of the list at the time, and recheck the list 1955 * on every iteration. 1956 * 1957 * In particular, we must hold a reference whilst removing the 1958 * object as we may end up waiting for and/or retiring the objects. 1959 * This might release the final reference (held by the active list) 1960 * and result in the object being freed from under us. This is 1961 * similar to the precautions the eviction code must take whilst 1962 * removing objects. 1963 * 1964 * Also note that although these lists do not hold a reference to 1965 * the object we can safely grab one here: The final object 1966 * unreferencing and the bound_list are both protected by the 1967 * dev->struct_mutex and so we won't ever be able to observe an 1968 * object on the bound_list with a reference count equals 0. 
1969 */ 1970 if (flags & I915_SHRINK_UNBOUND) { 1971 struct list_head still_in_list; 1972 1973 INIT_LIST_HEAD(&still_in_list); 1974 while (count < target && !list_empty(&dev_priv->mm.unbound_list)) { 1975 struct drm_i915_gem_object *obj; 1976 1977 obj = list_first_entry(&dev_priv->mm.unbound_list, 1978 typeof(*obj), global_list); 1979 list_move_tail(&obj->global_list, &still_in_list); 1980 1981 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 1982 continue; 1983 1984 drm_gem_object_reference(&obj->base); 1985 1986 if (i915_gem_object_put_pages(obj) == 0) 1987 count += obj->base.size >> PAGE_SHIFT; 1988 1989 drm_gem_object_unreference(&obj->base); 1990 } 1991 list_splice(&still_in_list, &dev_priv->mm.unbound_list); 1992 } 1993 1994 if (flags & I915_SHRINK_BOUND) { 1995 struct list_head still_in_list; 1996 1997 INIT_LIST_HEAD(&still_in_list); 1998 while (count < target && !list_empty(&dev_priv->mm.bound_list)) { 1999 struct drm_i915_gem_object *obj; 2000 struct i915_vma *vma, *v; 2001 2002 obj = list_first_entry(&dev_priv->mm.bound_list, 2003 typeof(*obj), global_list); 2004 list_move_tail(&obj->global_list, &still_in_list); 2005 2006 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 2007 continue; 2008 2009 drm_gem_object_reference(&obj->base); 2010 2011 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) 2012 if (i915_vma_unbind(vma)) 2013 break; 2014 2015 if (i915_gem_object_put_pages(obj) == 0) 2016 count += obj->base.size >> PAGE_SHIFT; 2017 2018 drm_gem_object_unreference(&obj->base); 2019 } 2020 list_splice(&still_in_list, &dev_priv->mm.bound_list); 2021 } 2022 2023 return count; 2024 } 2025 2026 static unsigned long 2027 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2028 { 2029 i915_gem_evict_everything(dev_priv->dev); 2030 return i915_gem_shrink(dev_priv, LONG_MAX, 2031 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); 2032 } 2033 2034 static int 2035 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2036 { 2037 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2038 int page_count, i; 2039 struct address_space *mapping; 2040 struct sg_table *st; 2041 struct scatterlist *sg; 2042 struct sg_page_iter sg_iter; 2043 struct page *page; 2044 unsigned long last_pfn = 0; /* suppress gcc warning */ 2045 gfp_t gfp; 2046 2047 /* Assert that the object is not currently in any GPU domain. As it 2048 * wasn't in the GTT, there shouldn't be any way it could have been in 2049 * a GPU cache 2050 */ 2051 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2052 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2053 2054 st = kmalloc(sizeof(*st), GFP_KERNEL); 2055 if (st == NULL) 2056 return -ENOMEM; 2057 2058 page_count = obj->base.size / PAGE_SIZE; 2059 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2060 kfree(st); 2061 return -ENOMEM; 2062 } 2063 2064 /* Get the list of pages out of our struct file. They'll be pinned 2065 * at this point until we release them. 
2066 * 2067 * Fail silently without starting the shrinker 2068 */ 2069 mapping = file_inode(obj->base.filp)->i_mapping; 2070 gfp = mapping_gfp_mask(mapping); 2071 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2072 gfp &= ~(__GFP_IO | __GFP_WAIT); 2073 sg = st->sgl; 2074 st->nents = 0; 2075 for (i = 0; i < page_count; i++) { 2076 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2077 if (IS_ERR(page)) { 2078 i915_gem_shrink(dev_priv, 2079 page_count, 2080 I915_SHRINK_BOUND | 2081 I915_SHRINK_UNBOUND | 2082 I915_SHRINK_PURGEABLE); 2083 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2084 } 2085 if (IS_ERR(page)) { 2086 /* We've tried hard to allocate the memory by reaping 2087 * our own buffer, now let the real VM do its job and 2088 * go down in flames if truly OOM. 2089 */ 2090 i915_gem_shrink_all(dev_priv); 2091 page = shmem_read_mapping_page(mapping, i); 2092 if (IS_ERR(page)) 2093 goto err_pages; 2094 } 2095 #ifdef CONFIG_SWIOTLB 2096 if (swiotlb_nr_tbl()) { 2097 st->nents++; 2098 sg_set_page(sg, page, PAGE_SIZE, 0); 2099 sg = sg_next(sg); 2100 continue; 2101 } 2102 #endif 2103 if (!i || page_to_pfn(page) != last_pfn + 1) { 2104 if (i) 2105 sg = sg_next(sg); 2106 st->nents++; 2107 sg_set_page(sg, page, PAGE_SIZE, 0); 2108 } else { 2109 sg->length += PAGE_SIZE; 2110 } 2111 last_pfn = page_to_pfn(page); 2112 2113 /* Check that the i965g/gm workaround works. */ 2114 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2115 } 2116 #ifdef CONFIG_SWIOTLB 2117 if (!swiotlb_nr_tbl()) 2118 #endif 2119 sg_mark_end(sg); 2120 obj->pages = st; 2121 2122 if (i915_gem_object_needs_bit17_swizzle(obj)) 2123 i915_gem_object_do_bit_17_swizzle(obj); 2124 2125 return 0; 2126 2127 err_pages: 2128 sg_mark_end(sg); 2129 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2130 page_cache_release(sg_page_iter_page(&sg_iter)); 2131 sg_free_table(st); 2132 kfree(st); 2133 2134 /* shmemfs first checks if there is enough memory to allocate the page 2135 * and reports ENOSPC should there be insufficient, along with the usual 2136 * ENOMEM for a genuine allocation failure. 2137 * 2138 * We use ENOSPC in our driver to mean that we have run out of aperture 2139 * space and so want to translate the error from shmemfs back to our 2140 * usual understanding of ENOMEM. 2141 */ 2142 if (PTR_ERR(page) == -ENOSPC) 2143 return -ENOMEM; 2144 else 2145 return PTR_ERR(page); 2146 } 2147 2148 /* Ensure that the associated pages are gathered from the backing storage 2149 * and pinned into our object. i915_gem_object_get_pages() may be called 2150 * multiple times before they are released by a single call to 2151 * i915_gem_object_put_pages() - once the pages are no longer referenced 2152 * either as a result of memory pressure (reaping pages under the shrinker) 2153 * or as the object is itself released. 
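 *
 * A typical caller therefore brackets its use of obj->pages roughly as in
 * the sketch below (illustrative only; dev->struct_mutex is assumed to be
 * held and error handling is trimmed):
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);
 *	... access obj->pages ...
 *	i915_gem_object_unpin_pages(obj);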
2154 */ 2155 int 2156 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2157 { 2158 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2159 const struct drm_i915_gem_object_ops *ops = obj->ops; 2160 int ret; 2161 2162 if (obj->pages) 2163 return 0; 2164 2165 if (obj->madv != I915_MADV_WILLNEED) { 2166 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2167 return -EFAULT; 2168 } 2169 2170 BUG_ON(obj->pages_pin_count); 2171 2172 ret = ops->get_pages(obj); 2173 if (ret) 2174 return ret; 2175 2176 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2177 return 0; 2178 } 2179 2180 static void 2181 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2182 struct intel_engine_cs *ring) 2183 { 2184 u32 seqno = intel_ring_get_seqno(ring); 2185 2186 BUG_ON(ring == NULL); 2187 if (obj->ring != ring && obj->last_write_seqno) { 2188 /* Keep the seqno relative to the current ring */ 2189 obj->last_write_seqno = seqno; 2190 } 2191 obj->ring = ring; 2192 2193 /* Add a reference if we're newly entering the active list. */ 2194 if (!obj->active) { 2195 drm_gem_object_reference(&obj->base); 2196 obj->active = 1; 2197 } 2198 2199 list_move_tail(&obj->ring_list, &ring->active_list); 2200 2201 obj->last_read_seqno = seqno; 2202 } 2203 2204 void i915_vma_move_to_active(struct i915_vma *vma, 2205 struct intel_engine_cs *ring) 2206 { 2207 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2208 return i915_gem_object_move_to_active(vma->obj, ring); 2209 } 2210 2211 static void 2212 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2213 { 2214 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2215 struct i915_address_space *vm; 2216 struct i915_vma *vma; 2217 2218 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2219 BUG_ON(!obj->active); 2220 2221 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 2222 vma = i915_gem_obj_to_vma(obj, vm); 2223 if (vma && !list_empty(&vma->mm_list)) 2224 list_move_tail(&vma->mm_list, &vm->inactive_list); 2225 } 2226 2227 intel_fb_obj_flush(obj, true); 2228 2229 list_del_init(&obj->ring_list); 2230 obj->ring = NULL; 2231 2232 obj->last_read_seqno = 0; 2233 obj->last_write_seqno = 0; 2234 obj->base.write_domain = 0; 2235 2236 obj->last_fenced_seqno = 0; 2237 2238 obj->active = 0; 2239 drm_gem_object_unreference(&obj->base); 2240 2241 WARN_ON(i915_verify_lists(dev)); 2242 } 2243 2244 static void 2245 i915_gem_object_retire(struct drm_i915_gem_object *obj) 2246 { 2247 struct intel_engine_cs *ring = obj->ring; 2248 2249 if (ring == NULL) 2250 return; 2251 2252 if (i915_seqno_passed(ring->get_seqno(ring, true), 2253 obj->last_read_seqno)) 2254 i915_gem_object_move_to_inactive(obj); 2255 } 2256 2257 static int 2258 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2259 { 2260 struct drm_i915_private *dev_priv = dev->dev_private; 2261 struct intel_engine_cs *ring; 2262 int ret, i, j; 2263 2264 /* Carefully retire all requests without writing to the rings */ 2265 for_each_ring(ring, dev_priv, i) { 2266 ret = intel_ring_idle(ring); 2267 if (ret) 2268 return ret; 2269 } 2270 i915_gem_retire_requests(dev); 2271 2272 /* Finally reset hw state */ 2273 for_each_ring(ring, dev_priv, i) { 2274 intel_ring_init_seqno(ring, seqno); 2275 2276 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2277 ring->semaphore.sync_seqno[j] = 0; 2278 } 2279 2280 return 0; 2281 } 2282 2283 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2284 { 2285 struct drm_i915_private *dev_priv = 
dev->dev_private; 2286 int ret; 2287 2288 if (seqno == 0) 2289 return -EINVAL; 2290 2291 /* HWS page needs to be set less than what we 2292 * will inject to ring 2293 */ 2294 ret = i915_gem_init_seqno(dev, seqno - 1); 2295 if (ret) 2296 return ret; 2297 2298 /* Carefully set the last_seqno value so that wrap 2299 * detection still works 2300 */ 2301 dev_priv->next_seqno = seqno; 2302 dev_priv->last_seqno = seqno - 1; 2303 if (dev_priv->last_seqno == 0) 2304 dev_priv->last_seqno--; 2305 2306 return 0; 2307 } 2308 2309 int 2310 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2311 { 2312 struct drm_i915_private *dev_priv = dev->dev_private; 2313 2314 /* reserve 0 for non-seqno */ 2315 if (dev_priv->next_seqno == 0) { 2316 int ret = i915_gem_init_seqno(dev, 0); 2317 if (ret) 2318 return ret; 2319 2320 dev_priv->next_seqno = 1; 2321 } 2322 2323 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2324 return 0; 2325 } 2326 2327 int __i915_add_request(struct intel_engine_cs *ring, 2328 struct drm_file *file, 2329 struct drm_i915_gem_object *obj, 2330 u32 *out_seqno) 2331 { 2332 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2333 struct drm_i915_gem_request *request; 2334 struct intel_ringbuffer *ringbuf; 2335 u32 request_ring_position, request_start; 2336 int ret; 2337 2338 request = ring->preallocated_lazy_request; 2339 if (WARN_ON(request == NULL)) 2340 return -ENOMEM; 2341 2342 if (i915.enable_execlists) { 2343 struct intel_context *ctx = request->ctx; 2344 ringbuf = ctx->engine[ring->id].ringbuf; 2345 } else 2346 ringbuf = ring->buffer; 2347 2348 request_start = intel_ring_get_tail(ringbuf); 2349 /* 2350 * Emit any outstanding flushes - execbuf can fail to emit the flush 2351 * after having emitted the batchbuffer command. Hence we need to fix 2352 * things up similarly to emitting the lazy request. The difference here 2353 * is that the flush _must_ happen before the next request, no matter 2354 * what. 2355 */ 2356 if (i915.enable_execlists) { 2357 ret = logical_ring_flush_all_caches(ringbuf); 2358 if (ret) 2359 return ret; 2360 } else { 2361 ret = intel_ring_flush_all_caches(ring); 2362 if (ret) 2363 return ret; 2364 } 2365 2366 /* Record the position of the start of the request so that 2367 * should we detect the updated seqno part-way through the 2368 * GPU processing the request, we never over-estimate the 2369 * position of the head. 2370 */ 2371 request_ring_position = intel_ring_get_tail(ringbuf); 2372 2373 if (i915.enable_execlists) { 2374 ret = ring->emit_request(ringbuf); 2375 if (ret) 2376 return ret; 2377 } else { 2378 ret = ring->add_request(ring); 2379 if (ret) 2380 return ret; 2381 } 2382 2383 request->seqno = intel_ring_get_seqno(ring); 2384 request->ring = ring; 2385 request->head = request_start; 2386 request->tail = request_ring_position; 2387 2388 /* Whilst this request exists, batch_obj will be on the 2389 * active_list, and so will hold the active reference. Only when this 2390 * request is retired will the batch_obj be moved onto the 2391 * inactive_list and lose its active reference. Hence we do not need 2392 * to explicitly hold another reference here. 2393 */ 2394 request->batch_obj = obj; 2395 2396 if (!i915.enable_execlists) { 2397 /* Hold a reference to the current context so that we can inspect 2398 * it later in case a hangcheck error event fires.
2399 */ 2400 request->ctx = ring->last_context; 2401 if (request->ctx) 2402 i915_gem_context_reference(request->ctx); 2403 } 2404 2405 request->emitted_jiffies = jiffies; 2406 list_add_tail(&request->list, &ring->request_list); 2407 request->file_priv = NULL; 2408 2409 if (file) { 2410 struct drm_i915_file_private *file_priv = file->driver_priv; 2411 2412 spin_lock(&file_priv->mm.lock); 2413 request->file_priv = file_priv; 2414 list_add_tail(&request->client_list, 2415 &file_priv->mm.request_list); 2416 spin_unlock(&file_priv->mm.lock); 2417 } 2418 2419 trace_i915_gem_request_add(ring, request->seqno); 2420 ring->outstanding_lazy_seqno = 0; 2421 ring->preallocated_lazy_request = NULL; 2422 2423 if (!dev_priv->ums.mm_suspended) { 2424 i915_queue_hangcheck(ring->dev); 2425 2426 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 2427 queue_delayed_work(dev_priv->wq, 2428 &dev_priv->mm.retire_work, 2429 round_jiffies_up_relative(HZ)); 2430 intel_mark_busy(dev_priv->dev); 2431 } 2432 2433 if (out_seqno) 2434 *out_seqno = request->seqno; 2435 return 0; 2436 } 2437 2438 static inline void 2439 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2440 { 2441 struct drm_i915_file_private *file_priv = request->file_priv; 2442 2443 if (!file_priv) 2444 return; 2445 2446 spin_lock(&file_priv->mm.lock); 2447 list_del(&request->client_list); 2448 request->file_priv = NULL; 2449 spin_unlock(&file_priv->mm.lock); 2450 } 2451 2452 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2453 const struct intel_context *ctx) 2454 { 2455 unsigned long elapsed; 2456 2457 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2458 2459 if (ctx->hang_stats.banned) 2460 return true; 2461 2462 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { 2463 if (!i915_gem_context_is_default(ctx)) { 2464 DRM_DEBUG("context hanging too fast, banning!\n"); 2465 return true; 2466 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2467 if (i915_stop_ring_allow_warn(dev_priv)) 2468 DRM_ERROR("gpu hanging too fast, banning!\n"); 2469 return true; 2470 } 2471 } 2472 2473 return false; 2474 } 2475 2476 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2477 struct intel_context *ctx, 2478 const bool guilty) 2479 { 2480 struct i915_ctx_hang_stats *hs; 2481 2482 if (WARN_ON(!ctx)) 2483 return; 2484 2485 hs = &ctx->hang_stats; 2486 2487 if (guilty) { 2488 hs->banned = i915_context_is_banned(dev_priv, ctx); 2489 hs->batch_active++; 2490 hs->guilty_ts = get_seconds(); 2491 } else { 2492 hs->batch_pending++; 2493 } 2494 } 2495 2496 static void i915_gem_free_request(struct drm_i915_gem_request *request) 2497 { 2498 list_del(&request->list); 2499 i915_gem_request_remove_from_client(request); 2500 2501 if (request->ctx) 2502 i915_gem_context_unreference(request->ctx); 2503 2504 kfree(request); 2505 } 2506 2507 struct drm_i915_gem_request * 2508 i915_gem_find_active_request(struct intel_engine_cs *ring) 2509 { 2510 struct drm_i915_gem_request *request; 2511 u32 completed_seqno; 2512 2513 completed_seqno = ring->get_seqno(ring, false); 2514 2515 list_for_each_entry(request, &ring->request_list, list) { 2516 if (i915_seqno_passed(completed_seqno, request->seqno)) 2517 continue; 2518 2519 return request; 2520 } 2521 2522 return NULL; 2523 } 2524 2525 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2526 struct intel_engine_cs *ring) 2527 { 2528 struct drm_i915_gem_request *request; 2529 bool ring_hung; 2530 2531 request = i915_gem_find_active_request(ring); 2532 2533 if (request == 
NULL) 2534 return; 2535 2536 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2537 2538 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2539 2540 list_for_each_entry_continue(request, &ring->request_list, list) 2541 i915_set_reset_status(dev_priv, request->ctx, false); 2542 } 2543 2544 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2545 struct intel_engine_cs *ring) 2546 { 2547 while (!list_empty(&ring->active_list)) { 2548 struct drm_i915_gem_object *obj; 2549 2550 obj = list_first_entry(&ring->active_list, 2551 struct drm_i915_gem_object, 2552 ring_list); 2553 2554 i915_gem_object_move_to_inactive(obj); 2555 } 2556 2557 /* 2558 * We must free the requests after all the corresponding objects have 2559 * been moved off the active lists, which is the same order that the normal 2560 * retire_requests function uses. This is important if objects hold 2561 * implicit references on things like e.g. ppgtt address spaces through 2562 * the request. 2563 */ 2564 while (!list_empty(&ring->request_list)) { 2565 struct drm_i915_gem_request *request; 2566 2567 request = list_first_entry(&ring->request_list, 2568 struct drm_i915_gem_request, 2569 list); 2570 2571 i915_gem_free_request(request); 2572 } 2573 2574 while (!list_empty(&ring->execlist_queue)) { 2575 struct intel_ctx_submit_request *submit_req; 2576 2577 submit_req = list_first_entry(&ring->execlist_queue, 2578 struct intel_ctx_submit_request, 2579 execlist_link); 2580 list_del(&submit_req->execlist_link); 2581 intel_runtime_pm_put(dev_priv); 2582 i915_gem_context_unreference(submit_req->ctx); 2583 kfree(submit_req); 2584 } 2585 2586 /* These may not have been flushed before the reset, so do so now */ 2587 kfree(ring->preallocated_lazy_request); 2588 ring->preallocated_lazy_request = NULL; 2589 ring->outstanding_lazy_seqno = 0; 2590 } 2591 2592 void i915_gem_restore_fences(struct drm_device *dev) 2593 { 2594 struct drm_i915_private *dev_priv = dev->dev_private; 2595 int i; 2596 2597 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2598 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2599 2600 /* 2601 * Commit delayed tiling changes if we have an object still 2602 * attached to the fence, otherwise just clear the fence. 2603 */ 2604 if (reg->obj) { 2605 i915_gem_object_update_fence(reg->obj, reg, 2606 reg->obj->tiling_mode); 2607 } else { 2608 i915_gem_write_fence(dev, i, NULL); 2609 } 2610 } 2611 } 2612 2613 void i915_gem_reset(struct drm_device *dev) 2614 { 2615 struct drm_i915_private *dev_priv = dev->dev_private; 2616 struct intel_engine_cs *ring; 2617 int i; 2618 2619 /* 2620 * Before we free the objects from the requests, we need to inspect 2621 * them to find the guilty party. As the requests only borrow 2622 * their reference to the objects, the inspection must be done first. 2623 */ 2624 for_each_ring(ring, dev_priv, i) 2625 i915_gem_reset_ring_status(dev_priv, ring); 2626 2627 for_each_ring(ring, dev_priv, i) 2628 i915_gem_reset_ring_cleanup(dev_priv, ring); 2629 2630 i915_gem_context_reset(dev); 2631 2632 i915_gem_restore_fences(dev); 2633 } 2634 2635 /** 2636 * This function clears the request list as sequence numbers are passed.
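 *
 * Completed buffers are moved off the ring's active list first and only
 * then are the corresponding requests freed. Most callers reach this via
 * i915_gem_retire_requests(), which simply walks every ring, along the
 * lines of (sketch, struct_mutex held):
 *
 *	for_each_ring(ring, dev_priv, i)
 *		i915_gem_retire_requests_ring(ring);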
2637 */ 2638 void 2639 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2640 { 2641 uint32_t seqno; 2642 2643 if (list_empty(&ring->request_list)) 2644 return; 2645 2646 WARN_ON(i915_verify_lists(ring->dev)); 2647 2648 seqno = ring->get_seqno(ring, true); 2649 2650 /* Move any buffers on the active list that are no longer referenced 2651 * by the ringbuffer to the flushing/inactive lists as appropriate, 2652 * before we free the context associated with the requests. 2653 */ 2654 while (!list_empty(&ring->active_list)) { 2655 struct drm_i915_gem_object *obj; 2656 2657 obj = list_first_entry(&ring->active_list, 2658 struct drm_i915_gem_object, 2659 ring_list); 2660 2661 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2662 break; 2663 2664 i915_gem_object_move_to_inactive(obj); 2665 } 2666 2667 2668 while (!list_empty(&ring->request_list)) { 2669 struct drm_i915_gem_request *request; 2670 struct intel_ringbuffer *ringbuf; 2671 2672 request = list_first_entry(&ring->request_list, 2673 struct drm_i915_gem_request, 2674 list); 2675 2676 if (!i915_seqno_passed(seqno, request->seqno)) 2677 break; 2678 2679 trace_i915_gem_request_retire(ring, request->seqno); 2680 2681 /* This is one of the few common intersection points 2682 * between legacy ringbuffer submission and execlists: 2683 * we need to tell them apart in order to find the correct 2684 * ringbuffer to which the request belongs. 2685 */ 2686 if (i915.enable_execlists) { 2687 struct intel_context *ctx = request->ctx; 2688 ringbuf = ctx->engine[ring->id].ringbuf; 2689 } else 2690 ringbuf = ring->buffer; 2691 2692 /* We know the GPU must have read the request to have 2693 * sent us the seqno + interrupt, so use the position 2694 * of the tail of the request to update the last known position 2695 * of the GPU head. 2696 */ 2697 ringbuf->last_retired_head = request->tail; 2698 2699 i915_gem_free_request(request); 2700 } 2701 2702 if (unlikely(ring->trace_irq_seqno && 2703 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2704 ring->irq_put(ring); 2705 ring->trace_irq_seqno = 0; 2706 } 2707 2708 WARN_ON(i915_verify_lists(ring->dev)); 2709 } 2710 2711 bool 2712 i915_gem_retire_requests(struct drm_device *dev) 2713 { 2714 struct drm_i915_private *dev_priv = dev->dev_private; 2715 struct intel_engine_cs *ring; 2716 bool idle = true; 2717 int i; 2718 2719 for_each_ring(ring, dev_priv, i) { 2720 i915_gem_retire_requests_ring(ring); 2721 idle &= list_empty(&ring->request_list); 2722 } 2723 2724 if (idle) 2725 mod_delayed_work(dev_priv->wq, 2726 &dev_priv->mm.idle_work, 2727 msecs_to_jiffies(100)); 2728 2729 return idle; 2730 } 2731 2732 static void 2733 i915_gem_retire_work_handler(struct work_struct *work) 2734 { 2735 struct drm_i915_private *dev_priv = 2736 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2737 struct drm_device *dev = dev_priv->dev; 2738 bool idle; 2739 2740 /* Come back later if the device is busy...
*/ 2741 idle = false; 2742 if (mutex_trylock(&dev->struct_mutex)) { 2743 idle = i915_gem_retire_requests(dev); 2744 mutex_unlock(&dev->struct_mutex); 2745 } 2746 if (!idle) 2747 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2748 round_jiffies_up_relative(HZ)); 2749 } 2750 2751 static void 2752 i915_gem_idle_work_handler(struct work_struct *work) 2753 { 2754 struct drm_i915_private *dev_priv = 2755 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2756 2757 intel_mark_idle(dev_priv->dev); 2758 } 2759 2760 /** 2761 * Ensures that an object will eventually get non-busy by flushing any required 2762 * write domains, emitting any outstanding lazy request and retiring any 2763 * completed requests. 2764 */ 2765 static int 2766 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2767 { 2768 int ret; 2769 2770 if (obj->active) { 2771 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2772 if (ret) 2773 return ret; 2774 2775 i915_gem_retire_requests_ring(obj->ring); 2776 } 2777 2778 return 0; 2779 } 2780 2781 /** 2782 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2783 * @DRM_IOCTL_ARGS: standard ioctl arguments 2784 * 2785 * Returns 0 if successful, else an error is returned with the remaining time in 2786 * the timeout parameter. 2787 * -ETIME: object is still busy after timeout 2788 * -ERESTARTSYS: signal interrupted the wait 2789 * -ENOENT: object doesn't exist 2790 * Also possible, but rare: 2791 * -EAGAIN: GPU wedged 2792 * -ENOMEM: damn 2793 * -ENODEV: Internal IRQ fail 2794 * -E?: The add request failed 2795 * 2796 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2797 * non-zero timeout parameter the wait ioctl will wait for the given number of 2798 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2799 * without holding struct_mutex the object may become re-busied before this 2800 * function completes. A similar but shorter race condition exists in the busy 2801 * ioctl. 2802 */ 2803 int 2804 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2805 { 2806 struct drm_i915_private *dev_priv = dev->dev_private; 2807 struct drm_i915_gem_wait *args = data; 2808 struct drm_i915_gem_object *obj; 2809 struct intel_engine_cs *ring = NULL; 2810 unsigned reset_counter; 2811 u32 seqno = 0; 2812 int ret = 0; 2813 2814 ret = i915_mutex_lock_interruptible(dev); 2815 if (ret) 2816 return ret; 2817 2818 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2819 if (&obj->base == NULL) { 2820 mutex_unlock(&dev->struct_mutex); 2821 return -ENOENT; 2822 } 2823 2824 /* Need to make sure the object gets inactive eventually.
*/ 2825 ret = i915_gem_object_flush_active(obj); 2826 if (ret) 2827 goto out; 2828 2829 if (obj->active) { 2830 seqno = obj->last_read_seqno; 2831 ring = obj->ring; 2832 } 2833 2834 if (seqno == 0) 2835 goto out; 2836 2837 /* Do this after OLR check to make sure we make forward progress polling 2838 * on this IOCTL with a timeout <=0 (like busy ioctl) 2839 */ 2840 if (args->timeout_ns <= 0) { 2841 ret = -ETIME; 2842 goto out; 2843 } 2844 2845 drm_gem_object_unreference(&obj->base); 2846 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2847 mutex_unlock(&dev->struct_mutex); 2848 2849 return __wait_seqno(ring, seqno, reset_counter, true, &args->timeout_ns, 2850 file->driver_priv); 2851 2852 out: 2853 drm_gem_object_unreference(&obj->base); 2854 mutex_unlock(&dev->struct_mutex); 2855 return ret; 2856 } 2857 2858 /** 2859 * i915_gem_object_sync - sync an object to a ring. 2860 * 2861 * @obj: object which may be in use on another ring. 2862 * @to: ring we wish to use the object on. May be NULL. 2863 * 2864 * This code is meant to abstract object synchronization with the GPU. 2865 * Calling with NULL implies synchronizing the object with the CPU 2866 * rather than a particular GPU ring. 2867 * 2868 * Returns 0 if successful, else propagates up the lower layer error. 2869 */ 2870 int 2871 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2872 struct intel_engine_cs *to) 2873 { 2874 struct intel_engine_cs *from = obj->ring; 2875 u32 seqno; 2876 int ret, idx; 2877 2878 if (from == NULL || to == from) 2879 return 0; 2880 2881 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2882 return i915_gem_object_wait_rendering(obj, false); 2883 2884 idx = intel_ring_sync_index(from, to); 2885 2886 seqno = obj->last_read_seqno; 2887 /* Optimization: Avoid semaphore sync when we are sure we already 2888 * waited for an object with higher seqno */ 2889 if (seqno <= from->semaphore.sync_seqno[idx]) 2890 return 0; 2891 2892 ret = i915_gem_check_olr(obj->ring, seqno); 2893 if (ret) 2894 return ret; 2895 2896 trace_i915_gem_ring_sync_to(from, to, seqno); 2897 ret = to->semaphore.sync_to(to, from, seqno); 2898 if (!ret) 2899 /* We use last_read_seqno because sync_to() 2900 * might have just caused seqno wrap under 2901 * the radar. 
2902 */ 2903 from->semaphore.sync_seqno[idx] = obj->last_read_seqno; 2904 2905 return ret; 2906 } 2907 2908 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2909 { 2910 u32 old_write_domain, old_read_domains; 2911 2912 /* Force a pagefault for domain tracking on next user access */ 2913 i915_gem_release_mmap(obj); 2914 2915 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2916 return; 2917 2918 /* Wait for any direct GTT access to complete */ 2919 mb(); 2920 2921 old_read_domains = obj->base.read_domains; 2922 old_write_domain = obj->base.write_domain; 2923 2924 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2925 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2926 2927 trace_i915_gem_object_change_domain(obj, 2928 old_read_domains, 2929 old_write_domain); 2930 } 2931 2932 int i915_vma_unbind(struct i915_vma *vma) 2933 { 2934 struct drm_i915_gem_object *obj = vma->obj; 2935 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2936 int ret; 2937 2938 if (list_empty(&vma->vma_link)) 2939 return 0; 2940 2941 if (!drm_mm_node_allocated(&vma->node)) { 2942 i915_gem_vma_destroy(vma); 2943 return 0; 2944 } 2945 2946 if (vma->pin_count) 2947 return -EBUSY; 2948 2949 BUG_ON(obj->pages == NULL); 2950 2951 ret = i915_gem_object_finish_gpu(obj); 2952 if (ret) 2953 return ret; 2954 /* Continue on if we fail due to EIO, the GPU is hung so we 2955 * should be safe and we need to cleanup or else we might 2956 * cause memory corruption through use-after-free. 2957 */ 2958 2959 /* Throw away the active reference before moving to the unbound list */ 2960 i915_gem_object_retire(obj); 2961 2962 if (i915_is_ggtt(vma->vm)) { 2963 i915_gem_object_finish_gtt(obj); 2964 2965 /* release the fence reg _after_ flushing */ 2966 ret = i915_gem_object_put_fence(obj); 2967 if (ret) 2968 return ret; 2969 } 2970 2971 trace_i915_vma_unbind(vma); 2972 2973 vma->unbind_vma(vma); 2974 2975 list_del_init(&vma->mm_list); 2976 if (i915_is_ggtt(vma->vm)) 2977 obj->map_and_fenceable = false; 2978 2979 drm_mm_remove_node(&vma->node); 2980 i915_gem_vma_destroy(vma); 2981 2982 /* Since the unbound list is global, only move to that list if 2983 * no more VMAs exist. */ 2984 if (list_empty(&obj->vma_list)) { 2985 i915_gem_gtt_finish_object(obj); 2986 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2987 } 2988 2989 /* And finally now the object is completely decoupled from this vma, 2990 * we can drop its hold on the backing storage and allow it to be 2991 * reaped by the shrinker. 2992 */ 2993 i915_gem_object_unpin_pages(obj); 2994 2995 return 0; 2996 } 2997 2998 int i915_gpu_idle(struct drm_device *dev) 2999 { 3000 struct drm_i915_private *dev_priv = dev->dev_private; 3001 struct intel_engine_cs *ring; 3002 int ret, i; 3003 3004 /* Flush everything onto the inactive list. 
*/ 3005 for_each_ring(ring, dev_priv, i) { 3006 if (!i915.enable_execlists) { 3007 ret = i915_switch_context(ring, ring->default_context); 3008 if (ret) 3009 return ret; 3010 } 3011 3012 ret = intel_ring_idle(ring); 3013 if (ret) 3014 return ret; 3015 } 3016 3017 return 0; 3018 } 3019 3020 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3021 struct drm_i915_gem_object *obj) 3022 { 3023 struct drm_i915_private *dev_priv = dev->dev_private; 3024 int fence_reg; 3025 int fence_pitch_shift; 3026 3027 if (INTEL_INFO(dev)->gen >= 6) { 3028 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3029 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3030 } else { 3031 fence_reg = FENCE_REG_965_0; 3032 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3033 } 3034 3035 fence_reg += reg * 8; 3036 3037 /* To w/a incoherency with non-atomic 64-bit register updates, 3038 * we split the 64-bit update into two 32-bit writes. In order 3039 * for a partial fence not to be evaluated between writes, we 3040 * precede the update with write to turn off the fence register, 3041 * and only enable the fence as the last step. 3042 * 3043 * For extra levels of paranoia, we make sure each step lands 3044 * before applying the next step. 3045 */ 3046 I915_WRITE(fence_reg, 0); 3047 POSTING_READ(fence_reg); 3048 3049 if (obj) { 3050 u32 size = i915_gem_obj_ggtt_size(obj); 3051 uint64_t val; 3052 3053 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3054 0xfffff000) << 32; 3055 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3056 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3057 if (obj->tiling_mode == I915_TILING_Y) 3058 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3059 val |= I965_FENCE_REG_VALID; 3060 3061 I915_WRITE(fence_reg + 4, val >> 32); 3062 POSTING_READ(fence_reg + 4); 3063 3064 I915_WRITE(fence_reg + 0, val); 3065 POSTING_READ(fence_reg); 3066 } else { 3067 I915_WRITE(fence_reg + 4, 0); 3068 POSTING_READ(fence_reg + 4); 3069 } 3070 } 3071 3072 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3073 struct drm_i915_gem_object *obj) 3074 { 3075 struct drm_i915_private *dev_priv = dev->dev_private; 3076 u32 val; 3077 3078 if (obj) { 3079 u32 size = i915_gem_obj_ggtt_size(obj); 3080 int pitch_val; 3081 int tile_width; 3082 3083 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3084 (size & -size) != size || 3085 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3086 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3087 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3088 3089 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3090 tile_width = 128; 3091 else 3092 tile_width = 512; 3093 3094 /* Note: pitch better be a power of two tile widths */ 3095 pitch_val = obj->stride / tile_width; 3096 pitch_val = ffs(pitch_val) - 1; 3097 3098 val = i915_gem_obj_ggtt_offset(obj); 3099 if (obj->tiling_mode == I915_TILING_Y) 3100 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3101 val |= I915_FENCE_SIZE_BITS(size); 3102 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3103 val |= I830_FENCE_REG_VALID; 3104 } else 3105 val = 0; 3106 3107 if (reg < 8) 3108 reg = FENCE_REG_830_0 + reg * 4; 3109 else 3110 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3111 3112 I915_WRITE(reg, val); 3113 POSTING_READ(reg); 3114 } 3115 3116 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3117 struct drm_i915_gem_object *obj) 3118 { 3119 struct drm_i915_private *dev_priv = dev->dev_private; 3120 uint32_t val; 3121 3122 if (obj) { 3123 u32 size = i915_gem_obj_ggtt_size(obj); 3124 uint32_t pitch_val; 3125 3126 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3127 (size & -size) != size || 3128 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3129 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3130 i915_gem_obj_ggtt_offset(obj), size); 3131 3132 pitch_val = obj->stride / 128; 3133 pitch_val = ffs(pitch_val) - 1; 3134 3135 val = i915_gem_obj_ggtt_offset(obj); 3136 if (obj->tiling_mode == I915_TILING_Y) 3137 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3138 val |= I830_FENCE_SIZE_BITS(size); 3139 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3140 val |= I830_FENCE_REG_VALID; 3141 } else 3142 val = 0; 3143 3144 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3145 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3146 } 3147 3148 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3149 { 3150 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3151 } 3152 3153 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3154 struct drm_i915_gem_object *obj) 3155 { 3156 struct drm_i915_private *dev_priv = dev->dev_private; 3157 3158 /* Ensure that all CPU reads are completed before installing a fence 3159 * and all writes before removing the fence. 3160 */ 3161 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3162 mb(); 3163 3164 WARN(obj && (!obj->stride || !obj->tiling_mode), 3165 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3166 obj->stride, obj->tiling_mode); 3167 3168 switch (INTEL_INFO(dev)->gen) { 3169 case 8: 3170 case 7: 3171 case 6: 3172 case 5: 3173 case 4: i965_write_fence_reg(dev, reg, obj); break; 3174 case 3: i915_write_fence_reg(dev, reg, obj); break; 3175 case 2: i830_write_fence_reg(dev, reg, obj); break; 3176 default: BUG(); 3177 } 3178 3179 /* And similarly be paranoid that no direct access to this region 3180 * is reordered to before the fence is installed. 
3181 */ 3182 if (i915_gem_object_needs_mb(obj)) 3183 mb(); 3184 } 3185 3186 static inline int fence_number(struct drm_i915_private *dev_priv, 3187 struct drm_i915_fence_reg *fence) 3188 { 3189 return fence - dev_priv->fence_regs; 3190 } 3191 3192 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3193 struct drm_i915_fence_reg *fence, 3194 bool enable) 3195 { 3196 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3197 int reg = fence_number(dev_priv, fence); 3198 3199 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3200 3201 if (enable) { 3202 obj->fence_reg = reg; 3203 fence->obj = obj; 3204 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3205 } else { 3206 obj->fence_reg = I915_FENCE_REG_NONE; 3207 fence->obj = NULL; 3208 list_del_init(&fence->lru_list); 3209 } 3210 obj->fence_dirty = false; 3211 } 3212 3213 static int 3214 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3215 { 3216 if (obj->last_fenced_seqno) { 3217 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3218 if (ret) 3219 return ret; 3220 3221 obj->last_fenced_seqno = 0; 3222 } 3223 3224 return 0; 3225 } 3226 3227 int 3228 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3229 { 3230 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3231 struct drm_i915_fence_reg *fence; 3232 int ret; 3233 3234 ret = i915_gem_object_wait_fence(obj); 3235 if (ret) 3236 return ret; 3237 3238 if (obj->fence_reg == I915_FENCE_REG_NONE) 3239 return 0; 3240 3241 fence = &dev_priv->fence_regs[obj->fence_reg]; 3242 3243 if (WARN_ON(fence->pin_count)) 3244 return -EBUSY; 3245 3246 i915_gem_object_fence_lost(obj); 3247 i915_gem_object_update_fence(obj, fence, false); 3248 3249 return 0; 3250 } 3251 3252 static struct drm_i915_fence_reg * 3253 i915_find_fence_reg(struct drm_device *dev) 3254 { 3255 struct drm_i915_private *dev_priv = dev->dev_private; 3256 struct drm_i915_fence_reg *reg, *avail; 3257 int i; 3258 3259 /* First try to find a free reg */ 3260 avail = NULL; 3261 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3262 reg = &dev_priv->fence_regs[i]; 3263 if (!reg->obj) 3264 return reg; 3265 3266 if (!reg->pin_count) 3267 avail = reg; 3268 } 3269 3270 if (avail == NULL) 3271 goto deadlock; 3272 3273 /* None available, try to steal one or wait for a user to finish */ 3274 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3275 if (reg->pin_count) 3276 continue; 3277 3278 return reg; 3279 } 3280 3281 deadlock: 3282 /* Wait for completion of pending flips which consume fences */ 3283 if (intel_has_pending_fb_unpin(dev)) 3284 return ERR_PTR(-EAGAIN); 3285 3286 return ERR_PTR(-EDEADLK); 3287 } 3288 3289 /** 3290 * i915_gem_object_get_fence - set up fencing for an object 3291 * @obj: object to map through a fence reg 3292 * 3293 * When mapping objects through the GTT, userspace wants to be able to write 3294 * to them without having to worry about swizzling if the object is tiled. 3295 * This function walks the fence regs looking for a free one for @obj, 3296 * stealing one if it can't find any. 3297 * 3298 * It then sets up the reg based on the object's properties: address, pitch 3299 * and tiling format. 3300 * 3301 * For an untiled surface, this removes any existing fence. 
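 *
 * A typical fenced GTT access therefore looks roughly like the sketch
 * below (illustrative only; struct_mutex is assumed held and error
 * handling is elided):
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
 *	ret = i915_gem_object_get_fence(obj);
 *	... access the object through the mappable aperture ...
 *	i915_gem_object_ggtt_unpin(obj);
 *
 * with i915_gem_object_put_fence() used if the caller needs to relinquish
 * the fence explicitly (for example around a tiling change).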
3302 */ 3303 int 3304 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3305 { 3306 struct drm_device *dev = obj->base.dev; 3307 struct drm_i915_private *dev_priv = dev->dev_private; 3308 bool enable = obj->tiling_mode != I915_TILING_NONE; 3309 struct drm_i915_fence_reg *reg; 3310 int ret; 3311 3312 /* Have we updated the tiling parameters upon the object and so 3313 * will need to serialise the write to the associated fence register? 3314 */ 3315 if (obj->fence_dirty) { 3316 ret = i915_gem_object_wait_fence(obj); 3317 if (ret) 3318 return ret; 3319 } 3320 3321 /* Just update our place in the LRU if our fence is getting reused. */ 3322 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3323 reg = &dev_priv->fence_regs[obj->fence_reg]; 3324 if (!obj->fence_dirty) { 3325 list_move_tail(&reg->lru_list, 3326 &dev_priv->mm.fence_list); 3327 return 0; 3328 } 3329 } else if (enable) { 3330 if (WARN_ON(!obj->map_and_fenceable)) 3331 return -EINVAL; 3332 3333 reg = i915_find_fence_reg(dev); 3334 if (IS_ERR(reg)) 3335 return PTR_ERR(reg); 3336 3337 if (reg->obj) { 3338 struct drm_i915_gem_object *old = reg->obj; 3339 3340 ret = i915_gem_object_wait_fence(old); 3341 if (ret) 3342 return ret; 3343 3344 i915_gem_object_fence_lost(old); 3345 } 3346 } else 3347 return 0; 3348 3349 i915_gem_object_update_fence(obj, reg, enable); 3350 3351 return 0; 3352 } 3353 3354 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3355 unsigned long cache_level) 3356 { 3357 struct drm_mm_node *gtt_space = &vma->node; 3358 struct drm_mm_node *other; 3359 3360 /* 3361 * On some machines we have to be careful when putting differing types 3362 * of snoopable memory together to avoid the prefetcher crossing memory 3363 * domains and dying. During vm initialisation, we decide whether or not 3364 * these constraints apply and set the drm_mm.color_adjust 3365 * appropriately.
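 *
 * When color_adjust is set, the checks below inspect the drm_mm nodes
 * immediately before and after this vma's node and reject the placement
 * if a neighbour of a different cache colour abuts it without a hole in
 * between.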
3366 */ 3367 if (vma->vm->mm.color_adjust == NULL) 3368 return true; 3369 3370 if (!drm_mm_node_allocated(gtt_space)) 3371 return true; 3372 3373 if (list_empty(&gtt_space->node_list)) 3374 return true; 3375 3376 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3377 if (other->allocated && !other->hole_follows && other->color != cache_level) 3378 return false; 3379 3380 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3381 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3382 return false; 3383 3384 return true; 3385 } 3386 3387 static void i915_gem_verify_gtt(struct drm_device *dev) 3388 { 3389 #if WATCH_GTT 3390 struct drm_i915_private *dev_priv = dev->dev_private; 3391 struct drm_i915_gem_object *obj; 3392 int err = 0; 3393 3394 list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) { 3395 if (obj->gtt_space == NULL) { 3396 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3397 err++; 3398 continue; 3399 } 3400 3401 if (obj->cache_level != obj->gtt_space->color) { 3402 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3403 i915_gem_obj_ggtt_offset(obj), 3404 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3405 obj->cache_level, 3406 obj->gtt_space->color); 3407 err++; 3408 continue; 3409 } 3410 3411 if (!i915_gem_valid_gtt_space(dev, 3412 obj->gtt_space, 3413 obj->cache_level)) { 3414 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3415 i915_gem_obj_ggtt_offset(obj), 3416 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3417 obj->cache_level); 3418 err++; 3419 continue; 3420 } 3421 } 3422 3423 WARN_ON(err); 3424 #endif 3425 } 3426 3427 /** 3428 * Finds free space in the GTT aperture and binds the object there. 3429 */ 3430 static struct i915_vma * 3431 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3432 struct i915_address_space *vm, 3433 unsigned alignment, 3434 uint64_t flags) 3435 { 3436 struct drm_device *dev = obj->base.dev; 3437 struct drm_i915_private *dev_priv = dev->dev_private; 3438 u32 size, fence_size, fence_alignment, unfenced_alignment; 3439 unsigned long start = 3440 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3441 unsigned long end = 3442 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3443 struct i915_vma *vma; 3444 int ret; 3445 3446 fence_size = i915_gem_get_gtt_size(dev, 3447 obj->base.size, 3448 obj->tiling_mode); 3449 fence_alignment = i915_gem_get_gtt_alignment(dev, 3450 obj->base.size, 3451 obj->tiling_mode, true); 3452 unfenced_alignment = 3453 i915_gem_get_gtt_alignment(dev, 3454 obj->base.size, 3455 obj->tiling_mode, false); 3456 3457 if (alignment == 0) 3458 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3459 unfenced_alignment; 3460 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3461 DRM_DEBUG("Invalid object alignment requested %u\n", alignment); 3462 return ERR_PTR(-EINVAL); 3463 } 3464 3465 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3466 3467 /* If the object is bigger than the entire aperture, reject it early 3468 * before evicting everything in a vain attempt to find space. 3469 */ 3470 if (obj->base.size > end) { 3471 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", 3472 obj->base.size, 3473 flags & PIN_MAPPABLE ?
"mappable" : "total", 3474 end); 3475 return ERR_PTR(-E2BIG); 3476 } 3477 3478 ret = i915_gem_object_get_pages(obj); 3479 if (ret) 3480 return ERR_PTR(ret); 3481 3482 i915_gem_object_pin_pages(obj); 3483 3484 vma = i915_gem_obj_lookup_or_create_vma(obj, vm); 3485 if (IS_ERR(vma)) 3486 goto err_unpin; 3487 3488 search_free: 3489 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3490 size, alignment, 3491 obj->cache_level, 3492 start, end, 3493 DRM_MM_SEARCH_DEFAULT, 3494 DRM_MM_CREATE_DEFAULT); 3495 if (ret) { 3496 ret = i915_gem_evict_something(dev, vm, size, alignment, 3497 obj->cache_level, 3498 start, end, 3499 flags); 3500 if (ret == 0) 3501 goto search_free; 3502 3503 goto err_free_vma; 3504 } 3505 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3506 ret = -EINVAL; 3507 goto err_remove_node; 3508 } 3509 3510 ret = i915_gem_gtt_prepare_object(obj); 3511 if (ret) 3512 goto err_remove_node; 3513 3514 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3515 list_add_tail(&vma->mm_list, &vm->inactive_list); 3516 3517 if (i915_is_ggtt(vm)) { 3518 bool mappable, fenceable; 3519 3520 fenceable = (vma->node.size == fence_size && 3521 (vma->node.start & (fence_alignment - 1)) == 0); 3522 3523 mappable = (vma->node.start + obj->base.size <= 3524 dev_priv->gtt.mappable_end); 3525 3526 obj->map_and_fenceable = mappable && fenceable; 3527 } 3528 3529 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 3530 3531 trace_i915_vma_bind(vma, flags); 3532 vma->bind_vma(vma, obj->cache_level, 3533 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0); 3534 3535 i915_gem_verify_gtt(dev); 3536 return vma; 3537 3538 err_remove_node: 3539 drm_mm_remove_node(&vma->node); 3540 err_free_vma: 3541 i915_gem_vma_destroy(vma); 3542 vma = ERR_PTR(ret); 3543 err_unpin: 3544 i915_gem_object_unpin_pages(obj); 3545 return vma; 3546 } 3547 3548 bool 3549 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3550 bool force) 3551 { 3552 /* If we don't have a page list set up, then we're not pinned 3553 * to GPU, and we can ignore the cache flush because it'll happen 3554 * again at bind time. 3555 */ 3556 if (obj->pages == NULL) 3557 return false; 3558 3559 /* 3560 * Stolen memory is always coherent with the GPU as it is explicitly 3561 * marked as wc by the system, or the system is cache-coherent. 3562 */ 3563 if (obj->stolen) 3564 return false; 3565 3566 /* If the GPU is snooping the contents of the CPU cache, 3567 * we do not need to manually clear the CPU cache lines. However, 3568 * the caches are only snooped when the render cache is 3569 * flushed/invalidated. As we always have to emit invalidations 3570 * and flushes when moving into and out of the RENDER domain, correct 3571 * snooping behaviour occurs naturally as the result of our domain 3572 * tracking. 3573 */ 3574 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 3575 return false; 3576 3577 trace_i915_gem_object_clflush(obj); 3578 drm_clflush_sg(obj->pages); 3579 3580 return true; 3581 } 3582 3583 /** Flushes the GTT write domain for the object if it's dirty. */ 3584 static void 3585 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3586 { 3587 uint32_t old_write_domain; 3588 3589 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3590 return; 3591 3592 /* No actual flushing is required for the GTT write domain. Writes 3593 * to it immediately go to main memory as far as we know, so there's 3594 * no chipset flush. It also doesn't land in render cache. 
3595 * 3596 * However, we do have to enforce the order so that all writes through 3597 * the GTT land before any writes to the device, such as updates to 3598 * the GATT itself. 3599 */ 3600 wmb(); 3601 3602 old_write_domain = obj->base.write_domain; 3603 obj->base.write_domain = 0; 3604 3605 intel_fb_obj_flush(obj, false); 3606 3607 trace_i915_gem_object_change_domain(obj, 3608 obj->base.read_domains, 3609 old_write_domain); 3610 } 3611 3612 /** Flushes the CPU write domain for the object if it's dirty. */ 3613 static void 3614 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 3615 bool force) 3616 { 3617 uint32_t old_write_domain; 3618 3619 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3620 return; 3621 3622 if (i915_gem_clflush_object(obj, force)) 3623 i915_gem_chipset_flush(obj->base.dev); 3624 3625 old_write_domain = obj->base.write_domain; 3626 obj->base.write_domain = 0; 3627 3628 intel_fb_obj_flush(obj, false); 3629 3630 trace_i915_gem_object_change_domain(obj, 3631 obj->base.read_domains, 3632 old_write_domain); 3633 } 3634 3635 /** 3636 * Moves a single object to the GTT read, and possibly write domain. 3637 * 3638 * This function returns when the move is complete, including waiting on 3639 * flushes to occur. 3640 */ 3641 int 3642 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3643 { 3644 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3645 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 3646 uint32_t old_write_domain, old_read_domains; 3647 int ret; 3648 3649 /* Not valid to be called on unbound objects. */ 3650 if (vma == NULL) 3651 return -EINVAL; 3652 3653 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3654 return 0; 3655 3656 ret = i915_gem_object_wait_rendering(obj, !write); 3657 if (ret) 3658 return ret; 3659 3660 i915_gem_object_retire(obj); 3661 i915_gem_object_flush_cpu_write_domain(obj, false); 3662 3663 /* Serialise direct access to this object with the barriers for 3664 * coherent writes from the GPU, by effectively invalidating the 3665 * GTT domain upon first access. 3666 */ 3667 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3668 mb(); 3669 3670 old_write_domain = obj->base.write_domain; 3671 old_read_domains = obj->base.read_domains; 3672 3673 /* It should now be out of any other write domains, and we can update 3674 * the domain values for our changes. 
3675 */ 3676 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3677 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3678 if (write) { 3679 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3680 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3681 obj->dirty = 1; 3682 } 3683 3684 if (write) 3685 intel_fb_obj_invalidate(obj, NULL); 3686 3687 trace_i915_gem_object_change_domain(obj, 3688 old_read_domains, 3689 old_write_domain); 3690 3691 /* And bump the LRU for this access */ 3692 if (i915_gem_object_is_inactive(obj)) 3693 list_move_tail(&vma->mm_list, 3694 &dev_priv->gtt.base.inactive_list); 3695 3696 return 0; 3697 } 3698 3699 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3700 enum i915_cache_level cache_level) 3701 { 3702 struct drm_device *dev = obj->base.dev; 3703 struct i915_vma *vma, *next; 3704 int ret; 3705 3706 if (obj->cache_level == cache_level) 3707 return 0; 3708 3709 if (i915_gem_obj_is_pinned(obj)) { 3710 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3711 return -EBUSY; 3712 } 3713 3714 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3715 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3716 ret = i915_vma_unbind(vma); 3717 if (ret) 3718 return ret; 3719 } 3720 } 3721 3722 if (i915_gem_obj_bound_any(obj)) { 3723 ret = i915_gem_object_finish_gpu(obj); 3724 if (ret) 3725 return ret; 3726 3727 i915_gem_object_finish_gtt(obj); 3728 3729 /* Before SandyBridge, you could not use tiling or fence 3730 * registers with snooped memory, so relinquish any fences 3731 * currently pointing to our region in the aperture. 3732 */ 3733 if (INTEL_INFO(dev)->gen < 6) { 3734 ret = i915_gem_object_put_fence(obj); 3735 if (ret) 3736 return ret; 3737 } 3738 3739 list_for_each_entry(vma, &obj->vma_list, vma_link) 3740 if (drm_mm_node_allocated(&vma->node)) 3741 vma->bind_vma(vma, cache_level, 3742 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0); 3743 } 3744 3745 list_for_each_entry(vma, &obj->vma_list, vma_link) 3746 vma->node.color = cache_level; 3747 obj->cache_level = cache_level; 3748 3749 if (cpu_write_needs_clflush(obj)) { 3750 u32 old_read_domains, old_write_domain; 3751 3752 /* If we're coming from LLC cached, then we haven't 3753 * actually been tracking whether the data is in the 3754 * CPU cache or not, since we only allow one bit set 3755 * in obj->write_domain and have been skipping the clflushes. 3756 * Just set it to the CPU cache for now. 
3757 */ 3758 i915_gem_object_retire(obj); 3759 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3760 3761 old_read_domains = obj->base.read_domains; 3762 old_write_domain = obj->base.write_domain; 3763 3764 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3765 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3766 3767 trace_i915_gem_object_change_domain(obj, 3768 old_read_domains, 3769 old_write_domain); 3770 } 3771 3772 i915_gem_verify_gtt(dev); 3773 return 0; 3774 } 3775 3776 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3777 struct drm_file *file) 3778 { 3779 struct drm_i915_gem_caching *args = data; 3780 struct drm_i915_gem_object *obj; 3781 int ret; 3782 3783 ret = i915_mutex_lock_interruptible(dev); 3784 if (ret) 3785 return ret; 3786 3787 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3788 if (&obj->base == NULL) { 3789 ret = -ENOENT; 3790 goto unlock; 3791 } 3792 3793 switch (obj->cache_level) { 3794 case I915_CACHE_LLC: 3795 case I915_CACHE_L3_LLC: 3796 args->caching = I915_CACHING_CACHED; 3797 break; 3798 3799 case I915_CACHE_WT: 3800 args->caching = I915_CACHING_DISPLAY; 3801 break; 3802 3803 default: 3804 args->caching = I915_CACHING_NONE; 3805 break; 3806 } 3807 3808 drm_gem_object_unreference(&obj->base); 3809 unlock: 3810 mutex_unlock(&dev->struct_mutex); 3811 return ret; 3812 } 3813 3814 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3815 struct drm_file *file) 3816 { 3817 struct drm_i915_gem_caching *args = data; 3818 struct drm_i915_gem_object *obj; 3819 enum i915_cache_level level; 3820 int ret; 3821 3822 switch (args->caching) { 3823 case I915_CACHING_NONE: 3824 level = I915_CACHE_NONE; 3825 break; 3826 case I915_CACHING_CACHED: 3827 level = I915_CACHE_LLC; 3828 break; 3829 case I915_CACHING_DISPLAY: 3830 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3831 break; 3832 default: 3833 return -EINVAL; 3834 } 3835 3836 ret = i915_mutex_lock_interruptible(dev); 3837 if (ret) 3838 return ret; 3839 3840 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3841 if (&obj->base == NULL) { 3842 ret = -ENOENT; 3843 goto unlock; 3844 } 3845 3846 ret = i915_gem_object_set_cache_level(obj, level); 3847 3848 drm_gem_object_unreference(&obj->base); 3849 unlock: 3850 mutex_unlock(&dev->struct_mutex); 3851 return ret; 3852 } 3853 3854 static bool is_pin_display(struct drm_i915_gem_object *obj) 3855 { 3856 struct i915_vma *vma; 3857 3858 vma = i915_gem_obj_to_ggtt(obj); 3859 if (!vma) 3860 return false; 3861 3862 /* There are 3 sources that pin objects: 3863 * 1. The display engine (scanouts, sprites, cursors); 3864 * 2. Reservations for execbuffer; 3865 * 3. The user. 3866 * 3867 * We can ignore reservations as we hold the struct_mutex and 3868 * are only called outside of the reservation path. The user 3869 * can only increment pin_count once, and so if after 3870 * subtracting the potential reference by the user, any pin_count 3871 * remains, it must be due to another use by the display engine. 3872 */ 3873 return vma->pin_count - !!obj->user_pin_count; 3874 } 3875 3876 /* 3877 * Prepare buffer for display plane (scanout, cursors, etc). 3878 * Can be called from an uninterruptible phase (modesetting) and allows 3879 * any flushes to be pipelined (for pageflips). 
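 *
 * A display path would use the pair of helpers below roughly as follows
 * (sketch only; error handling and the actual scanout programming are
 * elided):
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *	... program the plane/cursor registers with the GGTT offset ...
 *	i915_gem_object_unpin_from_display_plane(obj);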
3880 */ 3881 int 3882 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3883 u32 alignment, 3884 struct intel_engine_cs *pipelined) 3885 { 3886 u32 old_read_domains, old_write_domain; 3887 bool was_pin_display; 3888 int ret; 3889 3890 if (pipelined != obj->ring) { 3891 ret = i915_gem_object_sync(obj, pipelined); 3892 if (ret) 3893 return ret; 3894 } 3895 3896 /* Mark the pin_display early so that we account for the 3897 * display coherency whilst setting up the cache domains. 3898 */ 3899 was_pin_display = obj->pin_display; 3900 obj->pin_display = true; 3901 3902 /* The display engine is not coherent with the LLC cache on gen6. As 3903 * a result, we make sure that the pinning that is about to occur is 3904 * done with uncached PTEs. This is lowest common denominator for all 3905 * chipsets. 3906 * 3907 * However for gen6+, we could do better by using the GFDT bit instead 3908 * of uncaching, which would allow us to flush all the LLC-cached data 3909 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3910 */ 3911 ret = i915_gem_object_set_cache_level(obj, 3912 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 3913 if (ret) 3914 goto err_unpin_display; 3915 3916 /* As the user may map the buffer once pinned in the display plane 3917 * (e.g. libkms for the bootup splash), we have to ensure that we 3918 * always use map_and_fenceable for all scanout buffers. 3919 */ 3920 ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE); 3921 if (ret) 3922 goto err_unpin_display; 3923 3924 i915_gem_object_flush_cpu_write_domain(obj, true); 3925 3926 old_write_domain = obj->base.write_domain; 3927 old_read_domains = obj->base.read_domains; 3928 3929 /* It should now be out of any other write domains, and we can update 3930 * the domain values for our changes. 3931 */ 3932 obj->base.write_domain = 0; 3933 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3934 3935 trace_i915_gem_object_change_domain(obj, 3936 old_read_domains, 3937 old_write_domain); 3938 3939 return 0; 3940 3941 err_unpin_display: 3942 WARN_ON(was_pin_display != is_pin_display(obj)); 3943 obj->pin_display = was_pin_display; 3944 return ret; 3945 } 3946 3947 void 3948 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) 3949 { 3950 i915_gem_object_ggtt_unpin(obj); 3951 obj->pin_display = is_pin_display(obj); 3952 } 3953 3954 int 3955 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3956 { 3957 int ret; 3958 3959 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3960 return 0; 3961 3962 ret = i915_gem_object_wait_rendering(obj, false); 3963 if (ret) 3964 return ret; 3965 3966 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3967 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3968 return 0; 3969 } 3970 3971 /** 3972 * Moves a single object to the CPU read, and possibly write domain. 3973 * 3974 * This function returns when the move is complete, including waiting on 3975 * flushes to occur. 
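 *
 * A CPU access sequence therefore typically looks like the sketch below
 * (illustrative only; struct_mutex held and error handling elided):
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 *	... read or write the object's backing pages with the CPU ...
 *	ret = i915_gem_object_set_to_gtt_domain(obj, false);
 *
 * where the final call is only needed if the object will next be accessed
 * through the GTT again.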
3976 */ 3977 int 3978 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3979 { 3980 uint32_t old_write_domain, old_read_domains; 3981 int ret; 3982 3983 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3984 return 0; 3985 3986 ret = i915_gem_object_wait_rendering(obj, !write); 3987 if (ret) 3988 return ret; 3989 3990 i915_gem_object_retire(obj); 3991 i915_gem_object_flush_gtt_write_domain(obj); 3992 3993 old_write_domain = obj->base.write_domain; 3994 old_read_domains = obj->base.read_domains; 3995 3996 /* Flush the CPU cache if it's still invalid. */ 3997 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3998 i915_gem_clflush_object(obj, false); 3999 4000 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4001 } 4002 4003 /* It should now be out of any other write domains, and we can update 4004 * the domain values for our changes. 4005 */ 4006 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4007 4008 /* If we're writing through the CPU, then the GPU read domains will 4009 * need to be invalidated at next use. 4010 */ 4011 if (write) { 4012 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4013 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4014 } 4015 4016 if (write) 4017 intel_fb_obj_invalidate(obj, NULL); 4018 4019 trace_i915_gem_object_change_domain(obj, 4020 old_read_domains, 4021 old_write_domain); 4022 4023 return 0; 4024 } 4025 4026 /* Throttle our rendering by waiting until the ring has completed our requests 4027 * emitted over 20 msec ago. 4028 * 4029 * Note that if we were to use the current jiffies each time around the loop, 4030 * we wouldn't escape the function with any frames outstanding if the time to 4031 * render a frame was over 20ms. 4032 * 4033 * This should get us reasonable parallelism between CPU and GPU but also 4034 * relatively low latency when blocking on a particular request to finish. 
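 *
 * Concretely, the cut-off is computed once on entry:
 *
 *	recent_enough = jiffies - msecs_to_jiffies(20);
 *
 * and the per-client request list is walked oldest-first; the wait then
 * targets the most recent request that is older than that mark, so nothing
 * emitted within the last 20ms is ever waited upon.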
4035 */ 4036 static int 4037 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4038 { 4039 struct drm_i915_private *dev_priv = dev->dev_private; 4040 struct drm_i915_file_private *file_priv = file->driver_priv; 4041 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 4042 struct drm_i915_gem_request *request; 4043 struct intel_engine_cs *ring = NULL; 4044 unsigned reset_counter; 4045 u32 seqno = 0; 4046 int ret; 4047 4048 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4049 if (ret) 4050 return ret; 4051 4052 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4053 if (ret) 4054 return ret; 4055 4056 spin_lock(&file_priv->mm.lock); 4057 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4058 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4059 break; 4060 4061 ring = request->ring; 4062 seqno = request->seqno; 4063 } 4064 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4065 spin_unlock(&file_priv->mm.lock); 4066 4067 if (seqno == 0) 4068 return 0; 4069 4070 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL); 4071 if (ret == 0) 4072 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4073 4074 return ret; 4075 } 4076 4077 static bool 4078 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4079 { 4080 struct drm_i915_gem_object *obj = vma->obj; 4081 4082 if (alignment && 4083 vma->node.start & (alignment - 1)) 4084 return true; 4085 4086 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4087 return true; 4088 4089 if (flags & PIN_OFFSET_BIAS && 4090 vma->node.start < (flags & PIN_OFFSET_MASK)) 4091 return true; 4092 4093 return false; 4094 } 4095 4096 int 4097 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4098 struct i915_address_space *vm, 4099 uint32_t alignment, 4100 uint64_t flags) 4101 { 4102 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4103 struct i915_vma *vma; 4104 int ret; 4105 4106 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4107 return -ENODEV; 4108 4109 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4110 return -EINVAL; 4111 4112 vma = i915_gem_obj_to_vma(obj, vm); 4113 if (vma) { 4114 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4115 return -EBUSY; 4116 4117 if (i915_vma_misplaced(vma, alignment, flags)) { 4118 WARN(vma->pin_count, 4119 "bo is already pinned with incorrect alignment:" 4120 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4121 " obj->map_and_fenceable=%d\n", 4122 i915_gem_obj_offset(obj, vm), alignment, 4123 !!(flags & PIN_MAPPABLE), 4124 obj->map_and_fenceable); 4125 ret = i915_vma_unbind(vma); 4126 if (ret) 4127 return ret; 4128 4129 vma = NULL; 4130 } 4131 } 4132 4133 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4134 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags); 4135 if (IS_ERR(vma)) 4136 return PTR_ERR(vma); 4137 } 4138 4139 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping) 4140 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); 4141 4142 vma->pin_count++; 4143 if (flags & PIN_MAPPABLE) 4144 obj->pin_mappable |= true; 4145 4146 return 0; 4147 } 4148 4149 void 4150 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) 4151 { 4152 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 4153 4154 BUG_ON(!vma); 4155 BUG_ON(vma->pin_count == 0); 4156 BUG_ON(!i915_gem_obj_ggtt_bound(obj)); 4157 4158 if (--vma->pin_count == 0) 4159 obj->pin_mappable = false; 4160 } 4161 4162 bool 4163 
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4164 { 4165 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4166 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4167 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4168 4169 WARN_ON(!ggtt_vma || 4170 dev_priv->fence_regs[obj->fence_reg].pin_count > 4171 ggtt_vma->pin_count); 4172 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4173 return true; 4174 } else 4175 return false; 4176 } 4177 4178 void 4179 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4180 { 4181 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4182 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4183 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4184 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4185 } 4186 } 4187 4188 int 4189 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 4190 struct drm_file *file) 4191 { 4192 struct drm_i915_gem_pin *args = data; 4193 struct drm_i915_gem_object *obj; 4194 int ret; 4195 4196 if (INTEL_INFO(dev)->gen >= 6) 4197 return -ENODEV; 4198 4199 ret = i915_mutex_lock_interruptible(dev); 4200 if (ret) 4201 return ret; 4202 4203 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4204 if (&obj->base == NULL) { 4205 ret = -ENOENT; 4206 goto unlock; 4207 } 4208 4209 if (obj->madv != I915_MADV_WILLNEED) { 4210 DRM_DEBUG("Attempting to pin a purgeable buffer\n"); 4211 ret = -EFAULT; 4212 goto out; 4213 } 4214 4215 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4216 DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n", 4217 args->handle); 4218 ret = -EINVAL; 4219 goto out; 4220 } 4221 4222 if (obj->user_pin_count == ULONG_MAX) { 4223 ret = -EBUSY; 4224 goto out; 4225 } 4226 4227 if (obj->user_pin_count == 0) { 4228 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE); 4229 if (ret) 4230 goto out; 4231 } 4232 4233 obj->user_pin_count++; 4234 obj->pin_filp = file; 4235 4236 args->offset = i915_gem_obj_ggtt_offset(obj); 4237 out: 4238 drm_gem_object_unreference(&obj->base); 4239 unlock: 4240 mutex_unlock(&dev->struct_mutex); 4241 return ret; 4242 } 4243 4244 int 4245 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4246 struct drm_file *file) 4247 { 4248 struct drm_i915_gem_pin *args = data; 4249 struct drm_i915_gem_object *obj; 4250 int ret; 4251 4252 ret = i915_mutex_lock_interruptible(dev); 4253 if (ret) 4254 return ret; 4255 4256 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4257 if (&obj->base == NULL) { 4258 ret = -ENOENT; 4259 goto unlock; 4260 } 4261 4262 if (obj->pin_filp != file) { 4263 DRM_DEBUG("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 4264 args->handle); 4265 ret = -EINVAL; 4266 goto out; 4267 } 4268 obj->user_pin_count--; 4269 if (obj->user_pin_count == 0) { 4270 obj->pin_filp = NULL; 4271 i915_gem_object_ggtt_unpin(obj); 4272 } 4273 4274 out: 4275 drm_gem_object_unreference(&obj->base); 4276 unlock: 4277 mutex_unlock(&dev->struct_mutex); 4278 return ret; 4279 } 4280 4281 int 4282 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4283 struct drm_file *file) 4284 { 4285 struct drm_i915_gem_busy *args = data; 4286 struct drm_i915_gem_object *obj; 4287 int ret; 4288 4289 ret = i915_mutex_lock_interruptible(dev); 4290 if (ret) 4291 return ret; 4292 4293 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4294 if (&obj->base == NULL) { 4295 ret = -ENOENT; 4296 goto unlock; 4297 } 4298 4299 /* Count all active objects as busy, even if they are currently not used 4300 * 
by the gpu. Users of this interface expect objects to eventually 4301 * become non-busy without any further actions, therefore emit any 4302 * necessary flushes here. 4303 */ 4304 ret = i915_gem_object_flush_active(obj); 4305 4306 args->busy = obj->active; 4307 if (obj->ring) { 4308 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4309 args->busy |= intel_ring_flag(obj->ring) << 16; 4310 } 4311 4312 drm_gem_object_unreference(&obj->base); 4313 unlock: 4314 mutex_unlock(&dev->struct_mutex); 4315 return ret; 4316 } 4317 4318 int 4319 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4320 struct drm_file *file_priv) 4321 { 4322 return i915_gem_ring_throttle(dev, file_priv); 4323 } 4324 4325 int 4326 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4327 struct drm_file *file_priv) 4328 { 4329 struct drm_i915_gem_madvise *args = data; 4330 struct drm_i915_gem_object *obj; 4331 int ret; 4332 4333 switch (args->madv) { 4334 case I915_MADV_DONTNEED: 4335 case I915_MADV_WILLNEED: 4336 break; 4337 default: 4338 return -EINVAL; 4339 } 4340 4341 ret = i915_mutex_lock_interruptible(dev); 4342 if (ret) 4343 return ret; 4344 4345 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4346 if (&obj->base == NULL) { 4347 ret = -ENOENT; 4348 goto unlock; 4349 } 4350 4351 if (i915_gem_obj_is_pinned(obj)) { 4352 ret = -EINVAL; 4353 goto out; 4354 } 4355 4356 if (obj->madv != __I915_MADV_PURGED) 4357 obj->madv = args->madv; 4358 4359 /* if the object is no longer attached, discard its backing storage */ 4360 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4361 i915_gem_object_truncate(obj); 4362 4363 args->retained = obj->madv != __I915_MADV_PURGED; 4364 4365 out: 4366 drm_gem_object_unreference(&obj->base); 4367 unlock: 4368 mutex_unlock(&dev->struct_mutex); 4369 return ret; 4370 } 4371 4372 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4373 const struct drm_i915_gem_object_ops *ops) 4374 { 4375 INIT_LIST_HEAD(&obj->global_list); 4376 INIT_LIST_HEAD(&obj->ring_list); 4377 INIT_LIST_HEAD(&obj->obj_exec_link); 4378 INIT_LIST_HEAD(&obj->vma_list); 4379 4380 obj->ops = ops; 4381 4382 obj->fence_reg = I915_FENCE_REG_NONE; 4383 obj->madv = I915_MADV_WILLNEED; 4384 4385 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4386 } 4387 4388 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4389 .get_pages = i915_gem_object_get_pages_gtt, 4390 .put_pages = i915_gem_object_put_pages_gtt, 4391 }; 4392 4393 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4394 size_t size) 4395 { 4396 struct drm_i915_gem_object *obj; 4397 struct address_space *mapping; 4398 gfp_t mask; 4399 4400 obj = i915_gem_object_alloc(dev); 4401 if (obj == NULL) 4402 return NULL; 4403 4404 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4405 i915_gem_object_free(obj); 4406 return NULL; 4407 } 4408 4409 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4410 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4411 /* 965gm cannot relocate objects above 4GiB. 
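 * The gfp mask adjustment below (clearing __GFP_HIGHMEM, setting
 * __GFP_DMA32 and applying the result with mapping_set_gfp_mask()) keeps
 * every shmemfs backing page within the low 4GiB on those chipsets.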
*/ 4412 mask &= ~__GFP_HIGHMEM; 4413 mask |= __GFP_DMA32; 4414 } 4415 4416 mapping = file_inode(obj->base.filp)->i_mapping; 4417 mapping_set_gfp_mask(mapping, mask); 4418 4419 i915_gem_object_init(obj, &i915_gem_object_ops); 4420 4421 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4422 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4423 4424 if (HAS_LLC(dev)) { 4425 /* On some devices, we can have the GPU use the LLC (the CPU 4426 * cache) for about a 10% performance improvement 4427 * compared to uncached. Graphics requests other than 4428 * display scanout are coherent with the CPU in 4429 * accessing this cache. This means in this mode we 4430 * don't need to clflush on the CPU side, and on the 4431 * GPU side we only need to flush internal caches to 4432 * get data visible to the CPU. 4433 * 4434 * However, we maintain the display planes as UC, and so 4435 * need to rebind when first used as such. 4436 */ 4437 obj->cache_level = I915_CACHE_LLC; 4438 } else 4439 obj->cache_level = I915_CACHE_NONE; 4440 4441 trace_i915_gem_object_create(obj); 4442 4443 return obj; 4444 } 4445 4446 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4447 { 4448 /* If we are the last user of the backing storage (be it shmemfs 4449 * pages or stolen etc), we know that the pages are going to be 4450 * immediately released. In this case, we can then skip copying 4451 * back the contents from the GPU. 4452 */ 4453 4454 if (obj->madv != I915_MADV_WILLNEED) 4455 return false; 4456 4457 if (obj->base.filp == NULL) 4458 return true; 4459 4460 /* At first glance, this looks racy, but then again so would be 4461 * userspace racing mmap against close. However, the first external 4462 * reference to the filp can only be obtained through the 4463 * i915_gem_mmap_ioctl() which safeguards us against the user 4464 * acquiring such a reference whilst we are in the middle of 4465 * freeing the object. 4466 */ 4467 return atomic_long_read(&obj->base.filp->f_count) == 1; 4468 } 4469 4470 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4471 { 4472 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4473 struct drm_device *dev = obj->base.dev; 4474 struct drm_i915_private *dev_priv = dev->dev_private; 4475 struct i915_vma *vma, *next; 4476 4477 intel_runtime_pm_get(dev_priv); 4478 4479 trace_i915_gem_object_destroy(obj); 4480 4481 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4482 int ret; 4483 4484 vma->pin_count = 0; 4485 ret = i915_vma_unbind(vma); 4486 if (WARN_ON(ret == -ERESTARTSYS)) { 4487 bool was_interruptible; 4488 4489 was_interruptible = dev_priv->mm.interruptible; 4490 dev_priv->mm.interruptible = false; 4491 4492 WARN_ON(i915_vma_unbind(vma)); 4493 4494 dev_priv->mm.interruptible = was_interruptible; 4495 } 4496 } 4497 4498 i915_gem_object_detach_phys(obj); 4499 4500 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4501 * before progressing. 
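 * Dropping that pin here keeps the pages_pin_count check below honest: by
 * the time we call i915_gem_object_put_pages() nothing should still be
 * holding the backing pages.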
*/ 4502 if (obj->stolen) 4503 i915_gem_object_unpin_pages(obj); 4504 4505 WARN_ON(obj->frontbuffer_bits); 4506 4507 if (WARN_ON(obj->pages_pin_count)) 4508 obj->pages_pin_count = 0; 4509 if (discard_backing_storage(obj)) 4510 obj->madv = I915_MADV_DONTNEED; 4511 i915_gem_object_put_pages(obj); 4512 i915_gem_object_free_mmap_offset(obj); 4513 4514 BUG_ON(obj->pages); 4515 4516 if (obj->base.import_attach) 4517 drm_prime_gem_destroy(&obj->base, NULL); 4518 4519 if (obj->ops->release) 4520 obj->ops->release(obj); 4521 4522 drm_gem_object_release(&obj->base); 4523 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4524 4525 kfree(obj->bit_17); 4526 i915_gem_object_free(obj); 4527 4528 intel_runtime_pm_put(dev_priv); 4529 } 4530 4531 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4532 struct i915_address_space *vm) 4533 { 4534 struct i915_vma *vma; 4535 list_for_each_entry(vma, &obj->vma_list, vma_link) 4536 if (vma->vm == vm) 4537 return vma; 4538 4539 return NULL; 4540 } 4541 4542 void i915_gem_vma_destroy(struct i915_vma *vma) 4543 { 4544 struct i915_address_space *vm = NULL; 4545 WARN_ON(vma->node.allocated); 4546 4547 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4548 if (!list_empty(&vma->exec_list)) 4549 return; 4550 4551 vm = vma->vm; 4552 4553 if (!i915_is_ggtt(vm)) 4554 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4555 4556 list_del(&vma->vma_link); 4557 4558 kfree(vma); 4559 } 4560 4561 static void 4562 i915_gem_stop_ringbuffers(struct drm_device *dev) 4563 { 4564 struct drm_i915_private *dev_priv = dev->dev_private; 4565 struct intel_engine_cs *ring; 4566 int i; 4567 4568 for_each_ring(ring, dev_priv, i) 4569 dev_priv->gt.stop_ring(ring); 4570 } 4571 4572 int 4573 i915_gem_suspend(struct drm_device *dev) 4574 { 4575 struct drm_i915_private *dev_priv = dev->dev_private; 4576 int ret = 0; 4577 4578 mutex_lock(&dev->struct_mutex); 4579 if (dev_priv->ums.mm_suspended) 4580 goto err; 4581 4582 ret = i915_gpu_idle(dev); 4583 if (ret) 4584 goto err; 4585 4586 i915_gem_retire_requests(dev); 4587 4588 /* Under UMS, be paranoid and evict. */ 4589 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4590 i915_gem_evict_everything(dev); 4591 4592 i915_kernel_lost_context(dev); 4593 i915_gem_stop_ringbuffers(dev); 4594 4595 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4596 * We need to replace this with a semaphore, or something. 4597 * And not confound ums.mm_suspended! 4598 */ 4599 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev, 4600 DRIVER_MODESET); 4601 mutex_unlock(&dev->struct_mutex); 4602 4603 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 4604 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4605 flush_delayed_work(&dev_priv->mm.idle_work); 4606 4607 return 0; 4608 4609 err: 4610 mutex_unlock(&dev->struct_mutex); 4611 return ret; 4612 } 4613 4614 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4615 { 4616 struct drm_device *dev = ring->dev; 4617 struct drm_i915_private *dev_priv = dev->dev_private; 4618 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4619 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4620 int i, ret; 4621 4622 if (!HAS_L3_DPF(dev) || !remap_info) 4623 return 0; 4624 4625 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4626 if (ret) 4627 return ret; 4628 4629 /* 4630 * Note: We do not worry about the concurrent register cacheline hang 4631 * here because no other code should access these registers other than 4632 * at initialization time. 
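 *
 * The ring space reserved above is sized for the loop below: one register
 * per remap dword and three dwords emitted per register
 * (MI_LOAD_REGISTER_IMM, offset, value), hence GEN7_L3LOG_SIZE / 4 * 3.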
4633 */ 4634 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4635 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4636 intel_ring_emit(ring, reg_base + i); 4637 intel_ring_emit(ring, remap_info[i/4]); 4638 } 4639 4640 intel_ring_advance(ring); 4641 4642 return ret; 4643 } 4644 4645 void i915_gem_init_swizzling(struct drm_device *dev) 4646 { 4647 struct drm_i915_private *dev_priv = dev->dev_private; 4648 4649 if (INTEL_INFO(dev)->gen < 5 || 4650 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4651 return; 4652 4653 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4654 DISP_TILE_SURFACE_SWIZZLING); 4655 4656 if (IS_GEN5(dev)) 4657 return; 4658 4659 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4660 if (IS_GEN6(dev)) 4661 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4662 else if (IS_GEN7(dev)) 4663 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4664 else if (IS_GEN8(dev)) 4665 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4666 else 4667 BUG(); 4668 } 4669 4670 static bool 4671 intel_enable_blt(struct drm_device *dev) 4672 { 4673 if (!HAS_BLT(dev)) 4674 return false; 4675 4676 /* The blitter was dysfunctional on early prototypes */ 4677 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4678 DRM_INFO("BLT not supported on this pre-production hardware;" 4679 " graphics performance will be degraded.\n"); 4680 return false; 4681 } 4682 4683 return true; 4684 } 4685 4686 static void init_unused_ring(struct drm_device *dev, u32 base) 4687 { 4688 struct drm_i915_private *dev_priv = dev->dev_private; 4689 4690 I915_WRITE(RING_CTL(base), 0); 4691 I915_WRITE(RING_HEAD(base), 0); 4692 I915_WRITE(RING_TAIL(base), 0); 4693 I915_WRITE(RING_START(base), 0); 4694 } 4695 4696 static void init_unused_rings(struct drm_device *dev) 4697 { 4698 if (IS_I830(dev)) { 4699 init_unused_ring(dev, PRB1_BASE); 4700 init_unused_ring(dev, SRB0_BASE); 4701 init_unused_ring(dev, SRB1_BASE); 4702 init_unused_ring(dev, SRB2_BASE); 4703 init_unused_ring(dev, SRB3_BASE); 4704 } else if (IS_GEN2(dev)) { 4705 init_unused_ring(dev, SRB0_BASE); 4706 init_unused_ring(dev, SRB1_BASE); 4707 } else if (IS_GEN3(dev)) { 4708 init_unused_ring(dev, PRB1_BASE); 4709 init_unused_ring(dev, PRB2_BASE); 4710 } 4711 } 4712 4713 int i915_gem_init_rings(struct drm_device *dev) 4714 { 4715 struct drm_i915_private *dev_priv = dev->dev_private; 4716 int ret; 4717 4718 /* 4719 * At least 830 can leave some of the unused rings 4720 * "active" (ie. head != tail) after resume which 4721 * will prevent c3 entry. Makes sure all unused rings 4722 * are totally idle. 
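 * init_unused_rings() below does exactly that, writing zero to the
 * RING_CTL/HEAD/TAIL/START registers of every ring the generation leaves
 * unused.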
4723 */ 4724 init_unused_rings(dev); 4725 4726 ret = intel_init_render_ring_buffer(dev); 4727 if (ret) 4728 return ret; 4729 4730 if (HAS_BSD(dev)) { 4731 ret = intel_init_bsd_ring_buffer(dev); 4732 if (ret) 4733 goto cleanup_render_ring; 4734 } 4735 4736 if (intel_enable_blt(dev)) { 4737 ret = intel_init_blt_ring_buffer(dev); 4738 if (ret) 4739 goto cleanup_bsd_ring; 4740 } 4741 4742 if (HAS_VEBOX(dev)) { 4743 ret = intel_init_vebox_ring_buffer(dev); 4744 if (ret) 4745 goto cleanup_blt_ring; 4746 } 4747 4748 if (HAS_BSD2(dev)) { 4749 ret = intel_init_bsd2_ring_buffer(dev); 4750 if (ret) 4751 goto cleanup_vebox_ring; 4752 } 4753 4754 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4755 if (ret) 4756 goto cleanup_bsd2_ring; 4757 4758 return 0; 4759 4760 cleanup_bsd2_ring: 4761 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 4762 cleanup_vebox_ring: 4763 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4764 cleanup_blt_ring: 4765 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4766 cleanup_bsd_ring: 4767 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4768 cleanup_render_ring: 4769 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4770 4771 return ret; 4772 } 4773 4774 int 4775 i915_gem_init_hw(struct drm_device *dev) 4776 { 4777 struct drm_i915_private *dev_priv = dev->dev_private; 4778 int ret, i; 4779 4780 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4781 return -EIO; 4782 4783 if (dev_priv->ellc_size) 4784 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4785 4786 if (IS_HASWELL(dev)) 4787 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 4788 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4789 4790 if (HAS_PCH_NOP(dev)) { 4791 if (IS_IVYBRIDGE(dev)) { 4792 u32 temp = I915_READ(GEN7_MSG_CTL); 4793 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4794 I915_WRITE(GEN7_MSG_CTL, temp); 4795 } else if (INTEL_INFO(dev)->gen >= 7) { 4796 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4797 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4798 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4799 } 4800 } 4801 4802 i915_gem_init_swizzling(dev); 4803 4804 ret = dev_priv->gt.init_rings(dev); 4805 if (ret) 4806 return ret; 4807 4808 for (i = 0; i < NUM_L3_SLICES(dev); i++) 4809 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 4810 4811 /* 4812 * XXX: Contexts should only be initialized once. Doing a switch to the 4813 * default context switch however is something we'd like to do after 4814 * reset or thaw (the latter may not actually be necessary for HW, but 4815 * goes with our code better). Context switching requires rings (for 4816 * the do_switch), but before enabling PPGTT. So don't move this. 
4817 */ 4818 ret = i915_gem_context_enable(dev_priv); 4819 if (ret && ret != -EIO) { 4820 DRM_ERROR("Context enable failed %d\n", ret); 4821 i915_gem_cleanup_ringbuffer(dev); 4822 4823 return ret; 4824 } 4825 4826 ret = i915_ppgtt_init_hw(dev); 4827 if (ret && ret != -EIO) { 4828 DRM_ERROR("PPGTT enable failed %d\n", ret); 4829 i915_gem_cleanup_ringbuffer(dev); 4830 } 4831 4832 return ret; 4833 } 4834 4835 int i915_gem_init(struct drm_device *dev) 4836 { 4837 struct drm_i915_private *dev_priv = dev->dev_private; 4838 int ret; 4839 4840 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 4841 i915.enable_execlists); 4842 4843 mutex_lock(&dev->struct_mutex); 4844 4845 if (IS_VALLEYVIEW(dev)) { 4846 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4847 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 4848 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 4849 VLV_GTLC_ALLOWWAKEACK), 10)) 4850 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4851 } 4852 4853 if (!i915.enable_execlists) { 4854 dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission; 4855 dev_priv->gt.init_rings = i915_gem_init_rings; 4856 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 4857 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 4858 } else { 4859 dev_priv->gt.do_execbuf = intel_execlists_submission; 4860 dev_priv->gt.init_rings = intel_logical_rings_init; 4861 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 4862 dev_priv->gt.stop_ring = intel_logical_ring_stop; 4863 } 4864 4865 ret = i915_gem_init_userptr(dev); 4866 if (ret) { 4867 mutex_unlock(&dev->struct_mutex); 4868 return ret; 4869 } 4870 4871 i915_gem_init_global_gtt(dev); 4872 4873 ret = i915_gem_context_init(dev); 4874 if (ret) { 4875 mutex_unlock(&dev->struct_mutex); 4876 return ret; 4877 } 4878 4879 ret = i915_gem_init_hw(dev); 4880 if (ret == -EIO) { 4881 /* Allow ring initialisation to fail by marking the GPU as 4882 * wedged. But we only want to do this where the GPU is angry, 4883 * for all other failure, such as an allocation failure, bail. 4884 */ 4885 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4886 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4887 ret = 0; 4888 } 4889 mutex_unlock(&dev->struct_mutex); 4890 4891 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
*/ 4892 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4893 dev_priv->dri1.allow_batchbuffer = 1; 4894 return ret; 4895 } 4896 4897 void 4898 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4899 { 4900 struct drm_i915_private *dev_priv = dev->dev_private; 4901 struct intel_engine_cs *ring; 4902 int i; 4903 4904 for_each_ring(ring, dev_priv, i) 4905 dev_priv->gt.cleanup_ring(ring); 4906 } 4907 4908 int 4909 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4910 struct drm_file *file_priv) 4911 { 4912 struct drm_i915_private *dev_priv = dev->dev_private; 4913 int ret; 4914 4915 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4916 return 0; 4917 4918 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 4919 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4920 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 4921 } 4922 4923 mutex_lock(&dev->struct_mutex); 4924 dev_priv->ums.mm_suspended = 0; 4925 4926 ret = i915_gem_init_hw(dev); 4927 if (ret != 0) { 4928 mutex_unlock(&dev->struct_mutex); 4929 return ret; 4930 } 4931 4932 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list)); 4933 4934 ret = drm_irq_install(dev, dev->pdev->irq); 4935 if (ret) 4936 goto cleanup_ringbuffer; 4937 mutex_unlock(&dev->struct_mutex); 4938 4939 return 0; 4940 4941 cleanup_ringbuffer: 4942 i915_gem_cleanup_ringbuffer(dev); 4943 dev_priv->ums.mm_suspended = 1; 4944 mutex_unlock(&dev->struct_mutex); 4945 4946 return ret; 4947 } 4948 4949 int 4950 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4951 struct drm_file *file_priv) 4952 { 4953 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4954 return 0; 4955 4956 mutex_lock(&dev->struct_mutex); 4957 drm_irq_uninstall(dev); 4958 mutex_unlock(&dev->struct_mutex); 4959 4960 return i915_gem_suspend(dev); 4961 } 4962 4963 void 4964 i915_gem_lastclose(struct drm_device *dev) 4965 { 4966 int ret; 4967 4968 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4969 return; 4970 4971 ret = i915_gem_suspend(dev); 4972 if (ret) 4973 DRM_ERROR("failed to idle hardware: %d\n", ret); 4974 } 4975 4976 static void 4977 init_ring_lists(struct intel_engine_cs *ring) 4978 { 4979 INIT_LIST_HEAD(&ring->active_list); 4980 INIT_LIST_HEAD(&ring->request_list); 4981 } 4982 4983 void i915_init_vm(struct drm_i915_private *dev_priv, 4984 struct i915_address_space *vm) 4985 { 4986 if (!i915_is_ggtt(vm)) 4987 drm_mm_init(&vm->mm, vm->start, vm->total); 4988 vm->dev = dev_priv->dev; 4989 INIT_LIST_HEAD(&vm->active_list); 4990 INIT_LIST_HEAD(&vm->inactive_list); 4991 INIT_LIST_HEAD(&vm->global_link); 4992 list_add_tail(&vm->global_link, &dev_priv->vm_list); 4993 } 4994 4995 void 4996 i915_gem_load(struct drm_device *dev) 4997 { 4998 struct drm_i915_private *dev_priv = dev->dev_private; 4999 int i; 5000 5001 dev_priv->slab = 5002 kmem_cache_create("i915_gem_object", 5003 sizeof(struct drm_i915_gem_object), 0, 5004 SLAB_HWCACHE_ALIGN, 5005 NULL); 5006 5007 INIT_LIST_HEAD(&dev_priv->vm_list); 5008 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5009 5010 INIT_LIST_HEAD(&dev_priv->context_list); 5011 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5012 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5013 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5014 for (i = 0; i < I915_NUM_RINGS; i++) 5015 init_ring_lists(&dev_priv->ring[i]); 5016 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5017 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5018 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5019 i915_gem_retire_work_handler); 5020 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5021 
i915_gem_idle_work_handler); 5022 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5023 5024 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 5025 if (!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) { 5026 I915_WRITE(MI_ARB_STATE, 5027 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 5028 } 5029 5030 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5031 5032 /* Old X drivers will take 0-2 for front, back, depth buffers */ 5033 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 5034 dev_priv->fence_reg_start = 3; 5035 5036 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5037 dev_priv->num_fence_regs = 32; 5038 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5039 dev_priv->num_fence_regs = 16; 5040 else 5041 dev_priv->num_fence_regs = 8; 5042 5043 /* Initialize fence registers to zero */ 5044 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5045 i915_gem_restore_fences(dev); 5046 5047 i915_gem_detect_bit_6_swizzle(dev); 5048 init_waitqueue_head(&dev_priv->pending_flip_queue); 5049 5050 dev_priv->mm.interruptible = true; 5051 5052 dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; 5053 dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; 5054 dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; 5055 register_shrinker(&dev_priv->mm.shrinker); 5056 5057 dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; 5058 register_oom_notifier(&dev_priv->mm.oom_notifier); 5059 5060 mutex_init(&dev_priv->fb_tracking.lock); 5061 } 5062 5063 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5064 { 5065 struct drm_i915_file_private *file_priv = file->driver_priv; 5066 5067 cancel_delayed_work_sync(&file_priv->mm.idle_work); 5068 5069 /* Clean up our request list when the client is going away, so that 5070 * later retire_requests won't dereference our soon-to-be-gone 5071 * file_priv. 
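 * Only the client_list link is torn down here; each request stays on its
 * ring's request list and is freed by the normal retire path.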
5072 */ 5073 spin_lock(&file_priv->mm.lock); 5074 while (!list_empty(&file_priv->mm.request_list)) { 5075 struct drm_i915_gem_request *request; 5076 5077 request = list_first_entry(&file_priv->mm.request_list, 5078 struct drm_i915_gem_request, 5079 client_list); 5080 list_del(&request->client_list); 5081 request->file_priv = NULL; 5082 } 5083 spin_unlock(&file_priv->mm.lock); 5084 } 5085 5086 static void 5087 i915_gem_file_idle_work_handler(struct work_struct *work) 5088 { 5089 struct drm_i915_file_private *file_priv = 5090 container_of(work, typeof(*file_priv), mm.idle_work.work); 5091 5092 atomic_set(&file_priv->rps_wait_boost, false); 5093 } 5094 5095 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5096 { 5097 struct drm_i915_file_private *file_priv; 5098 int ret; 5099 5100 DRM_DEBUG_DRIVER("\n"); 5101 5102 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5103 if (!file_priv) 5104 return -ENOMEM; 5105 5106 file->driver_priv = file_priv; 5107 file_priv->dev_priv = dev->dev_private; 5108 file_priv->file = file; 5109 5110 spin_lock_init(&file_priv->mm.lock); 5111 INIT_LIST_HEAD(&file_priv->mm.request_list); 5112 INIT_DELAYED_WORK(&file_priv->mm.idle_work, 5113 i915_gem_file_idle_work_handler); 5114 5115 ret = i915_gem_context_open(dev, file); 5116 if (ret) 5117 kfree(file_priv); 5118 5119 return ret; 5120 } 5121 5122 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5123 struct drm_i915_gem_object *new, 5124 unsigned frontbuffer_bits) 5125 { 5126 if (old) { 5127 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5128 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5129 old->frontbuffer_bits &= ~frontbuffer_bits; 5130 } 5131 5132 if (new) { 5133 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5134 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5135 new->frontbuffer_bits |= frontbuffer_bits; 5136 } 5137 } 5138 5139 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 5140 { 5141 if (!mutex_is_locked(mutex)) 5142 return false; 5143 5144 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 5145 return mutex->owner == task; 5146 #else 5147 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 5148 return false; 5149 #endif 5150 } 5151 5152 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 5153 { 5154 if (!mutex_trylock(&dev->struct_mutex)) { 5155 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5156 return false; 5157 5158 if (to_i915(dev)->mm.shrinker_no_lock_stealing) 5159 return false; 5160 5161 *unlock = false; 5162 } else 5163 *unlock = true; 5164 5165 return true; 5166 } 5167 5168 static int num_vma_bound(struct drm_i915_gem_object *obj) 5169 { 5170 struct i915_vma *vma; 5171 int count = 0; 5172 5173 list_for_each_entry(vma, &obj->vma_list, vma_link) 5174 if (drm_mm_node_allocated(&vma->node)) 5175 count++; 5176 5177 return count; 5178 } 5179 5180 static unsigned long 5181 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) 5182 { 5183 struct drm_i915_private *dev_priv = 5184 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5185 struct drm_device *dev = dev_priv->dev; 5186 struct drm_i915_gem_object *obj; 5187 unsigned long count; 5188 bool unlock; 5189 5190 if (!i915_gem_shrinker_lock(dev, &unlock)) 5191 return 0; 5192 5193 count = 0; 5194 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 5195 if (obj->pages_pin_count == 0) 5196 count += obj->base.size >> PAGE_SHIFT; 5197 5198 list_for_each_entry(obj, 
&dev_priv->mm.bound_list, global_list) { 5199 if (!i915_gem_obj_is_pinned(obj) && 5200 obj->pages_pin_count == num_vma_bound(obj)) 5201 count += obj->base.size >> PAGE_SHIFT; 5202 } 5203 5204 if (unlock) 5205 mutex_unlock(&dev->struct_mutex); 5206 5207 return count; 5208 } 5209 5210 /* All the new VM stuff */ 5211 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, 5212 struct i915_address_space *vm) 5213 { 5214 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5215 struct i915_vma *vma; 5216 5217 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5218 5219 list_for_each_entry(vma, &o->vma_list, vma_link) { 5220 if (vma->vm == vm) 5221 return vma->node.start; 5222 5223 } 5224 WARN(1, "%s vma for this object not found.\n", 5225 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5226 return -1; 5227 } 5228 5229 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5230 struct i915_address_space *vm) 5231 { 5232 struct i915_vma *vma; 5233 5234 list_for_each_entry(vma, &o->vma_list, vma_link) 5235 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5236 return true; 5237 5238 return false; 5239 } 5240 5241 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5242 { 5243 struct i915_vma *vma; 5244 5245 list_for_each_entry(vma, &o->vma_list, vma_link) 5246 if (drm_mm_node_allocated(&vma->node)) 5247 return true; 5248 5249 return false; 5250 } 5251 5252 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5253 struct i915_address_space *vm) 5254 { 5255 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5256 struct i915_vma *vma; 5257 5258 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5259 5260 BUG_ON(list_empty(&o->vma_list)); 5261 5262 list_for_each_entry(vma, &o->vma_list, vma_link) 5263 if (vma->vm == vm) 5264 return vma->node.size; 5265 5266 return 0; 5267 } 5268 5269 static unsigned long 5270 i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) 5271 { 5272 struct drm_i915_private *dev_priv = 5273 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5274 struct drm_device *dev = dev_priv->dev; 5275 unsigned long freed; 5276 bool unlock; 5277 5278 if (!i915_gem_shrinker_lock(dev, &unlock)) 5279 return SHRINK_STOP; 5280 5281 freed = i915_gem_shrink(dev_priv, 5282 sc->nr_to_scan, 5283 I915_SHRINK_BOUND | 5284 I915_SHRINK_UNBOUND | 5285 I915_SHRINK_PURGEABLE); 5286 if (freed < sc->nr_to_scan) 5287 freed += i915_gem_shrink(dev_priv, 5288 sc->nr_to_scan - freed, 5289 I915_SHRINK_BOUND | 5290 I915_SHRINK_UNBOUND); 5291 if (unlock) 5292 mutex_unlock(&dev->struct_mutex); 5293 5294 return freed; 5295 } 5296 5297 static int 5298 i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) 5299 { 5300 struct drm_i915_private *dev_priv = 5301 container_of(nb, struct drm_i915_private, mm.oom_notifier); 5302 struct drm_device *dev = dev_priv->dev; 5303 struct drm_i915_gem_object *obj; 5304 unsigned long timeout = msecs_to_jiffies(5000) + 1; 5305 unsigned long pinned, bound, unbound, freed; 5306 bool was_interruptible; 5307 bool unlock; 5308 5309 while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout) { 5310 schedule_timeout_killable(1); 5311 if (fatal_signal_pending(current)) 5312 return NOTIFY_DONE; 5313 } 5314 if (timeout == 0) { 5315 pr_err("Unable to purge GPU memory due lock contention.\n"); 5316 return NOTIFY_DONE; 5317 } 5318 5319 was_interruptible = dev_priv->mm.interruptible; 5320 dev_priv->mm.interruptible = false; 5321 5322 freed = i915_gem_shrink_all(dev_priv); 5323 5324 
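	/*
	 * i915_gem_shrink_all() ran with interruptible waits disabled; the
	 * saved setting is restored below before we walk the object lists
	 * to report what is still pinned.
	 */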
dev_priv->mm.interruptible = was_interruptible; 5325 5326 /* Because we may be allocating inside our own driver, we cannot 5327 * assert that there are no objects with pinned pages that are not 5328 * being pointed to by hardware. 5329 */ 5330 unbound = bound = pinned = 0; 5331 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { 5332 if (!obj->base.filp) /* not backed by a freeable object */ 5333 continue; 5334 5335 if (obj->pages_pin_count) 5336 pinned += obj->base.size; 5337 else 5338 unbound += obj->base.size; 5339 } 5340 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5341 if (!obj->base.filp) 5342 continue; 5343 5344 if (obj->pages_pin_count) 5345 pinned += obj->base.size; 5346 else 5347 bound += obj->base.size; 5348 } 5349 5350 if (unlock) 5351 mutex_unlock(&dev->struct_mutex); 5352 5353 pr_info("Purging GPU memory, %lu bytes freed, %lu bytes still pinned.\n", 5354 freed, pinned); 5355 if (unbound || bound) 5356 pr_err("%lu and %lu bytes still available in the " 5357 "bound and unbound GPU page lists.\n", 5358 bound, unbound); 5359 5360 *(unsigned long *)ptr += freed; 5361 return NOTIFY_DONE; 5362 } 5363 5364 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) 5365 { 5366 struct i915_vma *vma; 5367 5368 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); 5369 if (vma->vm != i915_obj_to_ggtt(obj)) 5370 return NULL; 5371 5372 return vma; 5373 } 5374