1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include "drmP.h" 29 #include "drm.h" 30 #include "i915_drm.h" 31 #include "i915_drv.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include <linux/shmem_fs.h> 35 #include <linux/slab.h> 36 #include <linux/swap.h> 37 #include <linux/pci.h> 38 #include <linux/dma-buf.h> 39 40 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); 41 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 42 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 43 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 44 unsigned alignment, 45 bool map_and_fenceable); 46 static int i915_gem_phys_pwrite(struct drm_device *dev, 47 struct drm_i915_gem_object *obj, 48 struct drm_i915_gem_pwrite *args, 49 struct drm_file *file); 50 51 static void i915_gem_write_fence(struct drm_device *dev, int reg, 52 struct drm_i915_gem_object *obj); 53 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 54 struct drm_i915_fence_reg *fence, 55 bool enable); 56 57 static int i915_gem_inactive_shrink(struct shrinker *shrinker, 58 struct shrink_control *sc); 59 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 60 61 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 62 { 63 if (obj->tiling_mode) 64 i915_gem_release_mmap(obj); 65 66 /* As we do not have an associated fence register, we will force 67 * a tiling change if we ever need to acquire one. 
68 */ 69 obj->fence_dirty = false; 70 obj->fence_reg = I915_FENCE_REG_NONE; 71 } 72 73 /* some bookkeeping */ 74 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 75 size_t size) 76 { 77 dev_priv->mm.object_count++; 78 dev_priv->mm.object_memory += size; 79 } 80 81 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 82 size_t size) 83 { 84 dev_priv->mm.object_count--; 85 dev_priv->mm.object_memory -= size; 86 } 87 88 static int 89 i915_gem_wait_for_error(struct drm_device *dev) 90 { 91 struct drm_i915_private *dev_priv = dev->dev_private; 92 struct completion *x = &dev_priv->error_completion; 93 unsigned long flags; 94 int ret; 95 96 if (!atomic_read(&dev_priv->mm.wedged)) 97 return 0; 98 99 ret = wait_for_completion_interruptible(x); 100 if (ret) 101 return ret; 102 103 if (atomic_read(&dev_priv->mm.wedged)) { 104 /* GPU is hung, bump the completion count to account for 105 * the token we just consumed so that we never hit zero and 106 * end up waiting upon a subsequent completion event that 107 * will never happen. 108 */ 109 spin_lock_irqsave(&x->wait.lock, flags); 110 x->done++; 111 spin_unlock_irqrestore(&x->wait.lock, flags); 112 } 113 return 0; 114 } 115 116 int i915_mutex_lock_interruptible(struct drm_device *dev) 117 { 118 int ret; 119 120 ret = i915_gem_wait_for_error(dev); 121 if (ret) 122 return ret; 123 124 ret = mutex_lock_interruptible(&dev->struct_mutex); 125 if (ret) 126 return ret; 127 128 WARN_ON(i915_verify_lists(dev)); 129 return 0; 130 } 131 132 static inline bool 133 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 134 { 135 return !obj->active; 136 } 137 138 int 139 i915_gem_init_ioctl(struct drm_device *dev, void *data, 140 struct drm_file *file) 141 { 142 struct drm_i915_gem_init *args = data; 143 144 if (drm_core_check_feature(dev, DRIVER_MODESET)) 145 return -ENODEV; 146 147 if (args->gtt_start >= args->gtt_end || 148 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 149 return -EINVAL; 150 151 /* GEM with user mode setting was never supported on ilk and later. 
*/ 152 if (INTEL_INFO(dev)->gen >= 5) 153 return -ENODEV; 154 155 mutex_lock(&dev->struct_mutex); 156 i915_gem_init_global_gtt(dev, args->gtt_start, 157 args->gtt_end, args->gtt_end); 158 mutex_unlock(&dev->struct_mutex); 159 160 return 0; 161 } 162 163 int 164 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 165 struct drm_file *file) 166 { 167 struct drm_i915_private *dev_priv = dev->dev_private; 168 struct drm_i915_gem_get_aperture *args = data; 169 struct drm_i915_gem_object *obj; 170 size_t pinned; 171 172 pinned = 0; 173 mutex_lock(&dev->struct_mutex); 174 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) 175 if (obj->pin_count) 176 pinned += obj->gtt_space->size; 177 mutex_unlock(&dev->struct_mutex); 178 179 args->aper_size = dev_priv->mm.gtt_total; 180 args->aper_available_size = args->aper_size - pinned; 181 182 return 0; 183 } 184 185 static int 186 i915_gem_create(struct drm_file *file, 187 struct drm_device *dev, 188 uint64_t size, 189 uint32_t *handle_p) 190 { 191 struct drm_i915_gem_object *obj; 192 int ret; 193 u32 handle; 194 195 size = roundup(size, PAGE_SIZE); 196 if (size == 0) 197 return -EINVAL; 198 199 /* Allocate the new object */ 200 obj = i915_gem_alloc_object(dev, size); 201 if (obj == NULL) 202 return -ENOMEM; 203 204 ret = drm_gem_handle_create(file, &obj->base, &handle); 205 if (ret) { 206 drm_gem_object_release(&obj->base); 207 i915_gem_info_remove_obj(dev->dev_private, obj->base.size); 208 kfree(obj); 209 return ret; 210 } 211 212 /* drop reference from allocate - handle holds it now */ 213 drm_gem_object_unreference(&obj->base); 214 trace_i915_gem_object_create(obj); 215 216 *handle_p = handle; 217 return 0; 218 } 219 220 int 221 i915_gem_dumb_create(struct drm_file *file, 222 struct drm_device *dev, 223 struct drm_mode_create_dumb *args) 224 { 225 /* have to work out size/pitch and return them */ 226 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); 227 args->size = args->pitch * args->height; 228 return i915_gem_create(file, dev, 229 args->size, &args->handle); 230 } 231 232 int i915_gem_dumb_destroy(struct drm_file *file, 233 struct drm_device *dev, 234 uint32_t handle) 235 { 236 return drm_gem_handle_delete(file, handle); 237 } 238 239 /** 240 * Creates a new mm object and returns a handle to it. 
241 */ 242 int 243 i915_gem_create_ioctl(struct drm_device *dev, void *data, 244 struct drm_file *file) 245 { 246 struct drm_i915_gem_create *args = data; 247 248 return i915_gem_create(file, dev, 249 args->size, &args->handle); 250 } 251 252 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) 253 { 254 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 255 256 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 257 obj->tiling_mode != I915_TILING_NONE; 258 } 259 260 static inline int 261 __copy_to_user_swizzled(char __user *cpu_vaddr, 262 const char *gpu_vaddr, int gpu_offset, 263 int length) 264 { 265 int ret, cpu_offset = 0; 266 267 while (length > 0) { 268 int cacheline_end = ALIGN(gpu_offset + 1, 64); 269 int this_length = min(cacheline_end - gpu_offset, length); 270 int swizzled_gpu_offset = gpu_offset ^ 64; 271 272 ret = __copy_to_user(cpu_vaddr + cpu_offset, 273 gpu_vaddr + swizzled_gpu_offset, 274 this_length); 275 if (ret) 276 return ret + length; 277 278 cpu_offset += this_length; 279 gpu_offset += this_length; 280 length -= this_length; 281 } 282 283 return 0; 284 } 285 286 static inline int 287 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 288 const char __user *cpu_vaddr, 289 int length) 290 { 291 int ret, cpu_offset = 0; 292 293 while (length > 0) { 294 int cacheline_end = ALIGN(gpu_offset + 1, 64); 295 int this_length = min(cacheline_end - gpu_offset, length); 296 int swizzled_gpu_offset = gpu_offset ^ 64; 297 298 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 299 cpu_vaddr + cpu_offset, 300 this_length); 301 if (ret) 302 return ret + length; 303 304 cpu_offset += this_length; 305 gpu_offset += this_length; 306 length -= this_length; 307 } 308 309 return 0; 310 } 311 312 /* Per-page copy function for the shmem pread fastpath. 313 * Flushes invalid cachelines before reading the target if 314 * needs_clflush is set. */ 315 static int 316 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 317 char __user *user_data, 318 bool page_do_bit17_swizzling, bool needs_clflush) 319 { 320 char *vaddr; 321 int ret; 322 323 if (unlikely(page_do_bit17_swizzling)) 324 return -EINVAL; 325 326 vaddr = kmap_atomic(page); 327 if (needs_clflush) 328 drm_clflush_virt_range(vaddr + shmem_page_offset, 329 page_length); 330 ret = __copy_to_user_inatomic(user_data, 331 vaddr + shmem_page_offset, 332 page_length); 333 kunmap_atomic(vaddr); 334 335 return ret; 336 } 337 338 static void 339 shmem_clflush_swizzled_range(char *addr, unsigned long length, 340 bool swizzled) 341 { 342 if (unlikely(swizzled)) { 343 unsigned long start = (unsigned long) addr; 344 unsigned long end = (unsigned long) addr + length; 345 346 /* For swizzling simply ensure that we always flush both 347 * channels. Lame, but simple and it works. Swizzled 348 * pwrite/pread is far from a hotpath - current userspace 349 * doesn't use it at all. */ 350 start = round_down(start, 128); 351 end = round_up(end, 128); 352 353 drm_clflush_virt_range((void *)start, end - start); 354 } else { 355 drm_clflush_virt_range(addr, length); 356 } 357 358 } 359 360 /* Only difference to the fast-path function is that this can handle bit17 361 * and uses non-atomic copy and kmap functions. 
*/ 362 static int 363 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 364 char __user *user_data, 365 bool page_do_bit17_swizzling, bool needs_clflush) 366 { 367 char *vaddr; 368 int ret; 369 370 vaddr = kmap(page); 371 if (needs_clflush) 372 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 373 page_length, 374 page_do_bit17_swizzling); 375 376 if (page_do_bit17_swizzling) 377 ret = __copy_to_user_swizzled(user_data, 378 vaddr, shmem_page_offset, 379 page_length); 380 else 381 ret = __copy_to_user(user_data, 382 vaddr + shmem_page_offset, 383 page_length); 384 kunmap(page); 385 386 return ret; 387 } 388 389 static int 390 i915_gem_shmem_pread(struct drm_device *dev, 391 struct drm_i915_gem_object *obj, 392 struct drm_i915_gem_pread *args, 393 struct drm_file *file) 394 { 395 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 396 char __user *user_data; 397 ssize_t remain; 398 loff_t offset; 399 int shmem_page_offset, page_length, ret = 0; 400 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 401 int hit_slowpath = 0; 402 int prefaulted = 0; 403 int needs_clflush = 0; 404 int release_page; 405 406 user_data = (char __user *) (uintptr_t) args->data_ptr; 407 remain = args->size; 408 409 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 410 411 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 412 /* If we're not in the cpu read domain, set ourself into the gtt 413 * read domain and manually flush cachelines (if required). This 414 * optimizes for the case when the gpu will dirty the data 415 * anyway again before the next pread happens. */ 416 if (obj->cache_level == I915_CACHE_NONE) 417 needs_clflush = 1; 418 ret = i915_gem_object_set_to_gtt_domain(obj, false); 419 if (ret) 420 return ret; 421 } 422 423 offset = args->offset; 424 425 while (remain > 0) { 426 struct page *page; 427 428 /* Operation in this page 429 * 430 * shmem_page_offset = offset within page in shmem file 431 * page_length = bytes to copy for this page 432 */ 433 shmem_page_offset = offset_in_page(offset); 434 page_length = remain; 435 if ((shmem_page_offset + page_length) > PAGE_SIZE) 436 page_length = PAGE_SIZE - shmem_page_offset; 437 438 if (obj->pages) { 439 page = obj->pages[offset >> PAGE_SHIFT]; 440 release_page = 0; 441 } else { 442 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 443 if (IS_ERR(page)) { 444 ret = PTR_ERR(page); 445 goto out; 446 } 447 release_page = 1; 448 } 449 450 page_do_bit17_swizzling = obj_do_bit17_swizzling && 451 (page_to_phys(page) & (1 << 17)) != 0; 452 453 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 454 user_data, page_do_bit17_swizzling, 455 needs_clflush); 456 if (ret == 0) 457 goto next_page; 458 459 hit_slowpath = 1; 460 page_cache_get(page); 461 mutex_unlock(&dev->struct_mutex); 462 463 if (!prefaulted) { 464 ret = fault_in_multipages_writeable(user_data, remain); 465 /* Userspace is tricking us, but we've already clobbered 466 * its pages with the prefault and promised to write the 467 * data up to the first fault. Hence ignore any errors 468 * and just continue. 
*/ 469 (void)ret; 470 prefaulted = 1; 471 } 472 473 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 474 user_data, page_do_bit17_swizzling, 475 needs_clflush); 476 477 mutex_lock(&dev->struct_mutex); 478 page_cache_release(page); 479 next_page: 480 mark_page_accessed(page); 481 if (release_page) 482 page_cache_release(page); 483 484 if (ret) { 485 ret = -EFAULT; 486 goto out; 487 } 488 489 remain -= page_length; 490 user_data += page_length; 491 offset += page_length; 492 } 493 494 out: 495 if (hit_slowpath) { 496 /* Fixup: Kill any reinstated backing storage pages */ 497 if (obj->madv == __I915_MADV_PURGED) 498 i915_gem_object_truncate(obj); 499 } 500 501 return ret; 502 } 503 504 /** 505 * Reads data from the object referenced by handle. 506 * 507 * On error, the contents of *data are undefined. 508 */ 509 int 510 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 511 struct drm_file *file) 512 { 513 struct drm_i915_gem_pread *args = data; 514 struct drm_i915_gem_object *obj; 515 int ret = 0; 516 517 if (args->size == 0) 518 return 0; 519 520 if (!access_ok(VERIFY_WRITE, 521 (char __user *)(uintptr_t)args->data_ptr, 522 args->size)) 523 return -EFAULT; 524 525 ret = i915_mutex_lock_interruptible(dev); 526 if (ret) 527 return ret; 528 529 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 530 if (&obj->base == NULL) { 531 ret = -ENOENT; 532 goto unlock; 533 } 534 535 /* Bounds check source. */ 536 if (args->offset > obj->base.size || 537 args->size > obj->base.size - args->offset) { 538 ret = -EINVAL; 539 goto out; 540 } 541 542 /* prime objects have no backing filp to GEM pread/pwrite 543 * pages from. 544 */ 545 if (!obj->base.filp) { 546 ret = -EINVAL; 547 goto out; 548 } 549 550 trace_i915_gem_object_pread(obj, args->offset, args->size); 551 552 ret = i915_gem_shmem_pread(dev, obj, args, file); 553 554 out: 555 drm_gem_object_unreference(&obj->base); 556 unlock: 557 mutex_unlock(&dev->struct_mutex); 558 return ret; 559 } 560 561 /* This is the fast write path which cannot handle 562 * page faults in the source data 563 */ 564 565 static inline int 566 fast_user_write(struct io_mapping *mapping, 567 loff_t page_base, int page_offset, 568 char __user *user_data, 569 int length) 570 { 571 void __iomem *vaddr_atomic; 572 void *vaddr; 573 unsigned long unwritten; 574 575 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 576 /* We can use the cpu mem copy function because this is X86. */ 577 vaddr = (void __force*)vaddr_atomic + page_offset; 578 unwritten = __copy_from_user_inatomic_nocache(vaddr, 579 user_data, length); 580 io_mapping_unmap_atomic(vaddr_atomic); 581 return unwritten; 582 } 583 584 /** 585 * This is the fast pwrite path, where we copy the data directly from the 586 * user into the GTT, uncached. 
587 */ 588 static int 589 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 590 struct drm_i915_gem_object *obj, 591 struct drm_i915_gem_pwrite *args, 592 struct drm_file *file) 593 { 594 drm_i915_private_t *dev_priv = dev->dev_private; 595 ssize_t remain; 596 loff_t offset, page_base; 597 char __user *user_data; 598 int page_offset, page_length, ret; 599 600 ret = i915_gem_object_pin(obj, 0, true); 601 if (ret) 602 goto out; 603 604 ret = i915_gem_object_set_to_gtt_domain(obj, true); 605 if (ret) 606 goto out_unpin; 607 608 ret = i915_gem_object_put_fence(obj); 609 if (ret) 610 goto out_unpin; 611 612 user_data = (char __user *) (uintptr_t) args->data_ptr; 613 remain = args->size; 614 615 offset = obj->gtt_offset + args->offset; 616 617 while (remain > 0) { 618 /* Operation in this page 619 * 620 * page_base = page offset within aperture 621 * page_offset = offset within page 622 * page_length = bytes to copy for this page 623 */ 624 page_base = offset & PAGE_MASK; 625 page_offset = offset_in_page(offset); 626 page_length = remain; 627 if ((page_offset + remain) > PAGE_SIZE) 628 page_length = PAGE_SIZE - page_offset; 629 630 /* If we get a fault while copying data, then (presumably) our 631 * source page isn't available. Return the error and we'll 632 * retry in the slow path. 633 */ 634 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, 635 page_offset, user_data, page_length)) { 636 ret = -EFAULT; 637 goto out_unpin; 638 } 639 640 remain -= page_length; 641 user_data += page_length; 642 offset += page_length; 643 } 644 645 out_unpin: 646 i915_gem_object_unpin(obj); 647 out: 648 return ret; 649 } 650 651 /* Per-page copy function for the shmem pwrite fastpath. 652 * Flushes invalid cachelines before writing to the target if 653 * needs_clflush_before is set and flushes out any written cachelines after 654 * writing if needs_clflush is set. */ 655 static int 656 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 657 char __user *user_data, 658 bool page_do_bit17_swizzling, 659 bool needs_clflush_before, 660 bool needs_clflush_after) 661 { 662 char *vaddr; 663 int ret; 664 665 if (unlikely(page_do_bit17_swizzling)) 666 return -EINVAL; 667 668 vaddr = kmap_atomic(page); 669 if (needs_clflush_before) 670 drm_clflush_virt_range(vaddr + shmem_page_offset, 671 page_length); 672 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, 673 user_data, 674 page_length); 675 if (needs_clflush_after) 676 drm_clflush_virt_range(vaddr + shmem_page_offset, 677 page_length); 678 kunmap_atomic(vaddr); 679 680 return ret; 681 } 682 683 /* Only difference to the fast-path function is that this can handle bit17 684 * and uses non-atomic copy and kmap functions. 
*/ 685 static int 686 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 687 char __user *user_data, 688 bool page_do_bit17_swizzling, 689 bool needs_clflush_before, 690 bool needs_clflush_after) 691 { 692 char *vaddr; 693 int ret; 694 695 vaddr = kmap(page); 696 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 697 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 698 page_length, 699 page_do_bit17_swizzling); 700 if (page_do_bit17_swizzling) 701 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 702 user_data, 703 page_length); 704 else 705 ret = __copy_from_user(vaddr + shmem_page_offset, 706 user_data, 707 page_length); 708 if (needs_clflush_after) 709 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 710 page_length, 711 page_do_bit17_swizzling); 712 kunmap(page); 713 714 return ret; 715 } 716 717 static int 718 i915_gem_shmem_pwrite(struct drm_device *dev, 719 struct drm_i915_gem_object *obj, 720 struct drm_i915_gem_pwrite *args, 721 struct drm_file *file) 722 { 723 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 724 ssize_t remain; 725 loff_t offset; 726 char __user *user_data; 727 int shmem_page_offset, page_length, ret = 0; 728 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 729 int hit_slowpath = 0; 730 int needs_clflush_after = 0; 731 int needs_clflush_before = 0; 732 int release_page; 733 734 user_data = (char __user *) (uintptr_t) args->data_ptr; 735 remain = args->size; 736 737 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 738 739 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 740 /* If we're not in the cpu write domain, set ourself into the gtt 741 * write domain and manually flush cachelines (if required). This 742 * optimizes for the case when the gpu will use the data 743 * right away and we therefore have to clflush anyway. */ 744 if (obj->cache_level == I915_CACHE_NONE) 745 needs_clflush_after = 1; 746 ret = i915_gem_object_set_to_gtt_domain(obj, true); 747 if (ret) 748 return ret; 749 } 750 /* Same trick applies for invalidate partially written cachelines before 751 * writing. */ 752 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) 753 && obj->cache_level == I915_CACHE_NONE) 754 needs_clflush_before = 1; 755 756 offset = args->offset; 757 obj->dirty = 1; 758 759 while (remain > 0) { 760 struct page *page; 761 int partial_cacheline_write; 762 763 /* Operation in this page 764 * 765 * shmem_page_offset = offset within page in shmem file 766 * page_length = bytes to copy for this page 767 */ 768 shmem_page_offset = offset_in_page(offset); 769 770 page_length = remain; 771 if ((shmem_page_offset + page_length) > PAGE_SIZE) 772 page_length = PAGE_SIZE - shmem_page_offset; 773 774 /* If we don't overwrite a cacheline completely we need to be 775 * careful to have up-to-date data by first clflushing. Don't 776 * overcomplicate things and flush the entire patch. 
*/ 777 partial_cacheline_write = needs_clflush_before && 778 ((shmem_page_offset | page_length) 779 & (boot_cpu_data.x86_clflush_size - 1)); 780 781 if (obj->pages) { 782 page = obj->pages[offset >> PAGE_SHIFT]; 783 release_page = 0; 784 } else { 785 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 786 if (IS_ERR(page)) { 787 ret = PTR_ERR(page); 788 goto out; 789 } 790 release_page = 1; 791 } 792 793 page_do_bit17_swizzling = obj_do_bit17_swizzling && 794 (page_to_phys(page) & (1 << 17)) != 0; 795 796 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 797 user_data, page_do_bit17_swizzling, 798 partial_cacheline_write, 799 needs_clflush_after); 800 if (ret == 0) 801 goto next_page; 802 803 hit_slowpath = 1; 804 page_cache_get(page); 805 mutex_unlock(&dev->struct_mutex); 806 807 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 808 user_data, page_do_bit17_swizzling, 809 partial_cacheline_write, 810 needs_clflush_after); 811 812 mutex_lock(&dev->struct_mutex); 813 page_cache_release(page); 814 next_page: 815 set_page_dirty(page); 816 mark_page_accessed(page); 817 if (release_page) 818 page_cache_release(page); 819 820 if (ret) { 821 ret = -EFAULT; 822 goto out; 823 } 824 825 remain -= page_length; 826 user_data += page_length; 827 offset += page_length; 828 } 829 830 out: 831 if (hit_slowpath) { 832 /* Fixup: Kill any reinstated backing storage pages */ 833 if (obj->madv == __I915_MADV_PURGED) 834 i915_gem_object_truncate(obj); 835 /* and flush dirty cachelines in case the object isn't in the cpu write 836 * domain anymore. */ 837 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 838 i915_gem_clflush_object(obj); 839 intel_gtt_chipset_flush(); 840 } 841 } 842 843 if (needs_clflush_after) 844 intel_gtt_chipset_flush(); 845 846 return ret; 847 } 848 849 /** 850 * Writes data to the object referenced by handle. 851 * 852 * On error, the contents of the buffer that were to be modified are undefined. 853 */ 854 int 855 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 856 struct drm_file *file) 857 { 858 struct drm_i915_gem_pwrite *args = data; 859 struct drm_i915_gem_object *obj; 860 int ret; 861 862 if (args->size == 0) 863 return 0; 864 865 if (!access_ok(VERIFY_READ, 866 (char __user *)(uintptr_t)args->data_ptr, 867 args->size)) 868 return -EFAULT; 869 870 ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr, 871 args->size); 872 if (ret) 873 return -EFAULT; 874 875 ret = i915_mutex_lock_interruptible(dev); 876 if (ret) 877 return ret; 878 879 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 880 if (&obj->base == NULL) { 881 ret = -ENOENT; 882 goto unlock; 883 } 884 885 /* Bounds check destination. */ 886 if (args->offset > obj->base.size || 887 args->size > obj->base.size - args->offset) { 888 ret = -EINVAL; 889 goto out; 890 } 891 892 /* prime objects have no backing filp to GEM pread/pwrite 893 * pages from. 894 */ 895 if (!obj->base.filp) { 896 ret = -EINVAL; 897 goto out; 898 } 899 900 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 901 902 ret = -EFAULT; 903 /* We can only do the GTT pwrite on untiled buffers, as otherwise 904 * it would end up going through the fenced access, and we'll get 905 * different detiling behavior between reading and writing. 906 * pread/pwrite currently are reading and writing from the CPU 907 * perspective, requiring manual detiling by the client. 
908 */ 909 if (obj->phys_obj) { 910 ret = i915_gem_phys_pwrite(dev, obj, args, file); 911 goto out; 912 } 913 914 if (obj->gtt_space && 915 obj->cache_level == I915_CACHE_NONE && 916 obj->tiling_mode == I915_TILING_NONE && 917 obj->map_and_fenceable && 918 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 919 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 920 /* Note that the gtt paths might fail with non-page-backed user 921 * pointers (e.g. gtt mappings when moving data between 922 * textures). Fallback to the shmem path in that case. */ 923 } 924 925 if (ret == -EFAULT) 926 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 927 928 out: 929 drm_gem_object_unreference(&obj->base); 930 unlock: 931 mutex_unlock(&dev->struct_mutex); 932 return ret; 933 } 934 935 /** 936 * Called when user space prepares to use an object with the CPU, either 937 * through the mmap ioctl's mapping or a GTT mapping. 938 */ 939 int 940 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 941 struct drm_file *file) 942 { 943 struct drm_i915_gem_set_domain *args = data; 944 struct drm_i915_gem_object *obj; 945 uint32_t read_domains = args->read_domains; 946 uint32_t write_domain = args->write_domain; 947 int ret; 948 949 /* Only handle setting domains to types used by the CPU. */ 950 if (write_domain & I915_GEM_GPU_DOMAINS) 951 return -EINVAL; 952 953 if (read_domains & I915_GEM_GPU_DOMAINS) 954 return -EINVAL; 955 956 /* Having something in the write domain implies it's in the read 957 * domain, and only that read domain. Enforce that in the request. 958 */ 959 if (write_domain != 0 && read_domains != write_domain) 960 return -EINVAL; 961 962 ret = i915_mutex_lock_interruptible(dev); 963 if (ret) 964 return ret; 965 966 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 967 if (&obj->base == NULL) { 968 ret = -ENOENT; 969 goto unlock; 970 } 971 972 if (read_domains & I915_GEM_DOMAIN_GTT) { 973 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 974 975 /* Silently promote "you're not bound, there was nothing to do" 976 * to success, since the client was just asking us to 977 * make sure everything was done. 978 */ 979 if (ret == -EINVAL) 980 ret = 0; 981 } else { 982 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 983 } 984 985 drm_gem_object_unreference(&obj->base); 986 unlock: 987 mutex_unlock(&dev->struct_mutex); 988 return ret; 989 } 990 991 /** 992 * Called when user space has done writes to this buffer 993 */ 994 int 995 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 996 struct drm_file *file) 997 { 998 struct drm_i915_gem_sw_finish *args = data; 999 struct drm_i915_gem_object *obj; 1000 int ret = 0; 1001 1002 ret = i915_mutex_lock_interruptible(dev); 1003 if (ret) 1004 return ret; 1005 1006 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1007 if (&obj->base == NULL) { 1008 ret = -ENOENT; 1009 goto unlock; 1010 } 1011 1012 /* Pinned buffers may be scanout, so flush the cache */ 1013 if (obj->pin_count) 1014 i915_gem_object_flush_cpu_write_domain(obj); 1015 1016 drm_gem_object_unreference(&obj->base); 1017 unlock: 1018 mutex_unlock(&dev->struct_mutex); 1019 return ret; 1020 } 1021 1022 /** 1023 * Maps the contents of an object, returning the address it is mapped 1024 * into. 1025 * 1026 * While the mapping holds a reference on the contents of the object, it doesn't 1027 * imply a ref on the object itself. 
1028 */ 1029 int 1030 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1031 struct drm_file *file) 1032 { 1033 struct drm_i915_gem_mmap *args = data; 1034 struct drm_gem_object *obj; 1035 unsigned long addr; 1036 1037 obj = drm_gem_object_lookup(dev, file, args->handle); 1038 if (obj == NULL) 1039 return -ENOENT; 1040 1041 /* prime objects have no backing filp to GEM mmap 1042 * pages from. 1043 */ 1044 if (!obj->filp) { 1045 drm_gem_object_unreference_unlocked(obj); 1046 return -EINVAL; 1047 } 1048 1049 addr = vm_mmap(obj->filp, 0, args->size, 1050 PROT_READ | PROT_WRITE, MAP_SHARED, 1051 args->offset); 1052 drm_gem_object_unreference_unlocked(obj); 1053 if (IS_ERR((void *)addr)) 1054 return addr; 1055 1056 args->addr_ptr = (uint64_t) addr; 1057 1058 return 0; 1059 } 1060 1061 /** 1062 * i915_gem_fault - fault a page into the GTT 1063 * vma: VMA in question 1064 * vmf: fault info 1065 * 1066 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1067 * from userspace. The fault handler takes care of binding the object to 1068 * the GTT (if needed), allocating and programming a fence register (again, 1069 * only if needed based on whether the old reg is still valid or the object 1070 * is tiled) and inserting a new PTE into the faulting process. 1071 * 1072 * Note that the faulting process may involve evicting existing objects 1073 * from the GTT and/or fence registers to make room. So performance may 1074 * suffer if the GTT working set is large or there are few fence registers 1075 * left. 1076 */ 1077 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1078 { 1079 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1080 struct drm_device *dev = obj->base.dev; 1081 drm_i915_private_t *dev_priv = dev->dev_private; 1082 pgoff_t page_offset; 1083 unsigned long pfn; 1084 int ret = 0; 1085 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1086 1087 /* We don't use vmf->pgoff since that has the fake offset */ 1088 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1089 PAGE_SHIFT; 1090 1091 ret = i915_mutex_lock_interruptible(dev); 1092 if (ret) 1093 goto out; 1094 1095 trace_i915_gem_object_fault(obj, page_offset, true, write); 1096 1097 /* Now bind it into the GTT if needed */ 1098 if (!obj->map_and_fenceable) { 1099 ret = i915_gem_object_unbind(obj); 1100 if (ret) 1101 goto unlock; 1102 } 1103 if (!obj->gtt_space) { 1104 ret = i915_gem_object_bind_to_gtt(obj, 0, true); 1105 if (ret) 1106 goto unlock; 1107 1108 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1109 if (ret) 1110 goto unlock; 1111 } 1112 1113 if (!obj->has_global_gtt_mapping) 1114 i915_gem_gtt_bind_object(obj, obj->cache_level); 1115 1116 ret = i915_gem_object_get_fence(obj); 1117 if (ret) 1118 goto unlock; 1119 1120 if (i915_gem_object_is_inactive(obj)) 1121 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 1122 1123 obj->fault_mappable = true; 1124 1125 pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) + 1126 page_offset; 1127 1128 /* Finally, remap it using the new GTT offset */ 1129 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1130 unlock: 1131 mutex_unlock(&dev->struct_mutex); 1132 out: 1133 switch (ret) { 1134 case -EIO: 1135 case -EAGAIN: 1136 /* Give the error handler a chance to run and move the 1137 * objects off the GPU active list. 
Next time we service the 1138 * fault, we should be able to transition the page into the 1139 * GTT without touching the GPU (and so avoid further 1140 * EIO/EGAIN). If the GPU is wedged, then there is no issue 1141 * with coherency, just lost writes. 1142 */ 1143 set_need_resched(); 1144 case 0: 1145 case -ERESTARTSYS: 1146 case -EINTR: 1147 return VM_FAULT_NOPAGE; 1148 case -ENOMEM: 1149 return VM_FAULT_OOM; 1150 default: 1151 return VM_FAULT_SIGBUS; 1152 } 1153 } 1154 1155 /** 1156 * i915_gem_release_mmap - remove physical page mappings 1157 * @obj: obj in question 1158 * 1159 * Preserve the reservation of the mmapping with the DRM core code, but 1160 * relinquish ownership of the pages back to the system. 1161 * 1162 * It is vital that we remove the page mapping if we have mapped a tiled 1163 * object through the GTT and then lose the fence register due to 1164 * resource pressure. Similarly if the object has been moved out of the 1165 * aperture, than pages mapped into userspace must be revoked. Removing the 1166 * mapping will then trigger a page fault on the next user access, allowing 1167 * fixup by i915_gem_fault(). 1168 */ 1169 void 1170 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1171 { 1172 if (!obj->fault_mappable) 1173 return; 1174 1175 if (obj->base.dev->dev_mapping) 1176 unmap_mapping_range(obj->base.dev->dev_mapping, 1177 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, 1178 obj->base.size, 1); 1179 1180 obj->fault_mappable = false; 1181 } 1182 1183 static uint32_t 1184 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1185 { 1186 uint32_t gtt_size; 1187 1188 if (INTEL_INFO(dev)->gen >= 4 || 1189 tiling_mode == I915_TILING_NONE) 1190 return size; 1191 1192 /* Previous chips need a power-of-two fence region when tiling */ 1193 if (INTEL_INFO(dev)->gen == 3) 1194 gtt_size = 1024*1024; 1195 else 1196 gtt_size = 512*1024; 1197 1198 while (gtt_size < size) 1199 gtt_size <<= 1; 1200 1201 return gtt_size; 1202 } 1203 1204 /** 1205 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1206 * @obj: object to check 1207 * 1208 * Return the required GTT alignment for an object, taking into account 1209 * potential fence register mapping. 1210 */ 1211 static uint32_t 1212 i915_gem_get_gtt_alignment(struct drm_device *dev, 1213 uint32_t size, 1214 int tiling_mode) 1215 { 1216 /* 1217 * Minimum alignment is 4k (GTT page size), but might be greater 1218 * if a fence register is needed for the object. 1219 */ 1220 if (INTEL_INFO(dev)->gen >= 4 || 1221 tiling_mode == I915_TILING_NONE) 1222 return 4096; 1223 1224 /* 1225 * Previous chips need to be aligned to the size of the smallest 1226 * fence register that can contain the object. 1227 */ 1228 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1229 } 1230 1231 /** 1232 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an 1233 * unfenced object 1234 * @dev: the device 1235 * @size: size of the object 1236 * @tiling_mode: tiling mode of the object 1237 * 1238 * Return the required GTT alignment for an object, only taking into account 1239 * unfenced tiled surface requirements. 1240 */ 1241 uint32_t 1242 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, 1243 uint32_t size, 1244 int tiling_mode) 1245 { 1246 /* 1247 * Minimum alignment is 4k (GTT page size) for sane hw. 
1248 */ 1249 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || 1250 tiling_mode == I915_TILING_NONE) 1251 return 4096; 1252 1253 /* Previous hardware however needs to be aligned to a power-of-two 1254 * tile height. The simplest method for determining this is to reuse 1255 * the power-of-tile object size. 1256 */ 1257 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1258 } 1259 1260 int 1261 i915_gem_mmap_gtt(struct drm_file *file, 1262 struct drm_device *dev, 1263 uint32_t handle, 1264 uint64_t *offset) 1265 { 1266 struct drm_i915_private *dev_priv = dev->dev_private; 1267 struct drm_i915_gem_object *obj; 1268 int ret; 1269 1270 ret = i915_mutex_lock_interruptible(dev); 1271 if (ret) 1272 return ret; 1273 1274 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1275 if (&obj->base == NULL) { 1276 ret = -ENOENT; 1277 goto unlock; 1278 } 1279 1280 if (obj->base.size > dev_priv->mm.gtt_mappable_end) { 1281 ret = -E2BIG; 1282 goto out; 1283 } 1284 1285 if (obj->madv != I915_MADV_WILLNEED) { 1286 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1287 ret = -EINVAL; 1288 goto out; 1289 } 1290 1291 if (!obj->base.map_list.map) { 1292 ret = drm_gem_create_mmap_offset(&obj->base); 1293 if (ret) 1294 goto out; 1295 } 1296 1297 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; 1298 1299 out: 1300 drm_gem_object_unreference(&obj->base); 1301 unlock: 1302 mutex_unlock(&dev->struct_mutex); 1303 return ret; 1304 } 1305 1306 /** 1307 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1308 * @dev: DRM device 1309 * @data: GTT mapping ioctl data 1310 * @file: GEM object info 1311 * 1312 * Simply returns the fake offset to userspace so it can mmap it. 1313 * The mmap call will end up in drm_gem_mmap(), which will set things 1314 * up so we can get faults in the handler above. 1315 * 1316 * The fault handler will take care of binding the object into the GTT 1317 * (since it may have been evicted to make room for something), allocating 1318 * a fence register, and mapping the appropriate aperture address into 1319 * userspace. 1320 */ 1321 int 1322 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1323 struct drm_file *file) 1324 { 1325 struct drm_i915_gem_mmap_gtt *args = data; 1326 1327 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1328 } 1329 1330 int 1331 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, 1332 gfp_t gfpmask) 1333 { 1334 int page_count, i; 1335 struct address_space *mapping; 1336 struct inode *inode; 1337 struct page *page; 1338 1339 if (obj->pages || obj->sg_table) 1340 return 0; 1341 1342 /* Get the list of pages out of our struct file. They'll be pinned 1343 * at this point until we release them. 
1344 */ 1345 page_count = obj->base.size / PAGE_SIZE; 1346 BUG_ON(obj->pages != NULL); 1347 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *)); 1348 if (obj->pages == NULL) 1349 return -ENOMEM; 1350 1351 inode = obj->base.filp->f_path.dentry->d_inode; 1352 mapping = inode->i_mapping; 1353 gfpmask |= mapping_gfp_mask(mapping); 1354 1355 for (i = 0; i < page_count; i++) { 1356 page = shmem_read_mapping_page_gfp(mapping, i, gfpmask); 1357 if (IS_ERR(page)) 1358 goto err_pages; 1359 1360 obj->pages[i] = page; 1361 } 1362 1363 if (i915_gem_object_needs_bit17_swizzle(obj)) 1364 i915_gem_object_do_bit_17_swizzle(obj); 1365 1366 return 0; 1367 1368 err_pages: 1369 while (i--) 1370 page_cache_release(obj->pages[i]); 1371 1372 drm_free_large(obj->pages); 1373 obj->pages = NULL; 1374 return PTR_ERR(page); 1375 } 1376 1377 static void 1378 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1379 { 1380 int page_count = obj->base.size / PAGE_SIZE; 1381 int i; 1382 1383 if (!obj->pages) 1384 return; 1385 1386 BUG_ON(obj->madv == __I915_MADV_PURGED); 1387 1388 if (i915_gem_object_needs_bit17_swizzle(obj)) 1389 i915_gem_object_save_bit_17_swizzle(obj); 1390 1391 if (obj->madv == I915_MADV_DONTNEED) 1392 obj->dirty = 0; 1393 1394 for (i = 0; i < page_count; i++) { 1395 if (obj->dirty) 1396 set_page_dirty(obj->pages[i]); 1397 1398 if (obj->madv == I915_MADV_WILLNEED) 1399 mark_page_accessed(obj->pages[i]); 1400 1401 page_cache_release(obj->pages[i]); 1402 } 1403 obj->dirty = 0; 1404 1405 drm_free_large(obj->pages); 1406 obj->pages = NULL; 1407 } 1408 1409 void 1410 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 1411 struct intel_ring_buffer *ring, 1412 u32 seqno) 1413 { 1414 struct drm_device *dev = obj->base.dev; 1415 struct drm_i915_private *dev_priv = dev->dev_private; 1416 1417 BUG_ON(ring == NULL); 1418 obj->ring = ring; 1419 1420 /* Add a reference if we're newly entering the active list. */ 1421 if (!obj->active) { 1422 drm_gem_object_reference(&obj->base); 1423 obj->active = 1; 1424 } 1425 1426 /* Move from whatever list we were on to the tail of execution. 
*/ 1427 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); 1428 list_move_tail(&obj->ring_list, &ring->active_list); 1429 1430 obj->last_rendering_seqno = seqno; 1431 1432 if (obj->fenced_gpu_access) { 1433 obj->last_fenced_seqno = seqno; 1434 1435 /* Bump MRU to take account of the delayed flush */ 1436 if (obj->fence_reg != I915_FENCE_REG_NONE) { 1437 struct drm_i915_fence_reg *reg; 1438 1439 reg = &dev_priv->fence_regs[obj->fence_reg]; 1440 list_move_tail(®->lru_list, 1441 &dev_priv->mm.fence_list); 1442 } 1443 } 1444 } 1445 1446 static void 1447 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) 1448 { 1449 list_del_init(&obj->ring_list); 1450 obj->last_rendering_seqno = 0; 1451 obj->last_fenced_seqno = 0; 1452 } 1453 1454 static void 1455 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj) 1456 { 1457 struct drm_device *dev = obj->base.dev; 1458 drm_i915_private_t *dev_priv = dev->dev_private; 1459 1460 BUG_ON(!obj->active); 1461 list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list); 1462 1463 i915_gem_object_move_off_active(obj); 1464 } 1465 1466 static void 1467 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 1468 { 1469 struct drm_device *dev = obj->base.dev; 1470 struct drm_i915_private *dev_priv = dev->dev_private; 1471 1472 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 1473 1474 BUG_ON(!list_empty(&obj->gpu_write_list)); 1475 BUG_ON(!obj->active); 1476 obj->ring = NULL; 1477 1478 i915_gem_object_move_off_active(obj); 1479 obj->fenced_gpu_access = false; 1480 1481 obj->active = 0; 1482 obj->pending_gpu_write = false; 1483 drm_gem_object_unreference(&obj->base); 1484 1485 WARN_ON(i915_verify_lists(dev)); 1486 } 1487 1488 /* Immediately discard the backing storage */ 1489 static void 1490 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1491 { 1492 struct inode *inode; 1493 1494 /* Our goal here is to return as much of the memory as 1495 * is possible back to the system as we are called from OOM. 1496 * To do this we must instruct the shmfs to drop all of its 1497 * backing pages, *now*. 
1498 */ 1499 inode = obj->base.filp->f_path.dentry->d_inode; 1500 shmem_truncate_range(inode, 0, (loff_t)-1); 1501 1502 if (obj->base.map_list.map) 1503 drm_gem_free_mmap_offset(&obj->base); 1504 1505 obj->madv = __I915_MADV_PURGED; 1506 } 1507 1508 static inline int 1509 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1510 { 1511 return obj->madv == I915_MADV_DONTNEED; 1512 } 1513 1514 static void 1515 i915_gem_process_flushing_list(struct intel_ring_buffer *ring, 1516 uint32_t flush_domains) 1517 { 1518 struct drm_i915_gem_object *obj, *next; 1519 1520 list_for_each_entry_safe(obj, next, 1521 &ring->gpu_write_list, 1522 gpu_write_list) { 1523 if (obj->base.write_domain & flush_domains) { 1524 uint32_t old_write_domain = obj->base.write_domain; 1525 1526 obj->base.write_domain = 0; 1527 list_del_init(&obj->gpu_write_list); 1528 i915_gem_object_move_to_active(obj, ring, 1529 i915_gem_next_request_seqno(ring)); 1530 1531 trace_i915_gem_object_change_domain(obj, 1532 obj->base.read_domains, 1533 old_write_domain); 1534 } 1535 } 1536 } 1537 1538 static u32 1539 i915_gem_get_seqno(struct drm_device *dev) 1540 { 1541 drm_i915_private_t *dev_priv = dev->dev_private; 1542 u32 seqno = dev_priv->next_seqno; 1543 1544 /* reserve 0 for non-seqno */ 1545 if (++dev_priv->next_seqno == 0) 1546 dev_priv->next_seqno = 1; 1547 1548 return seqno; 1549 } 1550 1551 u32 1552 i915_gem_next_request_seqno(struct intel_ring_buffer *ring) 1553 { 1554 if (ring->outstanding_lazy_request == 0) 1555 ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev); 1556 1557 return ring->outstanding_lazy_request; 1558 } 1559 1560 int 1561 i915_add_request(struct intel_ring_buffer *ring, 1562 struct drm_file *file, 1563 struct drm_i915_gem_request *request) 1564 { 1565 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1566 uint32_t seqno; 1567 u32 request_ring_position; 1568 int was_empty; 1569 int ret; 1570 1571 BUG_ON(request == NULL); 1572 seqno = i915_gem_next_request_seqno(ring); 1573 1574 /* Record the position of the start of the request so that 1575 * should we detect the updated seqno part-way through the 1576 * GPU processing the request, we never over-estimate the 1577 * position of the head. 
1578 */ 1579 request_ring_position = intel_ring_get_tail(ring); 1580 1581 ret = ring->add_request(ring, &seqno); 1582 if (ret) 1583 return ret; 1584 1585 trace_i915_gem_request_add(ring, seqno); 1586 1587 request->seqno = seqno; 1588 request->ring = ring; 1589 request->tail = request_ring_position; 1590 request->emitted_jiffies = jiffies; 1591 was_empty = list_empty(&ring->request_list); 1592 list_add_tail(&request->list, &ring->request_list); 1593 1594 if (file) { 1595 struct drm_i915_file_private *file_priv = file->driver_priv; 1596 1597 spin_lock(&file_priv->mm.lock); 1598 request->file_priv = file_priv; 1599 list_add_tail(&request->client_list, 1600 &file_priv->mm.request_list); 1601 spin_unlock(&file_priv->mm.lock); 1602 } 1603 1604 ring->outstanding_lazy_request = 0; 1605 1606 if (!dev_priv->mm.suspended) { 1607 if (i915_enable_hangcheck) { 1608 mod_timer(&dev_priv->hangcheck_timer, 1609 jiffies + 1610 msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); 1611 } 1612 if (was_empty) 1613 queue_delayed_work(dev_priv->wq, 1614 &dev_priv->mm.retire_work, HZ); 1615 } 1616 return 0; 1617 } 1618 1619 static inline void 1620 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1621 { 1622 struct drm_i915_file_private *file_priv = request->file_priv; 1623 1624 if (!file_priv) 1625 return; 1626 1627 spin_lock(&file_priv->mm.lock); 1628 if (request->file_priv) { 1629 list_del(&request->client_list); 1630 request->file_priv = NULL; 1631 } 1632 spin_unlock(&file_priv->mm.lock); 1633 } 1634 1635 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 1636 struct intel_ring_buffer *ring) 1637 { 1638 while (!list_empty(&ring->request_list)) { 1639 struct drm_i915_gem_request *request; 1640 1641 request = list_first_entry(&ring->request_list, 1642 struct drm_i915_gem_request, 1643 list); 1644 1645 list_del(&request->list); 1646 i915_gem_request_remove_from_client(request); 1647 kfree(request); 1648 } 1649 1650 while (!list_empty(&ring->active_list)) { 1651 struct drm_i915_gem_object *obj; 1652 1653 obj = list_first_entry(&ring->active_list, 1654 struct drm_i915_gem_object, 1655 ring_list); 1656 1657 obj->base.write_domain = 0; 1658 list_del_init(&obj->gpu_write_list); 1659 i915_gem_object_move_to_inactive(obj); 1660 } 1661 } 1662 1663 static void i915_gem_reset_fences(struct drm_device *dev) 1664 { 1665 struct drm_i915_private *dev_priv = dev->dev_private; 1666 int i; 1667 1668 for (i = 0; i < dev_priv->num_fence_regs; i++) { 1669 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 1670 1671 i915_gem_write_fence(dev, i, NULL); 1672 1673 if (reg->obj) 1674 i915_gem_object_fence_lost(reg->obj); 1675 1676 reg->pin_count = 0; 1677 reg->obj = NULL; 1678 INIT_LIST_HEAD(®->lru_list); 1679 } 1680 1681 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 1682 } 1683 1684 void i915_gem_reset(struct drm_device *dev) 1685 { 1686 struct drm_i915_private *dev_priv = dev->dev_private; 1687 struct drm_i915_gem_object *obj; 1688 struct intel_ring_buffer *ring; 1689 int i; 1690 1691 for_each_ring(ring, dev_priv, i) 1692 i915_gem_reset_ring_lists(dev_priv, ring); 1693 1694 /* Remove anything from the flushing lists. The GPU cache is likely 1695 * to be lost on reset along with the data, so simply move the 1696 * lost bo to the inactive list. 
1697 */ 1698 while (!list_empty(&dev_priv->mm.flushing_list)) { 1699 obj = list_first_entry(&dev_priv->mm.flushing_list, 1700 struct drm_i915_gem_object, 1701 mm_list); 1702 1703 obj->base.write_domain = 0; 1704 list_del_init(&obj->gpu_write_list); 1705 i915_gem_object_move_to_inactive(obj); 1706 } 1707 1708 /* Move everything out of the GPU domains to ensure we do any 1709 * necessary invalidation upon reuse. 1710 */ 1711 list_for_each_entry(obj, 1712 &dev_priv->mm.inactive_list, 1713 mm_list) 1714 { 1715 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 1716 } 1717 1718 /* The fence registers are invalidated so clear them out */ 1719 i915_gem_reset_fences(dev); 1720 } 1721 1722 /** 1723 * This function clears the request list as sequence numbers are passed. 1724 */ 1725 void 1726 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 1727 { 1728 uint32_t seqno; 1729 int i; 1730 1731 if (list_empty(&ring->request_list)) 1732 return; 1733 1734 WARN_ON(i915_verify_lists(ring->dev)); 1735 1736 seqno = ring->get_seqno(ring); 1737 1738 for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) 1739 if (seqno >= ring->sync_seqno[i]) 1740 ring->sync_seqno[i] = 0; 1741 1742 while (!list_empty(&ring->request_list)) { 1743 struct drm_i915_gem_request *request; 1744 1745 request = list_first_entry(&ring->request_list, 1746 struct drm_i915_gem_request, 1747 list); 1748 1749 if (!i915_seqno_passed(seqno, request->seqno)) 1750 break; 1751 1752 trace_i915_gem_request_retire(ring, request->seqno); 1753 /* We know the GPU must have read the request to have 1754 * sent us the seqno + interrupt, so use the position 1755 * of tail of the request to update the last known position 1756 * of the GPU head. 1757 */ 1758 ring->last_retired_head = request->tail; 1759 1760 list_del(&request->list); 1761 i915_gem_request_remove_from_client(request); 1762 kfree(request); 1763 } 1764 1765 /* Move any buffers on the active list that are no longer referenced 1766 * by the ringbuffer to the flushing/inactive lists as appropriate. 1767 */ 1768 while (!list_empty(&ring->active_list)) { 1769 struct drm_i915_gem_object *obj; 1770 1771 obj = list_first_entry(&ring->active_list, 1772 struct drm_i915_gem_object, 1773 ring_list); 1774 1775 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) 1776 break; 1777 1778 if (obj->base.write_domain != 0) 1779 i915_gem_object_move_to_flushing(obj); 1780 else 1781 i915_gem_object_move_to_inactive(obj); 1782 } 1783 1784 if (unlikely(ring->trace_irq_seqno && 1785 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 1786 ring->irq_put(ring); 1787 ring->trace_irq_seqno = 0; 1788 } 1789 1790 WARN_ON(i915_verify_lists(ring->dev)); 1791 } 1792 1793 void 1794 i915_gem_retire_requests(struct drm_device *dev) 1795 { 1796 drm_i915_private_t *dev_priv = dev->dev_private; 1797 struct intel_ring_buffer *ring; 1798 int i; 1799 1800 for_each_ring(ring, dev_priv, i) 1801 i915_gem_retire_requests_ring(ring); 1802 } 1803 1804 static void 1805 i915_gem_retire_work_handler(struct work_struct *work) 1806 { 1807 drm_i915_private_t *dev_priv; 1808 struct drm_device *dev; 1809 struct intel_ring_buffer *ring; 1810 bool idle; 1811 int i; 1812 1813 dev_priv = container_of(work, drm_i915_private_t, 1814 mm.retire_work.work); 1815 dev = dev_priv->dev; 1816 1817 /* Come back later if the device is busy... 
*/ 1818 if (!mutex_trylock(&dev->struct_mutex)) { 1819 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1820 return; 1821 } 1822 1823 i915_gem_retire_requests(dev); 1824 1825 /* Send a periodic flush down the ring so we don't hold onto GEM 1826 * objects indefinitely. 1827 */ 1828 idle = true; 1829 for_each_ring(ring, dev_priv, i) { 1830 if (!list_empty(&ring->gpu_write_list)) { 1831 struct drm_i915_gem_request *request; 1832 int ret; 1833 1834 ret = i915_gem_flush_ring(ring, 1835 0, I915_GEM_GPU_DOMAINS); 1836 request = kzalloc(sizeof(*request), GFP_KERNEL); 1837 if (ret || request == NULL || 1838 i915_add_request(ring, NULL, request)) 1839 kfree(request); 1840 } 1841 1842 idle &= list_empty(&ring->request_list); 1843 } 1844 1845 if (!dev_priv->mm.suspended && !idle) 1846 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1847 1848 mutex_unlock(&dev->struct_mutex); 1849 } 1850 1851 static int 1852 i915_gem_check_wedge(struct drm_i915_private *dev_priv) 1853 { 1854 BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); 1855 1856 if (atomic_read(&dev_priv->mm.wedged)) { 1857 struct completion *x = &dev_priv->error_completion; 1858 bool recovery_complete; 1859 unsigned long flags; 1860 1861 /* Give the error handler a chance to run. */ 1862 spin_lock_irqsave(&x->wait.lock, flags); 1863 recovery_complete = x->done > 0; 1864 spin_unlock_irqrestore(&x->wait.lock, flags); 1865 1866 return recovery_complete ? -EIO : -EAGAIN; 1867 } 1868 1869 return 0; 1870 } 1871 1872 /* 1873 * Compare seqno against outstanding lazy request. Emit a request if they are 1874 * equal. 1875 */ 1876 static int 1877 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) 1878 { 1879 int ret = 0; 1880 1881 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); 1882 1883 if (seqno == ring->outstanding_lazy_request) { 1884 struct drm_i915_gem_request *request; 1885 1886 request = kzalloc(sizeof(*request), GFP_KERNEL); 1887 if (request == NULL) 1888 return -ENOMEM; 1889 1890 ret = i915_add_request(ring, NULL, request); 1891 if (ret) { 1892 kfree(request); 1893 return ret; 1894 } 1895 1896 BUG_ON(seqno != request->seqno); 1897 } 1898 1899 return ret; 1900 } 1901 1902 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 1903 bool interruptible) 1904 { 1905 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1906 int ret = 0; 1907 1908 if (i915_seqno_passed(ring->get_seqno(ring), seqno)) 1909 return 0; 1910 1911 trace_i915_gem_request_wait_begin(ring, seqno); 1912 if (WARN_ON(!ring->irq_get(ring))) 1913 return -ENODEV; 1914 1915 #define EXIT_COND \ 1916 (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ 1917 atomic_read(&dev_priv->mm.wedged)) 1918 1919 if (interruptible) 1920 ret = wait_event_interruptible(ring->irq_queue, 1921 EXIT_COND); 1922 else 1923 wait_event(ring->irq_queue, EXIT_COND); 1924 1925 ring->irq_put(ring); 1926 trace_i915_gem_request_wait_end(ring, seqno); 1927 #undef EXIT_COND 1928 1929 return ret; 1930 } 1931 1932 /** 1933 * Waits for a sequence number to be signaled, and cleans up the 1934 * request and object lists appropriately for that event. 
1935 */ 1936 int 1937 i915_wait_request(struct intel_ring_buffer *ring, 1938 uint32_t seqno) 1939 { 1940 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1941 int ret = 0; 1942 1943 BUG_ON(seqno == 0); 1944 1945 ret = i915_gem_check_wedge(dev_priv); 1946 if (ret) 1947 return ret; 1948 1949 ret = i915_gem_check_olr(ring, seqno); 1950 if (ret) 1951 return ret; 1952 1953 ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible); 1954 if (atomic_read(&dev_priv->mm.wedged)) 1955 ret = -EAGAIN; 1956 1957 return ret; 1958 } 1959 1960 /** 1961 * Ensures that all rendering to the object has completed and the object is 1962 * safe to unbind from the GTT or access from the CPU. 1963 */ 1964 int 1965 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) 1966 { 1967 int ret; 1968 1969 /* This function only exists to support waiting for existing rendering, 1970 * not for emitting required flushes. 1971 */ 1972 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0); 1973 1974 /* If there is rendering queued on the buffer being evicted, wait for 1975 * it. 1976 */ 1977 if (obj->active) { 1978 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); 1979 if (ret) 1980 return ret; 1981 i915_gem_retire_requests_ring(obj->ring); 1982 } 1983 1984 return 0; 1985 } 1986 1987 /** 1988 * i915_gem_object_sync - sync an object to a ring. 1989 * 1990 * @obj: object which may be in use on another ring. 1991 * @to: ring we wish to use the object on. May be NULL. 1992 * 1993 * This code is meant to abstract object synchronization with the GPU. 1994 * Calling with NULL implies synchronizing the object with the CPU 1995 * rather than a particular GPU ring. 1996 * 1997 * Returns 0 if successful, else propagates up the lower layer error. 1998 */ 1999 int 2000 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2001 struct intel_ring_buffer *to) 2002 { 2003 struct intel_ring_buffer *from = obj->ring; 2004 u32 seqno; 2005 int ret, idx; 2006 2007 if (from == NULL || to == from) 2008 return 0; 2009 2010 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2011 return i915_gem_object_wait_rendering(obj); 2012 2013 idx = intel_ring_sync_index(from, to); 2014 2015 seqno = obj->last_rendering_seqno; 2016 if (seqno <= from->sync_seqno[idx]) 2017 return 0; 2018 2019 ret = i915_gem_check_olr(obj->ring, seqno); 2020 if (ret) 2021 return ret; 2022 2023 ret = to->sync_to(to, from, seqno); 2024 if (!ret) 2025 from->sync_seqno[idx] = seqno; 2026 2027 return ret; 2028 } 2029 2030 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2031 { 2032 u32 old_write_domain, old_read_domains; 2033 2034 /* Act a barrier for all accesses through the GTT */ 2035 mb(); 2036 2037 /* Force a pagefault for domain tracking on next user access */ 2038 i915_gem_release_mmap(obj); 2039 2040 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2041 return; 2042 2043 old_read_domains = obj->base.read_domains; 2044 old_write_domain = obj->base.write_domain; 2045 2046 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2047 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2048 2049 trace_i915_gem_object_change_domain(obj, 2050 old_read_domains, 2051 old_write_domain); 2052 } 2053 2054 /** 2055 * Unbinds an object from the GTT aperture. 
2056 */ 2057 int 2058 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2059 { 2060 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2061 int ret = 0; 2062 2063 if (obj->gtt_space == NULL) 2064 return 0; 2065 2066 if (obj->pin_count) 2067 return -EBUSY; 2068 2069 ret = i915_gem_object_finish_gpu(obj); 2070 if (ret) 2071 return ret; 2072 /* Continue on if we fail due to EIO, the GPU is hung so we 2073 * should be safe and we need to cleanup or else we might 2074 * cause memory corruption through use-after-free. 2075 */ 2076 2077 i915_gem_object_finish_gtt(obj); 2078 2079 /* Move the object to the CPU domain to ensure that 2080 * any possible CPU writes while it's not in the GTT 2081 * are flushed when we go to remap it. 2082 */ 2083 if (ret == 0) 2084 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2085 if (ret == -ERESTARTSYS) 2086 return ret; 2087 if (ret) { 2088 /* In the event of a disaster, abandon all caches and 2089 * hope for the best. 2090 */ 2091 i915_gem_clflush_object(obj); 2092 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2093 } 2094 2095 /* release the fence reg _after_ flushing */ 2096 ret = i915_gem_object_put_fence(obj); 2097 if (ret) 2098 return ret; 2099 2100 trace_i915_gem_object_unbind(obj); 2101 2102 if (obj->has_global_gtt_mapping) 2103 i915_gem_gtt_unbind_object(obj); 2104 if (obj->has_aliasing_ppgtt_mapping) { 2105 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2106 obj->has_aliasing_ppgtt_mapping = 0; 2107 } 2108 i915_gem_gtt_finish_object(obj); 2109 2110 i915_gem_object_put_pages_gtt(obj); 2111 2112 list_del_init(&obj->gtt_list); 2113 list_del_init(&obj->mm_list); 2114 /* Avoid an unnecessary call to unbind on rebind. */ 2115 obj->map_and_fenceable = true; 2116 2117 drm_mm_put_block(obj->gtt_space); 2118 obj->gtt_space = NULL; 2119 obj->gtt_offset = 0; 2120 2121 if (i915_gem_object_is_purgeable(obj)) 2122 i915_gem_object_truncate(obj); 2123 2124 return ret; 2125 } 2126 2127 int 2128 i915_gem_flush_ring(struct intel_ring_buffer *ring, 2129 uint32_t invalidate_domains, 2130 uint32_t flush_domains) 2131 { 2132 int ret; 2133 2134 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) 2135 return 0; 2136 2137 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); 2138 2139 ret = ring->flush(ring, invalidate_domains, flush_domains); 2140 if (ret) 2141 return ret; 2142 2143 if (flush_domains & I915_GEM_GPU_DOMAINS) 2144 i915_gem_process_flushing_list(ring, flush_domains); 2145 2146 return 0; 2147 } 2148 2149 static int i915_ring_idle(struct intel_ring_buffer *ring) 2150 { 2151 int ret; 2152 2153 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) 2154 return 0; 2155 2156 if (!list_empty(&ring->gpu_write_list)) { 2157 ret = i915_gem_flush_ring(ring, 2158 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2159 if (ret) 2160 return ret; 2161 } 2162 2163 return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); 2164 } 2165 2166 int i915_gpu_idle(struct drm_device *dev) 2167 { 2168 drm_i915_private_t *dev_priv = dev->dev_private; 2169 struct intel_ring_buffer *ring; 2170 int ret, i; 2171 2172 /* Flush everything onto the inactive list. */ 2173 for_each_ring(ring, dev_priv, i) { 2174 ret = i915_ring_idle(ring); 2175 if (ret) 2176 return ret; 2177 2178 /* Is the device fubar? 
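 * After idling each ring above nothing should remain on its
 * gpu_write_list; warn and report -EBUSY if something does.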
*/ 2179 if (WARN_ON(!list_empty(&ring->gpu_write_list))) 2180 return -EBUSY; 2181 } 2182 2183 return 0; 2184 } 2185 2186 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, 2187 struct drm_i915_gem_object *obj) 2188 { 2189 drm_i915_private_t *dev_priv = dev->dev_private; 2190 uint64_t val; 2191 2192 if (obj) { 2193 u32 size = obj->gtt_space->size; 2194 2195 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2196 0xfffff000) << 32; 2197 val |= obj->gtt_offset & 0xfffff000; 2198 val |= (uint64_t)((obj->stride / 128) - 1) << 2199 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2200 2201 if (obj->tiling_mode == I915_TILING_Y) 2202 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2203 val |= I965_FENCE_REG_VALID; 2204 } else 2205 val = 0; 2206 2207 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); 2208 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); 2209 } 2210 2211 static void i965_write_fence_reg(struct drm_device *dev, int reg, 2212 struct drm_i915_gem_object *obj) 2213 { 2214 drm_i915_private_t *dev_priv = dev->dev_private; 2215 uint64_t val; 2216 2217 if (obj) { 2218 u32 size = obj->gtt_space->size; 2219 2220 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2221 0xfffff000) << 32; 2222 val |= obj->gtt_offset & 0xfffff000; 2223 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2224 if (obj->tiling_mode == I915_TILING_Y) 2225 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2226 val |= I965_FENCE_REG_VALID; 2227 } else 2228 val = 0; 2229 2230 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); 2231 POSTING_READ(FENCE_REG_965_0 + reg * 8); 2232 } 2233 2234 static void i915_write_fence_reg(struct drm_device *dev, int reg, 2235 struct drm_i915_gem_object *obj) 2236 { 2237 drm_i915_private_t *dev_priv = dev->dev_private; 2238 u32 val; 2239 2240 if (obj) { 2241 u32 size = obj->gtt_space->size; 2242 int pitch_val; 2243 int tile_width; 2244 2245 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2246 (size & -size) != size || 2247 (obj->gtt_offset & (size - 1)), 2248 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2249 obj->gtt_offset, obj->map_and_fenceable, size); 2250 2251 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2252 tile_width = 128; 2253 else 2254 tile_width = 512; 2255 2256 /* Note: pitch better be a power of two tile widths */ 2257 pitch_val = obj->stride / tile_width; 2258 pitch_val = ffs(pitch_val) - 1; 2259 2260 val = obj->gtt_offset; 2261 if (obj->tiling_mode == I915_TILING_Y) 2262 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2263 val |= I915_FENCE_SIZE_BITS(size); 2264 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2265 val |= I830_FENCE_REG_VALID; 2266 } else 2267 val = 0; 2268 2269 if (reg < 8) 2270 reg = FENCE_REG_830_0 + reg * 4; 2271 else 2272 reg = FENCE_REG_945_8 + (reg - 8) * 4; 2273 2274 I915_WRITE(reg, val); 2275 POSTING_READ(reg); 2276 } 2277 2278 static void i830_write_fence_reg(struct drm_device *dev, int reg, 2279 struct drm_i915_gem_object *obj) 2280 { 2281 drm_i915_private_t *dev_priv = dev->dev_private; 2282 uint32_t val; 2283 2284 if (obj) { 2285 u32 size = obj->gtt_space->size; 2286 uint32_t pitch_val; 2287 2288 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2289 (size & -size) != size || 2290 (obj->gtt_offset & (size - 1)), 2291 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2292 obj->gtt_offset, size); 2293 2294 pitch_val = obj->stride / 128; 2295 pitch_val = ffs(pitch_val) - 1; 2296 2297 val = obj->gtt_offset; 2298 if (obj->tiling_mode == I915_TILING_Y) 2299 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2300 val |= I830_FENCE_SIZE_BITS(size); 2301 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2302 val |= I830_FENCE_REG_VALID; 2303 } else 2304 val = 0; 2305 2306 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 2307 POSTING_READ(FENCE_REG_830_0 + reg * 4); 2308 } 2309 2310 static void i915_gem_write_fence(struct drm_device *dev, int reg, 2311 struct drm_i915_gem_object *obj) 2312 { 2313 switch (INTEL_INFO(dev)->gen) { 2314 case 7: 2315 case 6: sandybridge_write_fence_reg(dev, reg, obj); break; 2316 case 5: 2317 case 4: i965_write_fence_reg(dev, reg, obj); break; 2318 case 3: i915_write_fence_reg(dev, reg, obj); break; 2319 case 2: i830_write_fence_reg(dev, reg, obj); break; 2320 default: break; 2321 } 2322 } 2323 2324 static inline int fence_number(struct drm_i915_private *dev_priv, 2325 struct drm_i915_fence_reg *fence) 2326 { 2327 return fence - dev_priv->fence_regs; 2328 } 2329 2330 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 2331 struct drm_i915_fence_reg *fence, 2332 bool enable) 2333 { 2334 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2335 int reg = fence_number(dev_priv, fence); 2336 2337 i915_gem_write_fence(obj->base.dev, reg, enable ? 
obj : NULL); 2338 2339 if (enable) { 2340 obj->fence_reg = reg; 2341 fence->obj = obj; 2342 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 2343 } else { 2344 obj->fence_reg = I915_FENCE_REG_NONE; 2345 fence->obj = NULL; 2346 list_del_init(&fence->lru_list); 2347 } 2348 } 2349 2350 static int 2351 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) 2352 { 2353 int ret; 2354 2355 if (obj->fenced_gpu_access) { 2356 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2357 ret = i915_gem_flush_ring(obj->ring, 2358 0, obj->base.write_domain); 2359 if (ret) 2360 return ret; 2361 } 2362 2363 obj->fenced_gpu_access = false; 2364 } 2365 2366 if (obj->last_fenced_seqno) { 2367 ret = i915_wait_request(obj->ring, obj->last_fenced_seqno); 2368 if (ret) 2369 return ret; 2370 2371 obj->last_fenced_seqno = 0; 2372 } 2373 2374 /* Ensure that all CPU reads are completed before installing a fence 2375 * and all writes before removing the fence. 2376 */ 2377 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2378 mb(); 2379 2380 return 0; 2381 } 2382 2383 int 2384 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2385 { 2386 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2387 int ret; 2388 2389 ret = i915_gem_object_flush_fence(obj); 2390 if (ret) 2391 return ret; 2392 2393 if (obj->fence_reg == I915_FENCE_REG_NONE) 2394 return 0; 2395 2396 i915_gem_object_update_fence(obj, 2397 &dev_priv->fence_regs[obj->fence_reg], 2398 false); 2399 i915_gem_object_fence_lost(obj); 2400 2401 return 0; 2402 } 2403 2404 static struct drm_i915_fence_reg * 2405 i915_find_fence_reg(struct drm_device *dev) 2406 { 2407 struct drm_i915_private *dev_priv = dev->dev_private; 2408 struct drm_i915_fence_reg *reg, *avail; 2409 int i; 2410 2411 /* First try to find a free reg */ 2412 avail = NULL; 2413 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2414 reg = &dev_priv->fence_regs[i]; 2415 if (!reg->obj) 2416 return reg; 2417 2418 if (!reg->pin_count) 2419 avail = reg; 2420 } 2421 2422 if (avail == NULL) 2423 return NULL; 2424 2425 /* None available, try to steal one or wait for a user to finish */ 2426 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2427 if (reg->pin_count) 2428 continue; 2429 2430 return reg; 2431 } 2432 2433 return NULL; 2434 } 2435 2436 /** 2437 * i915_gem_object_get_fence - set up fencing for an object 2438 * @obj: object to map through a fence reg 2439 * 2440 * When mapping objects through the GTT, userspace wants to be able to write 2441 * to them without having to worry about swizzling if the object is tiled. 2442 * This function walks the fence regs looking for a free one for @obj, 2443 * stealing one if it can't find any. 2444 * 2445 * It then sets up the reg based on the object's properties: address, pitch 2446 * and tiling format. 2447 * 2448 * For an untiled surface, this removes any existing fence. 2449 */ 2450 int 2451 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 2452 { 2453 struct drm_device *dev = obj->base.dev; 2454 struct drm_i915_private *dev_priv = dev->dev_private; 2455 bool enable = obj->tiling_mode != I915_TILING_NONE; 2456 struct drm_i915_fence_reg *reg; 2457 int ret; 2458 2459 /* Have we updated the tiling parameters upon the object and so 2460 * will need to serialise the write to the associated fence register? 
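 * If so, i915_gem_object_flush_fence() below waits for any outstanding
 * fenced GPU access before the register is rewritten.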
2461 */ 2462 if (obj->fence_dirty) { 2463 ret = i915_gem_object_flush_fence(obj); 2464 if (ret) 2465 return ret; 2466 } 2467 2468 /* Just update our place in the LRU if our fence is getting reused. */ 2469 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2470 reg = &dev_priv->fence_regs[obj->fence_reg]; 2471 if (!obj->fence_dirty) { 2472 list_move_tail(®->lru_list, 2473 &dev_priv->mm.fence_list); 2474 return 0; 2475 } 2476 } else if (enable) { 2477 reg = i915_find_fence_reg(dev); 2478 if (reg == NULL) 2479 return -EDEADLK; 2480 2481 if (reg->obj) { 2482 struct drm_i915_gem_object *old = reg->obj; 2483 2484 ret = i915_gem_object_flush_fence(old); 2485 if (ret) 2486 return ret; 2487 2488 i915_gem_object_fence_lost(old); 2489 } 2490 } else 2491 return 0; 2492 2493 i915_gem_object_update_fence(obj, reg, enable); 2494 obj->fence_dirty = false; 2495 2496 return 0; 2497 } 2498 2499 /** 2500 * Finds free space in the GTT aperture and binds the object there. 2501 */ 2502 static int 2503 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2504 unsigned alignment, 2505 bool map_and_fenceable) 2506 { 2507 struct drm_device *dev = obj->base.dev; 2508 drm_i915_private_t *dev_priv = dev->dev_private; 2509 struct drm_mm_node *free_space; 2510 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2511 u32 size, fence_size, fence_alignment, unfenced_alignment; 2512 bool mappable, fenceable; 2513 int ret; 2514 2515 if (obj->madv != I915_MADV_WILLNEED) { 2516 DRM_ERROR("Attempting to bind a purgeable object\n"); 2517 return -EINVAL; 2518 } 2519 2520 fence_size = i915_gem_get_gtt_size(dev, 2521 obj->base.size, 2522 obj->tiling_mode); 2523 fence_alignment = i915_gem_get_gtt_alignment(dev, 2524 obj->base.size, 2525 obj->tiling_mode); 2526 unfenced_alignment = 2527 i915_gem_get_unfenced_gtt_alignment(dev, 2528 obj->base.size, 2529 obj->tiling_mode); 2530 2531 if (alignment == 0) 2532 alignment = map_and_fenceable ? fence_alignment : 2533 unfenced_alignment; 2534 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 2535 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2536 return -EINVAL; 2537 } 2538 2539 size = map_and_fenceable ? fence_size : obj->base.size; 2540 2541 /* If the object is bigger than the entire aperture, reject it early 2542 * before evicting everything in a vain attempt to find space. 2543 */ 2544 if (obj->base.size > 2545 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 2546 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2547 return -E2BIG; 2548 } 2549 2550 search_free: 2551 if (map_and_fenceable) 2552 free_space = 2553 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, 2554 size, alignment, 0, 2555 dev_priv->mm.gtt_mappable_end, 2556 0); 2557 else 2558 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2559 size, alignment, 0); 2560 2561 if (free_space != NULL) { 2562 if (map_and_fenceable) 2563 obj->gtt_space = 2564 drm_mm_get_block_range_generic(free_space, 2565 size, alignment, 0, 2566 dev_priv->mm.gtt_mappable_end, 2567 0); 2568 else 2569 obj->gtt_space = 2570 drm_mm_get_block(free_space, size, alignment); 2571 } 2572 if (obj->gtt_space == NULL) { 2573 /* If the gtt is empty and we're still having trouble 2574 * fitting our object in, we're out of memory. 
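 * Otherwise evict something suitable and retry the search.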
2575 */ 2576 ret = i915_gem_evict_something(dev, size, alignment, 2577 map_and_fenceable); 2578 if (ret) 2579 return ret; 2580 2581 goto search_free; 2582 } 2583 2584 ret = i915_gem_object_get_pages_gtt(obj, gfpmask); 2585 if (ret) { 2586 drm_mm_put_block(obj->gtt_space); 2587 obj->gtt_space = NULL; 2588 2589 if (ret == -ENOMEM) { 2590 /* first try to reclaim some memory by clearing the GTT */ 2591 ret = i915_gem_evict_everything(dev, false); 2592 if (ret) { 2593 /* now try to shrink everyone else */ 2594 if (gfpmask) { 2595 gfpmask = 0; 2596 goto search_free; 2597 } 2598 2599 return -ENOMEM; 2600 } 2601 2602 goto search_free; 2603 } 2604 2605 return ret; 2606 } 2607 2608 ret = i915_gem_gtt_prepare_object(obj); 2609 if (ret) { 2610 i915_gem_object_put_pages_gtt(obj); 2611 drm_mm_put_block(obj->gtt_space); 2612 obj->gtt_space = NULL; 2613 2614 if (i915_gem_evict_everything(dev, false)) 2615 return ret; 2616 2617 goto search_free; 2618 } 2619 2620 if (!dev_priv->mm.aliasing_ppgtt) 2621 i915_gem_gtt_bind_object(obj, obj->cache_level); 2622 2623 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); 2624 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2625 2626 /* Assert that the object is not currently in any GPU domain. As it 2627 * wasn't in the GTT, there shouldn't be any way it could have been in 2628 * a GPU cache 2629 */ 2630 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2631 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2632 2633 obj->gtt_offset = obj->gtt_space->start; 2634 2635 fenceable = 2636 obj->gtt_space->size == fence_size && 2637 (obj->gtt_space->start & (fence_alignment - 1)) == 0; 2638 2639 mappable = 2640 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 2641 2642 obj->map_and_fenceable = mappable && fenceable; 2643 2644 trace_i915_gem_object_bind(obj, map_and_fenceable); 2645 return 0; 2646 } 2647 2648 void 2649 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2650 { 2651 /* If we don't have a page list set up, then we're not pinned 2652 * to GPU, and we can ignore the cache flush because it'll happen 2653 * again at bind time. 2654 */ 2655 if (obj->pages == NULL) 2656 return; 2657 2658 /* If the GPU is snooping the contents of the CPU cache, 2659 * we do not need to manually clear the CPU cache lines. However, 2660 * the caches are only snooped when the render cache is 2661 * flushed/invalidated. As we always have to emit invalidations 2662 * and flushes when moving into and out of the RENDER domain, correct 2663 * snooping behaviour occurs naturally as the result of our domain 2664 * tracking. 2665 */ 2666 if (obj->cache_level != I915_CACHE_NONE) 2667 return; 2668 2669 trace_i915_gem_object_clflush(obj); 2670 2671 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2672 } 2673 2674 /** Flushes any GPU write domain for the object if it's dirty. */ 2675 static int 2676 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) 2677 { 2678 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) 2679 return 0; 2680 2681 /* Queue the GPU write cache flushing we need. */ 2682 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2683 } 2684 2685 /** Flushes the GTT write domain for the object if it's dirty. */ 2686 static void 2687 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2688 { 2689 uint32_t old_write_domain; 2690 2691 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2692 return; 2693 2694 /* No actual flushing is required for the GTT write domain. 
Writes 2695 * to it immediately go to main memory as far as we know, so there's 2696 * no chipset flush. It also doesn't land in render cache. 2697 * 2698 * However, we do have to enforce the order so that all writes through 2699 * the GTT land before any writes to the device, such as updates to 2700 * the GATT itself. 2701 */ 2702 wmb(); 2703 2704 old_write_domain = obj->base.write_domain; 2705 obj->base.write_domain = 0; 2706 2707 trace_i915_gem_object_change_domain(obj, 2708 obj->base.read_domains, 2709 old_write_domain); 2710 } 2711 2712 /** Flushes the CPU write domain for the object if it's dirty. */ 2713 static void 2714 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2715 { 2716 uint32_t old_write_domain; 2717 2718 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2719 return; 2720 2721 i915_gem_clflush_object(obj); 2722 intel_gtt_chipset_flush(); 2723 old_write_domain = obj->base.write_domain; 2724 obj->base.write_domain = 0; 2725 2726 trace_i915_gem_object_change_domain(obj, 2727 obj->base.read_domains, 2728 old_write_domain); 2729 } 2730 2731 /** 2732 * Moves a single object to the GTT read, and possibly write domain. 2733 * 2734 * This function returns when the move is complete, including waiting on 2735 * flushes to occur. 2736 */ 2737 int 2738 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2739 { 2740 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2741 uint32_t old_write_domain, old_read_domains; 2742 int ret; 2743 2744 /* Not valid to be called on unbound objects. */ 2745 if (obj->gtt_space == NULL) 2746 return -EINVAL; 2747 2748 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2749 return 0; 2750 2751 ret = i915_gem_object_flush_gpu_write_domain(obj); 2752 if (ret) 2753 return ret; 2754 2755 if (obj->pending_gpu_write || write) { 2756 ret = i915_gem_object_wait_rendering(obj); 2757 if (ret) 2758 return ret; 2759 } 2760 2761 i915_gem_object_flush_cpu_write_domain(obj); 2762 2763 old_write_domain = obj->base.write_domain; 2764 old_read_domains = obj->base.read_domains; 2765 2766 /* It should now be out of any other write domains, and we can update 2767 * the domain values for our changes. 2768 */ 2769 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2770 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2771 if (write) { 2772 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2773 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2774 obj->dirty = 1; 2775 } 2776 2777 trace_i915_gem_object_change_domain(obj, 2778 old_read_domains, 2779 old_write_domain); 2780 2781 /* And bump the LRU for this access */ 2782 if (i915_gem_object_is_inactive(obj)) 2783 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2784 2785 return 0; 2786 } 2787 2788 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 2789 enum i915_cache_level cache_level) 2790 { 2791 struct drm_device *dev = obj->base.dev; 2792 drm_i915_private_t *dev_priv = dev->dev_private; 2793 int ret; 2794 2795 if (obj->cache_level == cache_level) 2796 return 0; 2797 2798 if (obj->pin_count) { 2799 DRM_DEBUG("can not change the cache level of pinned objects\n"); 2800 return -EBUSY; 2801 } 2802 2803 if (obj->gtt_space) { 2804 ret = i915_gem_object_finish_gpu(obj); 2805 if (ret) 2806 return ret; 2807 2808 i915_gem_object_finish_gtt(obj); 2809 2810 /* Before SandyBridge, you could not use tiling or fence 2811 * registers with snooped memory, so relinquish any fences 2812 * currently pointing to our region in the aperture. 
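 * The GTT and aliasing-PPGTT entries are then rewritten below with
 * the new cache level.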
2813 */ 2814 if (INTEL_INFO(obj->base.dev)->gen < 6) { 2815 ret = i915_gem_object_put_fence(obj); 2816 if (ret) 2817 return ret; 2818 } 2819 2820 if (obj->has_global_gtt_mapping) 2821 i915_gem_gtt_bind_object(obj, cache_level); 2822 if (obj->has_aliasing_ppgtt_mapping) 2823 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 2824 obj, cache_level); 2825 } 2826 2827 if (cache_level == I915_CACHE_NONE) { 2828 u32 old_read_domains, old_write_domain; 2829 2830 /* If we're coming from LLC cached, then we haven't 2831 * actually been tracking whether the data is in the 2832 * CPU cache or not, since we only allow one bit set 2833 * in obj->write_domain and have been skipping the clflushes. 2834 * Just set it to the CPU cache for now. 2835 */ 2836 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 2837 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 2838 2839 old_read_domains = obj->base.read_domains; 2840 old_write_domain = obj->base.write_domain; 2841 2842 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2843 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2844 2845 trace_i915_gem_object_change_domain(obj, 2846 old_read_domains, 2847 old_write_domain); 2848 } 2849 2850 obj->cache_level = cache_level; 2851 return 0; 2852 } 2853 2854 /* 2855 * Prepare buffer for display plane (scanout, cursors, etc). 2856 * Can be called from an uninterruptible phase (modesetting) and allows 2857 * any flushes to be pipelined (for pageflips). 2858 */ 2859 int 2860 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 2861 u32 alignment, 2862 struct intel_ring_buffer *pipelined) 2863 { 2864 u32 old_read_domains, old_write_domain; 2865 int ret; 2866 2867 ret = i915_gem_object_flush_gpu_write_domain(obj); 2868 if (ret) 2869 return ret; 2870 2871 if (pipelined != obj->ring) { 2872 ret = i915_gem_object_sync(obj, pipelined); 2873 if (ret) 2874 return ret; 2875 } 2876 2877 /* The display engine is not coherent with the LLC cache on gen6. As 2878 * a result, we make sure that the pinning that is about to occur is 2879 * done with uncached PTEs. This is lowest common denominator for all 2880 * chipsets. 2881 * 2882 * However for gen6+, we could do better by using the GFDT bit instead 2883 * of uncaching, which would allow us to flush all the LLC-cached data 2884 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 2885 */ 2886 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 2887 if (ret) 2888 return ret; 2889 2890 /* As the user may map the buffer once pinned in the display plane 2891 * (e.g. libkms for the bootup splash), we have to ensure that we 2892 * always use map_and_fenceable for all scanout buffers. 2893 */ 2894 ret = i915_gem_object_pin(obj, alignment, true); 2895 if (ret) 2896 return ret; 2897 2898 i915_gem_object_flush_cpu_write_domain(obj); 2899 2900 old_write_domain = obj->base.write_domain; 2901 old_read_domains = obj->base.read_domains; 2902 2903 /* It should now be out of any other write domains, and we can update 2904 * the domain values for our changes. 
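 * Scanout buffers are only ever read by the display engine, so marking
 * the object as readable through the GTT is sufficient here.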
2905 */ 2906 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2907 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2908 2909 trace_i915_gem_object_change_domain(obj, 2910 old_read_domains, 2911 old_write_domain); 2912 2913 return 0; 2914 } 2915 2916 int 2917 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 2918 { 2919 int ret; 2920 2921 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 2922 return 0; 2923 2924 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2925 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2926 if (ret) 2927 return ret; 2928 } 2929 2930 ret = i915_gem_object_wait_rendering(obj); 2931 if (ret) 2932 return ret; 2933 2934 /* Ensure that we invalidate the GPU's caches and TLBs. */ 2935 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2936 return 0; 2937 } 2938 2939 /** 2940 * Moves a single object to the CPU read, and possibly write domain. 2941 * 2942 * This function returns when the move is complete, including waiting on 2943 * flushes to occur. 2944 */ 2945 int 2946 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 2947 { 2948 uint32_t old_write_domain, old_read_domains; 2949 int ret; 2950 2951 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 2952 return 0; 2953 2954 ret = i915_gem_object_flush_gpu_write_domain(obj); 2955 if (ret) 2956 return ret; 2957 2958 if (write || obj->pending_gpu_write) { 2959 ret = i915_gem_object_wait_rendering(obj); 2960 if (ret) 2961 return ret; 2962 } 2963 2964 i915_gem_object_flush_gtt_write_domain(obj); 2965 2966 old_write_domain = obj->base.write_domain; 2967 old_read_domains = obj->base.read_domains; 2968 2969 /* Flush the CPU cache if it's still invalid. */ 2970 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2971 i915_gem_clflush_object(obj); 2972 2973 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 2974 } 2975 2976 /* It should now be out of any other write domains, and we can update 2977 * the domain values for our changes. 2978 */ 2979 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2980 2981 /* If we're writing through the CPU, then the GPU read domains will 2982 * need to be invalidated at next use. 2983 */ 2984 if (write) { 2985 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2986 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2987 } 2988 2989 trace_i915_gem_object_change_domain(obj, 2990 old_read_domains, 2991 old_write_domain); 2992 2993 return 0; 2994 } 2995 2996 /* Throttle our rendering by waiting until the ring has completed our requests 2997 * emitted over 20 msec ago. 2998 * 2999 * Note that if we were to use the current jiffies each time around the loop, 3000 * we wouldn't escape the function with any frames outstanding if the time to 3001 * render a frame was over 20ms. 3002 * 3003 * This should get us reasonable parallelism between CPU and GPU but also 3004 * relatively low latency when blocking on a particular request to finish. 
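 *
 * Concretely: pick the newest request this client emitted more than
 * 20ms ago, wait for it interruptibly, and then kick the retire worker
 * so the request list gets pruned.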
3005 */ 3006 static int 3007 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3008 { 3009 struct drm_i915_private *dev_priv = dev->dev_private; 3010 struct drm_i915_file_private *file_priv = file->driver_priv; 3011 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3012 struct drm_i915_gem_request *request; 3013 struct intel_ring_buffer *ring = NULL; 3014 u32 seqno = 0; 3015 int ret; 3016 3017 if (atomic_read(&dev_priv->mm.wedged)) 3018 return -EIO; 3019 3020 spin_lock(&file_priv->mm.lock); 3021 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3022 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3023 break; 3024 3025 ring = request->ring; 3026 seqno = request->seqno; 3027 } 3028 spin_unlock(&file_priv->mm.lock); 3029 3030 if (seqno == 0) 3031 return 0; 3032 3033 ret = __wait_seqno(ring, seqno, true); 3034 if (ret == 0) 3035 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3036 3037 return ret; 3038 } 3039 3040 int 3041 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3042 uint32_t alignment, 3043 bool map_and_fenceable) 3044 { 3045 int ret; 3046 3047 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 3048 3049 if (obj->gtt_space != NULL) { 3050 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3051 (map_and_fenceable && !obj->map_and_fenceable)) { 3052 WARN(obj->pin_count, 3053 "bo is already pinned with incorrect alignment:" 3054 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3055 " obj->map_and_fenceable=%d\n", 3056 obj->gtt_offset, alignment, 3057 map_and_fenceable, 3058 obj->map_and_fenceable); 3059 ret = i915_gem_object_unbind(obj); 3060 if (ret) 3061 return ret; 3062 } 3063 } 3064 3065 if (obj->gtt_space == NULL) { 3066 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3067 map_and_fenceable); 3068 if (ret) 3069 return ret; 3070 } 3071 3072 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3073 i915_gem_gtt_bind_object(obj, obj->cache_level); 3074 3075 obj->pin_count++; 3076 obj->pin_mappable |= map_and_fenceable; 3077 3078 return 0; 3079 } 3080 3081 void 3082 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3083 { 3084 BUG_ON(obj->pin_count == 0); 3085 BUG_ON(obj->gtt_space == NULL); 3086 3087 if (--obj->pin_count == 0) 3088 obj->pin_mappable = false; 3089 } 3090 3091 int 3092 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3093 struct drm_file *file) 3094 { 3095 struct drm_i915_gem_pin *args = data; 3096 struct drm_i915_gem_object *obj; 3097 int ret; 3098 3099 ret = i915_mutex_lock_interruptible(dev); 3100 if (ret) 3101 return ret; 3102 3103 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3104 if (&obj->base == NULL) { 3105 ret = -ENOENT; 3106 goto unlock; 3107 } 3108 3109 if (obj->madv != I915_MADV_WILLNEED) { 3110 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3111 ret = -EINVAL; 3112 goto out; 3113 } 3114 3115 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3116 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3117 args->handle); 3118 ret = -EINVAL; 3119 goto out; 3120 } 3121 3122 obj->user_pin_count++; 3123 obj->pin_filp = file; 3124 if (obj->user_pin_count == 1) { 3125 ret = i915_gem_object_pin(obj, args->alignment, true); 3126 if (ret) 3127 goto out; 3128 } 3129 3130 /* XXX - flush the CPU caches for pinned objects 3131 * as the X server doesn't manage domains yet 3132 */ 3133 i915_gem_object_flush_cpu_write_domain(obj); 3134 args->offset = obj->gtt_offset; 3135 out: 3136 
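	/* Drop the reference taken by the object lookup above. */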
drm_gem_object_unreference(&obj->base); 3137 unlock: 3138 mutex_unlock(&dev->struct_mutex); 3139 return ret; 3140 } 3141 3142 int 3143 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3144 struct drm_file *file) 3145 { 3146 struct drm_i915_gem_pin *args = data; 3147 struct drm_i915_gem_object *obj; 3148 int ret; 3149 3150 ret = i915_mutex_lock_interruptible(dev); 3151 if (ret) 3152 return ret; 3153 3154 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3155 if (&obj->base == NULL) { 3156 ret = -ENOENT; 3157 goto unlock; 3158 } 3159 3160 if (obj->pin_filp != file) { 3161 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3162 args->handle); 3163 ret = -EINVAL; 3164 goto out; 3165 } 3166 obj->user_pin_count--; 3167 if (obj->user_pin_count == 0) { 3168 obj->pin_filp = NULL; 3169 i915_gem_object_unpin(obj); 3170 } 3171 3172 out: 3173 drm_gem_object_unreference(&obj->base); 3174 unlock: 3175 mutex_unlock(&dev->struct_mutex); 3176 return ret; 3177 } 3178 3179 int 3180 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3181 struct drm_file *file) 3182 { 3183 struct drm_i915_gem_busy *args = data; 3184 struct drm_i915_gem_object *obj; 3185 int ret; 3186 3187 ret = i915_mutex_lock_interruptible(dev); 3188 if (ret) 3189 return ret; 3190 3191 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3192 if (&obj->base == NULL) { 3193 ret = -ENOENT; 3194 goto unlock; 3195 } 3196 3197 /* Count all active objects as busy, even if they are currently not used 3198 * by the gpu. Users of this interface expect objects to eventually 3199 * become non-busy without any further actions, therefore emit any 3200 * necessary flushes here. 3201 */ 3202 args->busy = obj->active; 3203 if (args->busy) { 3204 /* Unconditionally flush objects, even when the gpu still uses this 3205 * object. Userspace calling this function indicates that it wants to 3206 * use this buffer rather sooner than later, so issuing the required 3207 * flush earlier is beneficial. 3208 */ 3209 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3210 ret = i915_gem_flush_ring(obj->ring, 3211 0, obj->base.write_domain); 3212 } else { 3213 ret = i915_gem_check_olr(obj->ring, 3214 obj->last_rendering_seqno); 3215 } 3216 3217 /* Update the active list for the hardware's current position. 3218 * Otherwise this only updates on a delayed timer or when irqs 3219 * are actually unmasked, and our working set ends up being 3220 * larger than required. 
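 * Retiring here also refreshes obj->active, which is what we report
 * back to userspace below.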
3221 */ 3222 i915_gem_retire_requests_ring(obj->ring); 3223 3224 args->busy = obj->active; 3225 } 3226 3227 drm_gem_object_unreference(&obj->base); 3228 unlock: 3229 mutex_unlock(&dev->struct_mutex); 3230 return ret; 3231 } 3232 3233 int 3234 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3235 struct drm_file *file_priv) 3236 { 3237 return i915_gem_ring_throttle(dev, file_priv); 3238 } 3239 3240 int 3241 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3242 struct drm_file *file_priv) 3243 { 3244 struct drm_i915_gem_madvise *args = data; 3245 struct drm_i915_gem_object *obj; 3246 int ret; 3247 3248 switch (args->madv) { 3249 case I915_MADV_DONTNEED: 3250 case I915_MADV_WILLNEED: 3251 break; 3252 default: 3253 return -EINVAL; 3254 } 3255 3256 ret = i915_mutex_lock_interruptible(dev); 3257 if (ret) 3258 return ret; 3259 3260 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3261 if (&obj->base == NULL) { 3262 ret = -ENOENT; 3263 goto unlock; 3264 } 3265 3266 if (obj->pin_count) { 3267 ret = -EINVAL; 3268 goto out; 3269 } 3270 3271 if (obj->madv != __I915_MADV_PURGED) 3272 obj->madv = args->madv; 3273 3274 /* if the object is no longer bound, discard its backing storage */ 3275 if (i915_gem_object_is_purgeable(obj) && 3276 obj->gtt_space == NULL) 3277 i915_gem_object_truncate(obj); 3278 3279 args->retained = obj->madv != __I915_MADV_PURGED; 3280 3281 out: 3282 drm_gem_object_unreference(&obj->base); 3283 unlock: 3284 mutex_unlock(&dev->struct_mutex); 3285 return ret; 3286 } 3287 3288 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3289 size_t size) 3290 { 3291 struct drm_i915_private *dev_priv = dev->dev_private; 3292 struct drm_i915_gem_object *obj; 3293 struct address_space *mapping; 3294 u32 mask; 3295 3296 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 3297 if (obj == NULL) 3298 return NULL; 3299 3300 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3301 kfree(obj); 3302 return NULL; 3303 } 3304 3305 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 3306 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 3307 /* 965gm cannot relocate objects above 4GiB. */ 3308 mask &= ~__GFP_HIGHMEM; 3309 mask |= __GFP_DMA32; 3310 } 3311 3312 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3313 mapping_set_gfp_mask(mapping, mask); 3314 3315 i915_gem_info_add_obj(dev_priv, size); 3316 3317 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3318 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3319 3320 if (HAS_LLC(dev)) { 3321 /* On some devices, we can have the GPU use the LLC (the CPU 3322 * cache) for about a 10% performance improvement 3323 * compared to uncached. Graphics requests other than 3324 * display scanout are coherent with the CPU in 3325 * accessing this cache. This means in this mode we 3326 * don't need to clflush on the CPU side, and on the 3327 * GPU side we only need to flush internal caches to 3328 * get data visible to the CPU. 3329 * 3330 * However, we maintain the display planes as UC, and so 3331 * need to rebind when first used as such. 3332 */ 3333 obj->cache_level = I915_CACHE_LLC; 3334 } else 3335 obj->cache_level = I915_CACHE_NONE; 3336 3337 obj->base.driver_private = NULL; 3338 obj->fence_reg = I915_FENCE_REG_NONE; 3339 INIT_LIST_HEAD(&obj->mm_list); 3340 INIT_LIST_HEAD(&obj->gtt_list); 3341 INIT_LIST_HEAD(&obj->ring_list); 3342 INIT_LIST_HEAD(&obj->exec_list); 3343 INIT_LIST_HEAD(&obj->gpu_write_list); 3344 obj->madv = I915_MADV_WILLNEED; 3345 /* Avoid an unnecessary call to unbind on the first bind. 
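 * (The real value is recomputed by i915_gem_object_bind_to_gtt().)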
*/ 3346 obj->map_and_fenceable = true; 3347 3348 return obj; 3349 } 3350 3351 int i915_gem_init_object(struct drm_gem_object *obj) 3352 { 3353 BUG(); 3354 3355 return 0; 3356 } 3357 3358 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3359 { 3360 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3361 struct drm_device *dev = obj->base.dev; 3362 drm_i915_private_t *dev_priv = dev->dev_private; 3363 3364 trace_i915_gem_object_destroy(obj); 3365 3366 if (gem_obj->import_attach) 3367 drm_prime_gem_destroy(gem_obj, obj->sg_table); 3368 3369 if (obj->phys_obj) 3370 i915_gem_detach_phys_object(dev, obj); 3371 3372 obj->pin_count = 0; 3373 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 3374 bool was_interruptible; 3375 3376 was_interruptible = dev_priv->mm.interruptible; 3377 dev_priv->mm.interruptible = false; 3378 3379 WARN_ON(i915_gem_object_unbind(obj)); 3380 3381 dev_priv->mm.interruptible = was_interruptible; 3382 } 3383 3384 if (obj->base.map_list.map) 3385 drm_gem_free_mmap_offset(&obj->base); 3386 3387 drm_gem_object_release(&obj->base); 3388 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3389 3390 kfree(obj->bit_17); 3391 kfree(obj); 3392 } 3393 3394 int 3395 i915_gem_idle(struct drm_device *dev) 3396 { 3397 drm_i915_private_t *dev_priv = dev->dev_private; 3398 int ret; 3399 3400 mutex_lock(&dev->struct_mutex); 3401 3402 if (dev_priv->mm.suspended) { 3403 mutex_unlock(&dev->struct_mutex); 3404 return 0; 3405 } 3406 3407 ret = i915_gpu_idle(dev); 3408 if (ret) { 3409 mutex_unlock(&dev->struct_mutex); 3410 return ret; 3411 } 3412 i915_gem_retire_requests(dev); 3413 3414 /* Under UMS, be paranoid and evict. */ 3415 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3416 i915_gem_evict_everything(dev, false); 3417 3418 i915_gem_reset_fences(dev); 3419 3420 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3421 * We need to replace this with a semaphore, or something. 3422 * And not confound mm.suspended! 3423 */ 3424 dev_priv->mm.suspended = 1; 3425 del_timer_sync(&dev_priv->hangcheck_timer); 3426 3427 i915_kernel_lost_context(dev); 3428 i915_gem_cleanup_ringbuffer(dev); 3429 3430 mutex_unlock(&dev->struct_mutex); 3431 3432 /* Cancel the retire work handler, which should be idle now. 
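 * (mm.suspended is set above, so it will not requeue itself.)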
*/ 3433 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3434 3435 return 0; 3436 } 3437 3438 void i915_gem_init_swizzling(struct drm_device *dev) 3439 { 3440 drm_i915_private_t *dev_priv = dev->dev_private; 3441 3442 if (INTEL_INFO(dev)->gen < 5 || 3443 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 3444 return; 3445 3446 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 3447 DISP_TILE_SURFACE_SWIZZLING); 3448 3449 if (IS_GEN5(dev)) 3450 return; 3451 3452 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 3453 if (IS_GEN6(dev)) 3454 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 3455 else 3456 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 3457 } 3458 3459 void i915_gem_init_ppgtt(struct drm_device *dev) 3460 { 3461 drm_i915_private_t *dev_priv = dev->dev_private; 3462 uint32_t pd_offset; 3463 struct intel_ring_buffer *ring; 3464 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 3465 uint32_t __iomem *pd_addr; 3466 uint32_t pd_entry; 3467 int i; 3468 3469 if (!dev_priv->mm.aliasing_ppgtt) 3470 return; 3471 3472 3473 pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t); 3474 for (i = 0; i < ppgtt->num_pd_entries; i++) { 3475 dma_addr_t pt_addr; 3476 3477 if (dev_priv->mm.gtt->needs_dmar) 3478 pt_addr = ppgtt->pt_dma_addr[i]; 3479 else 3480 pt_addr = page_to_phys(ppgtt->pt_pages[i]); 3481 3482 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); 3483 pd_entry |= GEN6_PDE_VALID; 3484 3485 writel(pd_entry, pd_addr + i); 3486 } 3487 readl(pd_addr); 3488 3489 pd_offset = ppgtt->pd_offset; 3490 pd_offset /= 64; /* in cachelines, */ 3491 pd_offset <<= 16; 3492 3493 if (INTEL_INFO(dev)->gen == 6) { 3494 uint32_t ecochk, gab_ctl, ecobits; 3495 3496 ecobits = I915_READ(GAC_ECO_BITS); 3497 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 3498 3499 gab_ctl = I915_READ(GAB_CTL); 3500 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 3501 3502 ecochk = I915_READ(GAM_ECOCHK); 3503 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | 3504 ECOCHK_PPGTT_CACHE64B); 3505 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 3506 } else if (INTEL_INFO(dev)->gen >= 7) { 3507 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); 3508 /* GFX_MODE is per-ring on gen7+ */ 3509 } 3510 3511 for_each_ring(ring, dev_priv, i) { 3512 if (INTEL_INFO(dev)->gen >= 7) 3513 I915_WRITE(RING_MODE_GEN7(ring), 3514 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 3515 3516 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 3517 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); 3518 } 3519 } 3520 3521 int 3522 i915_gem_init_hw(struct drm_device *dev) 3523 { 3524 drm_i915_private_t *dev_priv = dev->dev_private; 3525 int ret; 3526 3527 i915_gem_init_swizzling(dev); 3528 3529 ret = intel_init_render_ring_buffer(dev); 3530 if (ret) 3531 return ret; 3532 3533 if (HAS_BSD(dev)) { 3534 ret = intel_init_bsd_ring_buffer(dev); 3535 if (ret) 3536 goto cleanup_render_ring; 3537 } 3538 3539 if (HAS_BLT(dev)) { 3540 ret = intel_init_blt_ring_buffer(dev); 3541 if (ret) 3542 goto cleanup_bsd_ring; 3543 } 3544 3545 dev_priv->next_seqno = 1; 3546 3547 i915_gem_init_ppgtt(dev); 3548 3549 return 0; 3550 3551 cleanup_bsd_ring: 3552 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3553 cleanup_render_ring: 3554 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3555 return ret; 3556 } 3557 3558 static bool 3559 intel_enable_ppgtt(struct drm_device *dev) 3560 { 3561 if (i915_enable_ppgtt >= 0) 3562 return i915_enable_ppgtt; 3563 3564 #ifdef CONFIG_INTEL_IOMMU 3565 /* Disable ppgtt 
on SNB if VT-d is on. */ 3566 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) 3567 return false; 3568 #endif 3569 3570 return true; 3571 } 3572 3573 int i915_gem_init(struct drm_device *dev) 3574 { 3575 struct drm_i915_private *dev_priv = dev->dev_private; 3576 unsigned long gtt_size, mappable_size; 3577 int ret; 3578 3579 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; 3580 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; 3581 3582 mutex_lock(&dev->struct_mutex); 3583 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { 3584 /* PPGTT pdes are stolen from global gtt ptes, so shrink the 3585 * aperture accordingly when using aliasing ppgtt. */ 3586 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; 3587 3588 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); 3589 3590 ret = i915_gem_init_aliasing_ppgtt(dev); 3591 if (ret) { 3592 mutex_unlock(&dev->struct_mutex); 3593 return ret; 3594 } 3595 } else { 3596 /* Let GEM Manage all of the aperture. 3597 * 3598 * However, leave one page at the end still bound to the scratch 3599 * page. There are a number of places where the hardware 3600 * apparently prefetches past the end of the object, and we've 3601 * seen multiple hangs with the GPU head pointer stuck in a 3602 * batchbuffer bound at the last page of the aperture. One page 3603 * should be enough to keep any prefetching inside of the 3604 * aperture. 3605 */ 3606 i915_gem_init_global_gtt(dev, 0, mappable_size, 3607 gtt_size); 3608 } 3609 3610 ret = i915_gem_init_hw(dev); 3611 mutex_unlock(&dev->struct_mutex); 3612 if (ret) { 3613 i915_gem_cleanup_aliasing_ppgtt(dev); 3614 return ret; 3615 } 3616 3617 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ 3618 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3619 dev_priv->dri1.allow_batchbuffer = 1; 3620 return 0; 3621 } 3622 3623 void 3624 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3625 { 3626 drm_i915_private_t *dev_priv = dev->dev_private; 3627 struct intel_ring_buffer *ring; 3628 int i; 3629 3630 for_each_ring(ring, dev_priv, i) 3631 intel_cleanup_ring_buffer(ring); 3632 } 3633 3634 int 3635 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3636 struct drm_file *file_priv) 3637 { 3638 drm_i915_private_t *dev_priv = dev->dev_private; 3639 int ret; 3640 3641 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3642 return 0; 3643 3644 if (atomic_read(&dev_priv->mm.wedged)) { 3645 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3646 atomic_set(&dev_priv->mm.wedged, 0); 3647 } 3648 3649 mutex_lock(&dev->struct_mutex); 3650 dev_priv->mm.suspended = 0; 3651 3652 ret = i915_gem_init_hw(dev); 3653 if (ret != 0) { 3654 mutex_unlock(&dev->struct_mutex); 3655 return ret; 3656 } 3657 3658 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 3659 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 3660 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 3661 mutex_unlock(&dev->struct_mutex); 3662 3663 ret = drm_irq_install(dev); 3664 if (ret) 3665 goto cleanup_ringbuffer; 3666 3667 return 0; 3668 3669 cleanup_ringbuffer: 3670 mutex_lock(&dev->struct_mutex); 3671 i915_gem_cleanup_ringbuffer(dev); 3672 dev_priv->mm.suspended = 1; 3673 mutex_unlock(&dev->struct_mutex); 3674 3675 return ret; 3676 } 3677 3678 int 3679 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3680 struct drm_file *file_priv) 3681 { 3682 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3683 return 0; 3684 3685 drm_irq_uninstall(dev); 3686 return i915_gem_idle(dev); 3687 } 3688 3689 void 3690 
i915_gem_lastclose(struct drm_device *dev) 3691 { 3692 int ret; 3693 3694 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3695 return; 3696 3697 ret = i915_gem_idle(dev); 3698 if (ret) 3699 DRM_ERROR("failed to idle hardware: %d\n", ret); 3700 } 3701 3702 static void 3703 init_ring_lists(struct intel_ring_buffer *ring) 3704 { 3705 INIT_LIST_HEAD(&ring->active_list); 3706 INIT_LIST_HEAD(&ring->request_list); 3707 INIT_LIST_HEAD(&ring->gpu_write_list); 3708 } 3709 3710 void 3711 i915_gem_load(struct drm_device *dev) 3712 { 3713 int i; 3714 drm_i915_private_t *dev_priv = dev->dev_private; 3715 3716 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3717 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 3718 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3719 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3720 INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3721 for (i = 0; i < I915_NUM_RINGS; i++) 3722 init_ring_lists(&dev_priv->ring[i]); 3723 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 3724 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3725 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3726 i915_gem_retire_work_handler); 3727 init_completion(&dev_priv->error_completion); 3728 3729 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3730 if (IS_GEN3(dev)) { 3731 I915_WRITE(MI_ARB_STATE, 3732 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 3733 } 3734 3735 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3736 3737 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3738 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3739 dev_priv->fence_reg_start = 3; 3740 3741 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3742 dev_priv->num_fence_regs = 16; 3743 else 3744 dev_priv->num_fence_regs = 8; 3745 3746 /* Initialize fence registers to zero */ 3747 i915_gem_reset_fences(dev); 3748 3749 i915_gem_detect_bit_6_swizzle(dev); 3750 init_waitqueue_head(&dev_priv->pending_flip_queue); 3751 3752 dev_priv->mm.interruptible = true; 3753 3754 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3755 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3756 register_shrinker(&dev_priv->mm.inactive_shrinker); 3757 } 3758 3759 /* 3760 * Create a physically contiguous memory object for this object 3761 * e.g. 
for cursor + overlay regs 3762 */ 3763 static int i915_gem_init_phys_object(struct drm_device *dev, 3764 int id, int size, int align) 3765 { 3766 drm_i915_private_t *dev_priv = dev->dev_private; 3767 struct drm_i915_gem_phys_object *phys_obj; 3768 int ret; 3769 3770 if (dev_priv->mm.phys_objs[id - 1] || !size) 3771 return 0; 3772 3773 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 3774 if (!phys_obj) 3775 return -ENOMEM; 3776 3777 phys_obj->id = id; 3778 3779 phys_obj->handle = drm_pci_alloc(dev, size, align); 3780 if (!phys_obj->handle) { 3781 ret = -ENOMEM; 3782 goto kfree_obj; 3783 } 3784 #ifdef CONFIG_X86 3785 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3786 #endif 3787 3788 dev_priv->mm.phys_objs[id - 1] = phys_obj; 3789 3790 return 0; 3791 kfree_obj: 3792 kfree(phys_obj); 3793 return ret; 3794 } 3795 3796 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 3797 { 3798 drm_i915_private_t *dev_priv = dev->dev_private; 3799 struct drm_i915_gem_phys_object *phys_obj; 3800 3801 if (!dev_priv->mm.phys_objs[id - 1]) 3802 return; 3803 3804 phys_obj = dev_priv->mm.phys_objs[id - 1]; 3805 if (phys_obj->cur_obj) { 3806 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 3807 } 3808 3809 #ifdef CONFIG_X86 3810 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3811 #endif 3812 drm_pci_free(dev, phys_obj->handle); 3813 kfree(phys_obj); 3814 dev_priv->mm.phys_objs[id - 1] = NULL; 3815 } 3816 3817 void i915_gem_free_all_phys_object(struct drm_device *dev) 3818 { 3819 int i; 3820 3821 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 3822 i915_gem_free_phys_object(dev, i); 3823 } 3824 3825 void i915_gem_detach_phys_object(struct drm_device *dev, 3826 struct drm_i915_gem_object *obj) 3827 { 3828 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3829 char *vaddr; 3830 int i; 3831 int page_count; 3832 3833 if (!obj->phys_obj) 3834 return; 3835 vaddr = obj->phys_obj->handle->vaddr; 3836 3837 page_count = obj->base.size / PAGE_SIZE; 3838 for (i = 0; i < page_count; i++) { 3839 struct page *page = shmem_read_mapping_page(mapping, i); 3840 if (!IS_ERR(page)) { 3841 char *dst = kmap_atomic(page); 3842 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 3843 kunmap_atomic(dst); 3844 3845 drm_clflush_pages(&page, 1); 3846 3847 set_page_dirty(page); 3848 mark_page_accessed(page); 3849 page_cache_release(page); 3850 } 3851 } 3852 intel_gtt_chipset_flush(); 3853 3854 obj->phys_obj->cur_obj = NULL; 3855 obj->phys_obj = NULL; 3856 } 3857 3858 int 3859 i915_gem_attach_phys_object(struct drm_device *dev, 3860 struct drm_i915_gem_object *obj, 3861 int id, 3862 int align) 3863 { 3864 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3865 drm_i915_private_t *dev_priv = dev->dev_private; 3866 int ret = 0; 3867 int page_count; 3868 int i; 3869 3870 if (id > I915_MAX_PHYS_OBJECT) 3871 return -EINVAL; 3872 3873 if (obj->phys_obj) { 3874 if (obj->phys_obj->id == id) 3875 return 0; 3876 i915_gem_detach_phys_object(dev, obj); 3877 } 3878 3879 /* create a new object */ 3880 if (!dev_priv->mm.phys_objs[id - 1]) { 3881 ret = i915_gem_init_phys_object(dev, id, 3882 obj->base.size, align); 3883 if (ret) { 3884 DRM_ERROR("failed to init phys object %d size: %zu\n", 3885 id, obj->base.size); 3886 return ret; 3887 } 3888 } 3889 3890 /* bind to the object */ 3891 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 3892 
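	/* Record the binding, then copy the shmem pages into the phys buffer. */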
obj->phys_obj->cur_obj = obj; 3893 3894 page_count = obj->base.size / PAGE_SIZE; 3895 3896 for (i = 0; i < page_count; i++) { 3897 struct page *page; 3898 char *dst, *src; 3899 3900 page = shmem_read_mapping_page(mapping, i); 3901 if (IS_ERR(page)) 3902 return PTR_ERR(page); 3903 3904 src = kmap_atomic(page); 3905 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 3906 memcpy(dst, src, PAGE_SIZE); 3907 kunmap_atomic(src); 3908 3909 mark_page_accessed(page); 3910 page_cache_release(page); 3911 } 3912 3913 return 0; 3914 } 3915 3916 static int 3917 i915_gem_phys_pwrite(struct drm_device *dev, 3918 struct drm_i915_gem_object *obj, 3919 struct drm_i915_gem_pwrite *args, 3920 struct drm_file *file_priv) 3921 { 3922 void *vaddr = obj->phys_obj->handle->vaddr + args->offset; 3923 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 3924 3925 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 3926 unsigned long unwritten; 3927 3928 /* The physical object once assigned is fixed for the lifetime 3929 * of the obj, so we can safely drop the lock and continue 3930 * to access vaddr. 3931 */ 3932 mutex_unlock(&dev->struct_mutex); 3933 unwritten = copy_from_user(vaddr, user_data, args->size); 3934 mutex_lock(&dev->struct_mutex); 3935 if (unwritten) 3936 return -EFAULT; 3937 } 3938 3939 intel_gtt_chipset_flush(); 3940 return 0; 3941 } 3942 3943 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 3944 { 3945 struct drm_i915_file_private *file_priv = file->driver_priv; 3946 3947 /* Clean up our request list when the client is going away, so that 3948 * later retire_requests won't dereference our soon-to-be-gone 3949 * file_priv. 3950 */ 3951 spin_lock(&file_priv->mm.lock); 3952 while (!list_empty(&file_priv->mm.request_list)) { 3953 struct drm_i915_gem_request *request; 3954 3955 request = list_first_entry(&file_priv->mm.request_list, 3956 struct drm_i915_gem_request, 3957 client_list); 3958 list_del(&request->client_list); 3959 request->file_priv = NULL; 3960 } 3961 spin_unlock(&file_priv->mm.lock); 3962 } 3963 3964 static int 3965 i915_gpu_is_active(struct drm_device *dev) 3966 { 3967 drm_i915_private_t *dev_priv = dev->dev_private; 3968 int lists_empty; 3969 3970 lists_empty = list_empty(&dev_priv->mm.flushing_list) && 3971 list_empty(&dev_priv->mm.active_list); 3972 3973 return !lists_empty; 3974 } 3975 3976 static int 3977 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 3978 { 3979 struct drm_i915_private *dev_priv = 3980 container_of(shrinker, 3981 struct drm_i915_private, 3982 mm.inactive_shrinker); 3983 struct drm_device *dev = dev_priv->dev; 3984 struct drm_i915_gem_object *obj, *next; 3985 int nr_to_scan = sc->nr_to_scan; 3986 int cnt; 3987 3988 if (!mutex_trylock(&dev->struct_mutex)) 3989 return 0; 3990 3991 /* "fast-path" to count number of available objects */ 3992 if (nr_to_scan == 0) { 3993 cnt = 0; 3994 list_for_each_entry(obj, 3995 &dev_priv->mm.inactive_list, 3996 mm_list) 3997 cnt++; 3998 mutex_unlock(&dev->struct_mutex); 3999 return cnt / 100 * sysctl_vfs_cache_pressure; 4000 } 4001 4002 rescan: 4003 /* first scan for clean buffers */ 4004 i915_gem_retire_requests(dev); 4005 4006 list_for_each_entry_safe(obj, next, 4007 &dev_priv->mm.inactive_list, 4008 mm_list) { 4009 if (i915_gem_object_is_purgeable(obj)) { 4010 if (i915_gem_object_unbind(obj) == 0 && 4011 --nr_to_scan == 0) 4012 break; 4013 } 4014 } 4015 4016 /* second pass, evict/count anything still on the inactive list */ 4017 cnt = 0; 4018 
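	/* Anything we cannot unbind (or run out of scan budget for) is
	 * counted as still resident. */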
list_for_each_entry_safe(obj, next, 4019 &dev_priv->mm.inactive_list, 4020 mm_list) { 4021 if (nr_to_scan && 4022 i915_gem_object_unbind(obj) == 0) 4023 nr_to_scan--; 4024 else 4025 cnt++; 4026 } 4027 4028 if (nr_to_scan && i915_gpu_is_active(dev)) { 4029 /* 4030 * We are desperate for pages, so as a last resort, wait 4031 * for the GPU to finish and discard whatever we can. 4032 * This has a dramatic impact to reduce the number of 4033 * OOM-killer events whilst running the GPU aggressively. 4034 */ 4035 if (i915_gpu_idle(dev) == 0) 4036 goto rescan; 4037 } 4038 mutex_unlock(&dev->struct_mutex); 4039 return cnt / 100 * sysctl_vfs_cache_pressure; 4040 } 4041
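/*
 * Illustrative sketch (not part of the driver): roughly how the helpers in
 * this file fit together for a kernel-internal buffer.  Error handling is
 * elided, dev->struct_mutex is assumed to be held where required, and the
 * 4096-byte size/alignment values are arbitrary.
 *
 *	struct drm_i915_gem_object *obj;
 *	int ret;
 *
 *	obj = i915_gem_alloc_object(dev, 4096);
 *	if (obj == NULL)
 *		return -ENOMEM;
 *
 *	ret = i915_gem_object_pin(obj, 4096, true);
 *	if (ret == 0)
 *		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *
 *	(access the pages through the mappable GTT aperture)
 *
 *	i915_gem_object_unpin(obj);
 *	drm_gem_object_unreference(&obj->base);
 */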