1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include "drmP.h" 29 #include "drm.h" 30 #include "i915_drm.h" 31 #include "i915_drv.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include <linux/slab.h> 35 #include <linux/swap.h> 36 #include <linux/pci.h> 37 38 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); 39 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 40 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 41 static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, 42 bool write); 43 static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, 44 uint64_t offset, 45 uint64_t size); 46 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj); 47 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 48 unsigned alignment, 49 bool map_and_fenceable); 50 static void i915_gem_clear_fence_reg(struct drm_device *dev, 51 struct drm_i915_fence_reg *reg); 52 static int i915_gem_phys_pwrite(struct drm_device *dev, 53 struct drm_i915_gem_object *obj, 54 struct drm_i915_gem_pwrite *args, 55 struct drm_file *file); 56 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); 57 58 static int i915_gem_inactive_shrink(struct shrinker *shrinker, 59 int nr_to_scan, 60 gfp_t gfp_mask); 61 62 63 /* some bookkeeping */ 64 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 65 size_t size) 66 { 67 dev_priv->mm.object_count++; 68 dev_priv->mm.object_memory += size; 69 } 70 71 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 72 size_t size) 73 { 74 dev_priv->mm.object_count--; 75 dev_priv->mm.object_memory -= size; 76 } 77 78 static int 79 i915_gem_wait_for_error(struct drm_device *dev) 80 { 81 struct drm_i915_private *dev_priv = dev->dev_private; 82 struct completion *x = &dev_priv->error_completion; 83 unsigned long flags; 84 int ret; 85 86 if (!atomic_read(&dev_priv->mm.wedged)) 87 return 0; 88 89 ret = wait_for_completion_interruptible(x); 90 if (ret) 91 return ret; 92 93 if (atomic_read(&dev_priv->mm.wedged)) { 94 /* GPU is hung, bump the completion count to account for 95 * the token we just consumed so 
that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

void i915_gem_do_init(struct drm_device *dev,
		      unsigned long start,
		      unsigned long mappable_end,
		      unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);

	dev_priv->mm.gtt_start = start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
	dev_priv->mm.gtt_end = end;
	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

	/* Take over this portion of the GTT */
	intel_gtt_clear_range(start / PAGE_SIZE, (end - start) / PAGE_SIZE);
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
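 *
 * The copy runs under struct_mutex with the backing page mapped via
 * kmap_atomic(), so it must not fault or sleep; __copy_to_user_inatomic()
 * reports a fault as a short copy, which is returned as -EFAULT so the
 * ioctl can retry through the slow path.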
331 */ 332 static int 333 i915_gem_shmem_pread_fast(struct drm_device *dev, 334 struct drm_i915_gem_object *obj, 335 struct drm_i915_gem_pread *args, 336 struct drm_file *file) 337 { 338 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 339 ssize_t remain; 340 loff_t offset; 341 char __user *user_data; 342 int page_offset, page_length; 343 344 user_data = (char __user *) (uintptr_t) args->data_ptr; 345 remain = args->size; 346 347 offset = args->offset; 348 349 while (remain > 0) { 350 struct page *page; 351 char *vaddr; 352 int ret; 353 354 /* Operation in this page 355 * 356 * page_offset = offset within page 357 * page_length = bytes to copy for this page 358 */ 359 page_offset = offset & (PAGE_SIZE-1); 360 page_length = remain; 361 if ((page_offset + remain) > PAGE_SIZE) 362 page_length = PAGE_SIZE - page_offset; 363 364 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, 365 GFP_HIGHUSER | __GFP_RECLAIMABLE); 366 if (IS_ERR(page)) 367 return PTR_ERR(page); 368 369 vaddr = kmap_atomic(page); 370 ret = __copy_to_user_inatomic(user_data, 371 vaddr + page_offset, 372 page_length); 373 kunmap_atomic(vaddr); 374 375 mark_page_accessed(page); 376 page_cache_release(page); 377 if (ret) 378 return -EFAULT; 379 380 remain -= page_length; 381 user_data += page_length; 382 offset += page_length; 383 } 384 385 return 0; 386 } 387 388 /** 389 * This is the fallback shmem pread path, which allocates temporary storage 390 * in kernel space to copy_to_user into outside of the struct_mutex, so we 391 * can copy out of the object's backing pages while holding the struct mutex 392 * and not take page faults. 393 */ 394 static int 395 i915_gem_shmem_pread_slow(struct drm_device *dev, 396 struct drm_i915_gem_object *obj, 397 struct drm_i915_gem_pread *args, 398 struct drm_file *file) 399 { 400 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 401 struct mm_struct *mm = current->mm; 402 struct page **user_pages; 403 ssize_t remain; 404 loff_t offset, pinned_pages, i; 405 loff_t first_data_page, last_data_page, num_pages; 406 int shmem_page_offset; 407 int data_page_index, data_page_offset; 408 int page_length; 409 int ret; 410 uint64_t data_ptr = args->data_ptr; 411 int do_bit17_swizzling; 412 413 remain = args->size; 414 415 /* Pin the user pages containing the data. We can't fault while 416 * holding the struct mutex, yet we want to hold it while 417 * dereferencing the user data. 
418 */ 419 first_data_page = data_ptr / PAGE_SIZE; 420 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 421 num_pages = last_data_page - first_data_page + 1; 422 423 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); 424 if (user_pages == NULL) 425 return -ENOMEM; 426 427 mutex_unlock(&dev->struct_mutex); 428 down_read(&mm->mmap_sem); 429 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 430 num_pages, 1, 0, user_pages, NULL); 431 up_read(&mm->mmap_sem); 432 mutex_lock(&dev->struct_mutex); 433 if (pinned_pages < num_pages) { 434 ret = -EFAULT; 435 goto out; 436 } 437 438 ret = i915_gem_object_set_cpu_read_domain_range(obj, 439 args->offset, 440 args->size); 441 if (ret) 442 goto out; 443 444 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 445 446 offset = args->offset; 447 448 while (remain > 0) { 449 struct page *page; 450 451 /* Operation in this page 452 * 453 * shmem_page_offset = offset within page in shmem file 454 * data_page_index = page number in get_user_pages return 455 * data_page_offset = offset with data_page_index page. 456 * page_length = bytes to copy for this page 457 */ 458 shmem_page_offset = offset & ~PAGE_MASK; 459 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 460 data_page_offset = data_ptr & ~PAGE_MASK; 461 462 page_length = remain; 463 if ((shmem_page_offset + page_length) > PAGE_SIZE) 464 page_length = PAGE_SIZE - shmem_page_offset; 465 if ((data_page_offset + page_length) > PAGE_SIZE) 466 page_length = PAGE_SIZE - data_page_offset; 467 468 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, 469 GFP_HIGHUSER | __GFP_RECLAIMABLE); 470 if (IS_ERR(page)) 471 return PTR_ERR(page); 472 473 if (do_bit17_swizzling) { 474 slow_shmem_bit17_copy(page, 475 shmem_page_offset, 476 user_pages[data_page_index], 477 data_page_offset, 478 page_length, 479 1); 480 } else { 481 slow_shmem_copy(user_pages[data_page_index], 482 data_page_offset, 483 page, 484 shmem_page_offset, 485 page_length); 486 } 487 488 mark_page_accessed(page); 489 page_cache_release(page); 490 491 remain -= page_length; 492 data_ptr += page_length; 493 offset += page_length; 494 } 495 496 out: 497 for (i = 0; i < pinned_pages; i++) { 498 SetPageDirty(user_pages[i]); 499 mark_page_accessed(user_pages[i]); 500 page_cache_release(user_pages[i]); 501 } 502 drm_free_large(user_pages); 503 504 return ret; 505 } 506 507 /** 508 * Reads data from the object referenced by handle. 509 * 510 * On error, the contents of *data are undefined. 511 */ 512 int 513 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 514 struct drm_file *file) 515 { 516 struct drm_i915_gem_pread *args = data; 517 struct drm_i915_gem_object *obj; 518 int ret = 0; 519 520 if (args->size == 0) 521 return 0; 522 523 if (!access_ok(VERIFY_WRITE, 524 (char __user *)(uintptr_t)args->data_ptr, 525 args->size)) 526 return -EFAULT; 527 528 ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr, 529 args->size); 530 if (ret) 531 return -EFAULT; 532 533 ret = i915_mutex_lock_interruptible(dev); 534 if (ret) 535 return ret; 536 537 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 538 if (&obj->base == NULL) { 539 ret = -ENOENT; 540 goto unlock; 541 } 542 543 /* Bounds check source. 
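	 * The check is written as "size > obj->base.size - offset" rather than
	 * comparing offset + size directly, so that a huge offset + size from
	 * userspace cannot wrap around and slip past the check.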
*/ 544 if (args->offset > obj->base.size || 545 args->size > obj->base.size - args->offset) { 546 ret = -EINVAL; 547 goto out; 548 } 549 550 trace_i915_gem_object_pread(obj, args->offset, args->size); 551 552 ret = i915_gem_object_set_cpu_read_domain_range(obj, 553 args->offset, 554 args->size); 555 if (ret) 556 goto out; 557 558 ret = -EFAULT; 559 if (!i915_gem_object_needs_bit17_swizzle(obj)) 560 ret = i915_gem_shmem_pread_fast(dev, obj, args, file); 561 if (ret == -EFAULT) 562 ret = i915_gem_shmem_pread_slow(dev, obj, args, file); 563 564 out: 565 drm_gem_object_unreference(&obj->base); 566 unlock: 567 mutex_unlock(&dev->struct_mutex); 568 return ret; 569 } 570 571 /* This is the fast write path which cannot handle 572 * page faults in the source data 573 */ 574 575 static inline int 576 fast_user_write(struct io_mapping *mapping, 577 loff_t page_base, int page_offset, 578 char __user *user_data, 579 int length) 580 { 581 char *vaddr_atomic; 582 unsigned long unwritten; 583 584 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 585 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, 586 user_data, length); 587 io_mapping_unmap_atomic(vaddr_atomic); 588 return unwritten; 589 } 590 591 /* Here's the write path which can sleep for 592 * page faults 593 */ 594 595 static inline void 596 slow_kernel_write(struct io_mapping *mapping, 597 loff_t gtt_base, int gtt_offset, 598 struct page *user_page, int user_offset, 599 int length) 600 { 601 char __iomem *dst_vaddr; 602 char *src_vaddr; 603 604 dst_vaddr = io_mapping_map_wc(mapping, gtt_base); 605 src_vaddr = kmap(user_page); 606 607 memcpy_toio(dst_vaddr + gtt_offset, 608 src_vaddr + user_offset, 609 length); 610 611 kunmap(user_page); 612 io_mapping_unmap(dst_vaddr); 613 } 614 615 /** 616 * This is the fast pwrite path, where we copy the data directly from the 617 * user into the GTT, uncached. 618 */ 619 static int 620 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 621 struct drm_i915_gem_object *obj, 622 struct drm_i915_gem_pwrite *args, 623 struct drm_file *file) 624 { 625 drm_i915_private_t *dev_priv = dev->dev_private; 626 ssize_t remain; 627 loff_t offset, page_base; 628 char __user *user_data; 629 int page_offset, page_length; 630 631 user_data = (char __user *) (uintptr_t) args->data_ptr; 632 remain = args->size; 633 634 offset = obj->gtt_offset + args->offset; 635 636 while (remain > 0) { 637 /* Operation in this page 638 * 639 * page_base = page offset within aperture 640 * page_offset = offset within page 641 * page_length = bytes to copy for this page 642 */ 643 page_base = (offset & ~(PAGE_SIZE-1)); 644 page_offset = offset & (PAGE_SIZE-1); 645 page_length = remain; 646 if ((page_offset + remain) > PAGE_SIZE) 647 page_length = PAGE_SIZE - page_offset; 648 649 /* If we get a fault while copying data, then (presumably) our 650 * source page isn't available. Return the error and we'll 651 * retry in the slow path. 652 */ 653 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, 654 page_offset, user_data, page_length)) 655 656 return -EFAULT; 657 658 remain -= page_length; 659 user_data += page_length; 660 offset += page_length; 661 } 662 663 return 0; 664 } 665 666 /** 667 * This is the fallback GTT pwrite path, which uses get_user_pages to pin 668 * the memory and maps it using kmap_atomic for copying. 669 * 670 * This code resulted in x11perf -rgb10text consuming about 10% more CPU 671 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit). 
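 *
 * Note that the actual copy is done by slow_kernel_write() below, i.e.
 * through a write-combining io_mapping of the aperture with memcpy_toio(),
 * with the pinned user pages mapped via kmap().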
672 */ 673 static int 674 i915_gem_gtt_pwrite_slow(struct drm_device *dev, 675 struct drm_i915_gem_object *obj, 676 struct drm_i915_gem_pwrite *args, 677 struct drm_file *file) 678 { 679 drm_i915_private_t *dev_priv = dev->dev_private; 680 ssize_t remain; 681 loff_t gtt_page_base, offset; 682 loff_t first_data_page, last_data_page, num_pages; 683 loff_t pinned_pages, i; 684 struct page **user_pages; 685 struct mm_struct *mm = current->mm; 686 int gtt_page_offset, data_page_offset, data_page_index, page_length; 687 int ret; 688 uint64_t data_ptr = args->data_ptr; 689 690 remain = args->size; 691 692 /* Pin the user pages containing the data. We can't fault while 693 * holding the struct mutex, and all of the pwrite implementations 694 * want to hold it while dereferencing the user data. 695 */ 696 first_data_page = data_ptr / PAGE_SIZE; 697 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 698 num_pages = last_data_page - first_data_page + 1; 699 700 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); 701 if (user_pages == NULL) 702 return -ENOMEM; 703 704 mutex_unlock(&dev->struct_mutex); 705 down_read(&mm->mmap_sem); 706 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 707 num_pages, 0, 0, user_pages, NULL); 708 up_read(&mm->mmap_sem); 709 mutex_lock(&dev->struct_mutex); 710 if (pinned_pages < num_pages) { 711 ret = -EFAULT; 712 goto out_unpin_pages; 713 } 714 715 ret = i915_gem_object_set_to_gtt_domain(obj, true); 716 if (ret) 717 goto out_unpin_pages; 718 719 ret = i915_gem_object_put_fence(obj); 720 if (ret) 721 goto out_unpin_pages; 722 723 offset = obj->gtt_offset + args->offset; 724 725 while (remain > 0) { 726 /* Operation in this page 727 * 728 * gtt_page_base = page offset within aperture 729 * gtt_page_offset = offset within page in aperture 730 * data_page_index = page number in get_user_pages return 731 * data_page_offset = offset with data_page_index page. 732 * page_length = bytes to copy for this page 733 */ 734 gtt_page_base = offset & PAGE_MASK; 735 gtt_page_offset = offset & ~PAGE_MASK; 736 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 737 data_page_offset = data_ptr & ~PAGE_MASK; 738 739 page_length = remain; 740 if ((gtt_page_offset + page_length) > PAGE_SIZE) 741 page_length = PAGE_SIZE - gtt_page_offset; 742 if ((data_page_offset + page_length) > PAGE_SIZE) 743 page_length = PAGE_SIZE - data_page_offset; 744 745 slow_kernel_write(dev_priv->mm.gtt_mapping, 746 gtt_page_base, gtt_page_offset, 747 user_pages[data_page_index], 748 data_page_offset, 749 page_length); 750 751 remain -= page_length; 752 offset += page_length; 753 data_ptr += page_length; 754 } 755 756 out_unpin_pages: 757 for (i = 0; i < pinned_pages; i++) 758 page_cache_release(user_pages[i]); 759 drm_free_large(user_pages); 760 761 return ret; 762 } 763 764 /** 765 * This is the fast shmem pwrite path, which attempts to directly 766 * copy_from_user into the kmapped pages backing the object. 
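 *
 * As in the fast pread path, the copy is done with __copy_from_user_inatomic()
 * under kmap_atomic() while struct_mutex is held, so a fault cannot be
 * serviced here; it is reported as -EFAULT and the ioctl falls back to
 * i915_gem_shmem_pwrite_slow().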
767 */ 768 static int 769 i915_gem_shmem_pwrite_fast(struct drm_device *dev, 770 struct drm_i915_gem_object *obj, 771 struct drm_i915_gem_pwrite *args, 772 struct drm_file *file) 773 { 774 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 775 ssize_t remain; 776 loff_t offset; 777 char __user *user_data; 778 int page_offset, page_length; 779 780 user_data = (char __user *) (uintptr_t) args->data_ptr; 781 remain = args->size; 782 783 offset = args->offset; 784 obj->dirty = 1; 785 786 while (remain > 0) { 787 struct page *page; 788 char *vaddr; 789 int ret; 790 791 /* Operation in this page 792 * 793 * page_offset = offset within page 794 * page_length = bytes to copy for this page 795 */ 796 page_offset = offset & (PAGE_SIZE-1); 797 page_length = remain; 798 if ((page_offset + remain) > PAGE_SIZE) 799 page_length = PAGE_SIZE - page_offset; 800 801 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, 802 GFP_HIGHUSER | __GFP_RECLAIMABLE); 803 if (IS_ERR(page)) 804 return PTR_ERR(page); 805 806 vaddr = kmap_atomic(page, KM_USER0); 807 ret = __copy_from_user_inatomic(vaddr + page_offset, 808 user_data, 809 page_length); 810 kunmap_atomic(vaddr, KM_USER0); 811 812 set_page_dirty(page); 813 mark_page_accessed(page); 814 page_cache_release(page); 815 816 /* If we get a fault while copying data, then (presumably) our 817 * source page isn't available. Return the error and we'll 818 * retry in the slow path. 819 */ 820 if (ret) 821 return -EFAULT; 822 823 remain -= page_length; 824 user_data += page_length; 825 offset += page_length; 826 } 827 828 return 0; 829 } 830 831 /** 832 * This is the fallback shmem pwrite path, which uses get_user_pages to pin 833 * the memory and maps it using kmap_atomic for copying. 834 * 835 * This avoids taking mmap_sem for faulting on the user's address while the 836 * struct_mutex is held. 837 */ 838 static int 839 i915_gem_shmem_pwrite_slow(struct drm_device *dev, 840 struct drm_i915_gem_object *obj, 841 struct drm_i915_gem_pwrite *args, 842 struct drm_file *file) 843 { 844 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 845 struct mm_struct *mm = current->mm; 846 struct page **user_pages; 847 ssize_t remain; 848 loff_t offset, pinned_pages, i; 849 loff_t first_data_page, last_data_page, num_pages; 850 int shmem_page_offset; 851 int data_page_index, data_page_offset; 852 int page_length; 853 int ret; 854 uint64_t data_ptr = args->data_ptr; 855 int do_bit17_swizzling; 856 857 remain = args->size; 858 859 /* Pin the user pages containing the data. We can't fault while 860 * holding the struct mutex, and all of the pwrite implementations 861 * want to hold it while dereferencing the user data. 
862 */ 863 first_data_page = data_ptr / PAGE_SIZE; 864 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 865 num_pages = last_data_page - first_data_page + 1; 866 867 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); 868 if (user_pages == NULL) 869 return -ENOMEM; 870 871 mutex_unlock(&dev->struct_mutex); 872 down_read(&mm->mmap_sem); 873 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 874 num_pages, 0, 0, user_pages, NULL); 875 up_read(&mm->mmap_sem); 876 mutex_lock(&dev->struct_mutex); 877 if (pinned_pages < num_pages) { 878 ret = -EFAULT; 879 goto out; 880 } 881 882 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 883 if (ret) 884 goto out; 885 886 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 887 888 offset = args->offset; 889 obj->dirty = 1; 890 891 while (remain > 0) { 892 struct page *page; 893 894 /* Operation in this page 895 * 896 * shmem_page_offset = offset within page in shmem file 897 * data_page_index = page number in get_user_pages return 898 * data_page_offset = offset with data_page_index page. 899 * page_length = bytes to copy for this page 900 */ 901 shmem_page_offset = offset & ~PAGE_MASK; 902 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 903 data_page_offset = data_ptr & ~PAGE_MASK; 904 905 page_length = remain; 906 if ((shmem_page_offset + page_length) > PAGE_SIZE) 907 page_length = PAGE_SIZE - shmem_page_offset; 908 if ((data_page_offset + page_length) > PAGE_SIZE) 909 page_length = PAGE_SIZE - data_page_offset; 910 911 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, 912 GFP_HIGHUSER | __GFP_RECLAIMABLE); 913 if (IS_ERR(page)) { 914 ret = PTR_ERR(page); 915 goto out; 916 } 917 918 if (do_bit17_swizzling) { 919 slow_shmem_bit17_copy(page, 920 shmem_page_offset, 921 user_pages[data_page_index], 922 data_page_offset, 923 page_length, 924 0); 925 } else { 926 slow_shmem_copy(page, 927 shmem_page_offset, 928 user_pages[data_page_index], 929 data_page_offset, 930 page_length); 931 } 932 933 set_page_dirty(page); 934 mark_page_accessed(page); 935 page_cache_release(page); 936 937 remain -= page_length; 938 data_ptr += page_length; 939 offset += page_length; 940 } 941 942 out: 943 for (i = 0; i < pinned_pages; i++) 944 page_cache_release(user_pages[i]); 945 drm_free_large(user_pages); 946 947 return ret; 948 } 949 950 /** 951 * Writes data to the object referenced by handle. 952 * 953 * On error, the contents of the buffer that were to be modified are undefined. 954 */ 955 int 956 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 957 struct drm_file *file) 958 { 959 struct drm_i915_gem_pwrite *args = data; 960 struct drm_i915_gem_object *obj; 961 int ret; 962 963 if (args->size == 0) 964 return 0; 965 966 if (!access_ok(VERIFY_READ, 967 (char __user *)(uintptr_t)args->data_ptr, 968 args->size)) 969 return -EFAULT; 970 971 ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr, 972 args->size); 973 if (ret) 974 return -EFAULT; 975 976 ret = i915_mutex_lock_interruptible(dev); 977 if (ret) 978 return ret; 979 980 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 981 if (&obj->base == NULL) { 982 ret = -ENOENT; 983 goto unlock; 984 } 985 986 /* Bounds check destination. 
*/ 987 if (args->offset > obj->base.size || 988 args->size > obj->base.size - args->offset) { 989 ret = -EINVAL; 990 goto out; 991 } 992 993 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 994 995 /* We can only do the GTT pwrite on untiled buffers, as otherwise 996 * it would end up going through the fenced access, and we'll get 997 * different detiling behavior between reading and writing. 998 * pread/pwrite currently are reading and writing from the CPU 999 * perspective, requiring manual detiling by the client. 1000 */ 1001 if (obj->phys_obj) 1002 ret = i915_gem_phys_pwrite(dev, obj, args, file); 1003 else if (obj->gtt_space && 1004 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1005 ret = i915_gem_object_pin(obj, 0, true); 1006 if (ret) 1007 goto out; 1008 1009 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1010 if (ret) 1011 goto out_unpin; 1012 1013 ret = i915_gem_object_put_fence(obj); 1014 if (ret) 1015 goto out_unpin; 1016 1017 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1018 if (ret == -EFAULT) 1019 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file); 1020 1021 out_unpin: 1022 i915_gem_object_unpin(obj); 1023 } else { 1024 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1025 if (ret) 1026 goto out; 1027 1028 ret = -EFAULT; 1029 if (!i915_gem_object_needs_bit17_swizzle(obj)) 1030 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file); 1031 if (ret == -EFAULT) 1032 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); 1033 } 1034 1035 out: 1036 drm_gem_object_unreference(&obj->base); 1037 unlock: 1038 mutex_unlock(&dev->struct_mutex); 1039 return ret; 1040 } 1041 1042 /** 1043 * Called when user space prepares to use an object with the CPU, either 1044 * through the mmap ioctl's mapping or a GTT mapping. 1045 */ 1046 int 1047 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1048 struct drm_file *file) 1049 { 1050 struct drm_i915_gem_set_domain *args = data; 1051 struct drm_i915_gem_object *obj; 1052 uint32_t read_domains = args->read_domains; 1053 uint32_t write_domain = args->write_domain; 1054 int ret; 1055 1056 if (!(dev->driver->driver_features & DRIVER_GEM)) 1057 return -ENODEV; 1058 1059 /* Only handle setting domains to types used by the CPU. */ 1060 if (write_domain & I915_GEM_GPU_DOMAINS) 1061 return -EINVAL; 1062 1063 if (read_domains & I915_GEM_GPU_DOMAINS) 1064 return -EINVAL; 1065 1066 /* Having something in the write domain implies it's in the read 1067 * domain, and only that read domain. Enforce that in the request. 1068 */ 1069 if (write_domain != 0 && read_domains != write_domain) 1070 return -EINVAL; 1071 1072 ret = i915_mutex_lock_interruptible(dev); 1073 if (ret) 1074 return ret; 1075 1076 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1077 if (&obj->base == NULL) { 1078 ret = -ENOENT; 1079 goto unlock; 1080 } 1081 1082 if (read_domains & I915_GEM_DOMAIN_GTT) { 1083 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1084 1085 /* Silently promote "you're not bound, there was nothing to do" 1086 * to success, since the client was just asking us to 1087 * make sure everything was done. 
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it
 * doesn't imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		drm_gem_object_unreference_unlocked(obj);
		return -E2BIG;
	}

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
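 *
 * Note that this handler only runs for the GTT mapping set up through the
 * mmap_gtt ioctl below; a plain mmap of the object through
 * i915_gem_mmap_ioctl() maps the shmem backing store directly and never
 * takes this path.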
1192 */ 1193 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1194 { 1195 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1196 struct drm_device *dev = obj->base.dev; 1197 drm_i915_private_t *dev_priv = dev->dev_private; 1198 pgoff_t page_offset; 1199 unsigned long pfn; 1200 int ret = 0; 1201 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1202 1203 /* We don't use vmf->pgoff since that has the fake offset */ 1204 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1205 PAGE_SHIFT; 1206 1207 ret = i915_mutex_lock_interruptible(dev); 1208 if (ret) 1209 goto out; 1210 1211 trace_i915_gem_object_fault(obj, page_offset, true, write); 1212 1213 /* Now bind it into the GTT if needed */ 1214 if (!obj->map_and_fenceable) { 1215 ret = i915_gem_object_unbind(obj); 1216 if (ret) 1217 goto unlock; 1218 } 1219 if (!obj->gtt_space) { 1220 ret = i915_gem_object_bind_to_gtt(obj, 0, true); 1221 if (ret) 1222 goto unlock; 1223 } 1224 1225 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1226 if (ret) 1227 goto unlock; 1228 1229 if (obj->tiling_mode == I915_TILING_NONE) 1230 ret = i915_gem_object_put_fence(obj); 1231 else 1232 ret = i915_gem_object_get_fence(obj, NULL); 1233 if (ret) 1234 goto unlock; 1235 1236 if (i915_gem_object_is_inactive(obj)) 1237 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 1238 1239 obj->fault_mappable = true; 1240 1241 pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) + 1242 page_offset; 1243 1244 /* Finally, remap it using the new GTT offset */ 1245 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1246 unlock: 1247 mutex_unlock(&dev->struct_mutex); 1248 out: 1249 switch (ret) { 1250 case -EIO: 1251 case -EAGAIN: 1252 /* Give the error handler a chance to run and move the 1253 * objects off the GPU active list. Next time we service the 1254 * fault, we should be able to transition the page into the 1255 * GTT without touching the GPU (and so avoid further 1256 * EIO/EGAIN). If the GPU is wedged, then there is no issue 1257 * with coherency, just lost writes. 1258 */ 1259 set_need_resched(); 1260 case 0: 1261 case -ERESTARTSYS: 1262 case -EINTR: 1263 return VM_FAULT_NOPAGE; 1264 case -ENOMEM: 1265 return VM_FAULT_OOM; 1266 default: 1267 return VM_FAULT_SIGBUS; 1268 } 1269 } 1270 1271 /** 1272 * i915_gem_create_mmap_offset - create a fake mmap offset for an object 1273 * @obj: obj in question 1274 * 1275 * GEM memory mapping works by handing back to userspace a fake mmap offset 1276 * it can use in a subsequent mmap(2) call. The DRM core code then looks 1277 * up the object based on the offset and sets up the various memory mapping 1278 * structures. 1279 * 1280 * This routine allocates and attaches a fake offset for @obj. 1281 */ 1282 static int 1283 i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj) 1284 { 1285 struct drm_device *dev = obj->base.dev; 1286 struct drm_gem_mm *mm = dev->mm_private; 1287 struct drm_map_list *list; 1288 struct drm_local_map *map; 1289 int ret = 0; 1290 1291 /* Set the object up for mmap'ing */ 1292 list = &obj->base.map_list; 1293 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL); 1294 if (!list->map) 1295 return -ENOMEM; 1296 1297 map = list->map; 1298 map->type = _DRM_GEM; 1299 map->size = obj->base.size; 1300 map->handle = obj; 1301 1302 /* Get a DRM GEM mmap offset allocated... 
*/ 1303 list->file_offset_node = drm_mm_search_free(&mm->offset_manager, 1304 obj->base.size / PAGE_SIZE, 1305 0, 0); 1306 if (!list->file_offset_node) { 1307 DRM_ERROR("failed to allocate offset for bo %d\n", 1308 obj->base.name); 1309 ret = -ENOSPC; 1310 goto out_free_list; 1311 } 1312 1313 list->file_offset_node = drm_mm_get_block(list->file_offset_node, 1314 obj->base.size / PAGE_SIZE, 1315 0); 1316 if (!list->file_offset_node) { 1317 ret = -ENOMEM; 1318 goto out_free_list; 1319 } 1320 1321 list->hash.key = list->file_offset_node->start; 1322 ret = drm_ht_insert_item(&mm->offset_hash, &list->hash); 1323 if (ret) { 1324 DRM_ERROR("failed to add to map hash\n"); 1325 goto out_free_mm; 1326 } 1327 1328 return 0; 1329 1330 out_free_mm: 1331 drm_mm_put_block(list->file_offset_node); 1332 out_free_list: 1333 kfree(list->map); 1334 list->map = NULL; 1335 1336 return ret; 1337 } 1338 1339 /** 1340 * i915_gem_release_mmap - remove physical page mappings 1341 * @obj: obj in question 1342 * 1343 * Preserve the reservation of the mmapping with the DRM core code, but 1344 * relinquish ownership of the pages back to the system. 1345 * 1346 * It is vital that we remove the page mapping if we have mapped a tiled 1347 * object through the GTT and then lose the fence register due to 1348 * resource pressure. Similarly if the object has been moved out of the 1349 * aperture, than pages mapped into userspace must be revoked. Removing the 1350 * mapping will then trigger a page fault on the next user access, allowing 1351 * fixup by i915_gem_fault(). 1352 */ 1353 void 1354 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1355 { 1356 if (!obj->fault_mappable) 1357 return; 1358 1359 unmap_mapping_range(obj->base.dev->dev_mapping, 1360 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, 1361 obj->base.size, 1); 1362 1363 obj->fault_mappable = false; 1364 } 1365 1366 static void 1367 i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj) 1368 { 1369 struct drm_device *dev = obj->base.dev; 1370 struct drm_gem_mm *mm = dev->mm_private; 1371 struct drm_map_list *list = &obj->base.map_list; 1372 1373 drm_ht_remove_item(&mm->offset_hash, &list->hash); 1374 drm_mm_put_block(list->file_offset_node); 1375 kfree(list->map); 1376 list->map = NULL; 1377 } 1378 1379 static uint32_t 1380 i915_gem_get_gtt_size(struct drm_i915_gem_object *obj) 1381 { 1382 struct drm_device *dev = obj->base.dev; 1383 uint32_t size; 1384 1385 if (INTEL_INFO(dev)->gen >= 4 || 1386 obj->tiling_mode == I915_TILING_NONE) 1387 return obj->base.size; 1388 1389 /* Previous chips need a power-of-two fence region when tiling */ 1390 if (INTEL_INFO(dev)->gen == 3) 1391 size = 1024*1024; 1392 else 1393 size = 512*1024; 1394 1395 while (size < obj->base.size) 1396 size <<= 1; 1397 1398 return size; 1399 } 1400 1401 /** 1402 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1403 * @obj: object to check 1404 * 1405 * Return the required GTT alignment for an object, taking into account 1406 * potential fence register mapping. 1407 */ 1408 static uint32_t 1409 i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj) 1410 { 1411 struct drm_device *dev = obj->base.dev; 1412 1413 /* 1414 * Minimum alignment is 4k (GTT page size), but might be greater 1415 * if a fence register is needed for the object. 
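	 * For example, on gen3 the fence size starts at 1 MiB (512 KiB on
	 * gen2) and is doubled until it covers the object, so a 1.5 MiB
	 * tiled object needs a 2 MiB fence region and hence a 2 MiB-aligned
	 * GTT offset; on gen4+ (or for untiled objects) 4 KiB is always
	 * sufficient here.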
1416 */ 1417 if (INTEL_INFO(dev)->gen >= 4 || 1418 obj->tiling_mode == I915_TILING_NONE) 1419 return 4096; 1420 1421 /* 1422 * Previous chips need to be aligned to the size of the smallest 1423 * fence register that can contain the object. 1424 */ 1425 return i915_gem_get_gtt_size(obj); 1426 } 1427 1428 /** 1429 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an 1430 * unfenced object 1431 * @obj: object to check 1432 * 1433 * Return the required GTT alignment for an object, only taking into account 1434 * unfenced tiled surface requirements. 1435 */ 1436 uint32_t 1437 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj) 1438 { 1439 struct drm_device *dev = obj->base.dev; 1440 int tile_height; 1441 1442 /* 1443 * Minimum alignment is 4k (GTT page size) for sane hw. 1444 */ 1445 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || 1446 obj->tiling_mode == I915_TILING_NONE) 1447 return 4096; 1448 1449 /* 1450 * Older chips need unfenced tiled buffers to be aligned to the left 1451 * edge of an even tile row (where tile rows are counted as if the bo is 1452 * placed in a fenced gtt region). 1453 */ 1454 if (IS_GEN2(dev) || 1455 (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) 1456 tile_height = 32; 1457 else 1458 tile_height = 8; 1459 1460 return tile_height * obj->stride * 2; 1461 } 1462 1463 int 1464 i915_gem_mmap_gtt(struct drm_file *file, 1465 struct drm_device *dev, 1466 uint32_t handle, 1467 uint64_t *offset) 1468 { 1469 struct drm_i915_private *dev_priv = dev->dev_private; 1470 struct drm_i915_gem_object *obj; 1471 int ret; 1472 1473 if (!(dev->driver->driver_features & DRIVER_GEM)) 1474 return -ENODEV; 1475 1476 ret = i915_mutex_lock_interruptible(dev); 1477 if (ret) 1478 return ret; 1479 1480 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1481 if (&obj->base == NULL) { 1482 ret = -ENOENT; 1483 goto unlock; 1484 } 1485 1486 if (obj->base.size > dev_priv->mm.gtt_mappable_end) { 1487 ret = -E2BIG; 1488 goto unlock; 1489 } 1490 1491 if (obj->madv != I915_MADV_WILLNEED) { 1492 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1493 ret = -EINVAL; 1494 goto out; 1495 } 1496 1497 if (!obj->base.map_list.map) { 1498 ret = i915_gem_create_mmap_offset(obj); 1499 if (ret) 1500 goto out; 1501 } 1502 1503 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; 1504 1505 out: 1506 drm_gem_object_unreference(&obj->base); 1507 unlock: 1508 mutex_unlock(&dev->struct_mutex); 1509 return ret; 1510 } 1511 1512 /** 1513 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1514 * @dev: DRM device 1515 * @data: GTT mapping ioctl data 1516 * @file: GEM object info 1517 * 1518 * Simply returns the fake offset to userspace so it can mmap it. 1519 * The mmap call will end up in drm_gem_mmap(), which will set things 1520 * up so we can get faults in the handler above. 1521 * 1522 * The fault handler will take care of binding the object into the GTT 1523 * (since it may have been evicted to make room for something), allocating 1524 * a fence register, and mapping the appropriate aperture address into 1525 * userspace. 
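 *
 * A minimal userspace sketch of the intended flow (illustrative only; error
 * handling omitted and "fd"/"handle"/"size" assumed to come from the caller):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * Accesses through the returned pointer go via the GTT aperture and are
 * serviced by i915_gem_fault() above.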
1526 */ 1527 int 1528 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1529 struct drm_file *file) 1530 { 1531 struct drm_i915_gem_mmap_gtt *args = data; 1532 1533 if (!(dev->driver->driver_features & DRIVER_GEM)) 1534 return -ENODEV; 1535 1536 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1537 } 1538 1539 1540 static int 1541 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, 1542 gfp_t gfpmask) 1543 { 1544 int page_count, i; 1545 struct address_space *mapping; 1546 struct inode *inode; 1547 struct page *page; 1548 1549 /* Get the list of pages out of our struct file. They'll be pinned 1550 * at this point until we release them. 1551 */ 1552 page_count = obj->base.size / PAGE_SIZE; 1553 BUG_ON(obj->pages != NULL); 1554 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *)); 1555 if (obj->pages == NULL) 1556 return -ENOMEM; 1557 1558 inode = obj->base.filp->f_path.dentry->d_inode; 1559 mapping = inode->i_mapping; 1560 for (i = 0; i < page_count; i++) { 1561 page = read_cache_page_gfp(mapping, i, 1562 GFP_HIGHUSER | 1563 __GFP_COLD | 1564 __GFP_RECLAIMABLE | 1565 gfpmask); 1566 if (IS_ERR(page)) 1567 goto err_pages; 1568 1569 obj->pages[i] = page; 1570 } 1571 1572 if (obj->tiling_mode != I915_TILING_NONE) 1573 i915_gem_object_do_bit_17_swizzle(obj); 1574 1575 return 0; 1576 1577 err_pages: 1578 while (i--) 1579 page_cache_release(obj->pages[i]); 1580 1581 drm_free_large(obj->pages); 1582 obj->pages = NULL; 1583 return PTR_ERR(page); 1584 } 1585 1586 static void 1587 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1588 { 1589 int page_count = obj->base.size / PAGE_SIZE; 1590 int i; 1591 1592 BUG_ON(obj->madv == __I915_MADV_PURGED); 1593 1594 if (obj->tiling_mode != I915_TILING_NONE) 1595 i915_gem_object_save_bit_17_swizzle(obj); 1596 1597 if (obj->madv == I915_MADV_DONTNEED) 1598 obj->dirty = 0; 1599 1600 for (i = 0; i < page_count; i++) { 1601 if (obj->dirty) 1602 set_page_dirty(obj->pages[i]); 1603 1604 if (obj->madv == I915_MADV_WILLNEED) 1605 mark_page_accessed(obj->pages[i]); 1606 1607 page_cache_release(obj->pages[i]); 1608 } 1609 obj->dirty = 0; 1610 1611 drm_free_large(obj->pages); 1612 obj->pages = NULL; 1613 } 1614 1615 void 1616 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 1617 struct intel_ring_buffer *ring, 1618 u32 seqno) 1619 { 1620 struct drm_device *dev = obj->base.dev; 1621 struct drm_i915_private *dev_priv = dev->dev_private; 1622 1623 BUG_ON(ring == NULL); 1624 obj->ring = ring; 1625 1626 /* Add a reference if we're newly entering the active list. */ 1627 if (!obj->active) { 1628 drm_gem_object_reference(&obj->base); 1629 obj->active = 1; 1630 } 1631 1632 /* Move from whatever list we were on to the tail of execution. 
	 */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_rendering_seqno = seqno;
	if (obj->fenced_gpu_access) {
		struct drm_i915_fence_reg *reg;

		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);

		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
	}
}

static void
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
}

static void
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);

	i915_gem_object_move_off_active(obj);
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);
	obj->ring = NULL;

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;

	obj->active = 0;
	obj->pending_gpu_write = false;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*. Here we mirror the actions taken
	 * by shmem_delete_inode() to release the backing store.
1705 */ 1706 inode = obj->base.filp->f_path.dentry->d_inode; 1707 truncate_inode_pages(inode->i_mapping, 0); 1708 if (inode->i_op->truncate_range) 1709 inode->i_op->truncate_range(inode, 0, (loff_t)-1); 1710 1711 obj->madv = __I915_MADV_PURGED; 1712 } 1713 1714 static inline int 1715 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1716 { 1717 return obj->madv == I915_MADV_DONTNEED; 1718 } 1719 1720 static void 1721 i915_gem_process_flushing_list(struct intel_ring_buffer *ring, 1722 uint32_t flush_domains) 1723 { 1724 struct drm_i915_gem_object *obj, *next; 1725 1726 list_for_each_entry_safe(obj, next, 1727 &ring->gpu_write_list, 1728 gpu_write_list) { 1729 if (obj->base.write_domain & flush_domains) { 1730 uint32_t old_write_domain = obj->base.write_domain; 1731 1732 obj->base.write_domain = 0; 1733 list_del_init(&obj->gpu_write_list); 1734 i915_gem_object_move_to_active(obj, ring, 1735 i915_gem_next_request_seqno(ring)); 1736 1737 trace_i915_gem_object_change_domain(obj, 1738 obj->base.read_domains, 1739 old_write_domain); 1740 } 1741 } 1742 } 1743 1744 int 1745 i915_add_request(struct intel_ring_buffer *ring, 1746 struct drm_file *file, 1747 struct drm_i915_gem_request *request) 1748 { 1749 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1750 uint32_t seqno; 1751 int was_empty; 1752 int ret; 1753 1754 BUG_ON(request == NULL); 1755 1756 ret = ring->add_request(ring, &seqno); 1757 if (ret) 1758 return ret; 1759 1760 trace_i915_gem_request_add(ring, seqno); 1761 1762 request->seqno = seqno; 1763 request->ring = ring; 1764 request->emitted_jiffies = jiffies; 1765 was_empty = list_empty(&ring->request_list); 1766 list_add_tail(&request->list, &ring->request_list); 1767 1768 if (file) { 1769 struct drm_i915_file_private *file_priv = file->driver_priv; 1770 1771 spin_lock(&file_priv->mm.lock); 1772 request->file_priv = file_priv; 1773 list_add_tail(&request->client_list, 1774 &file_priv->mm.request_list); 1775 spin_unlock(&file_priv->mm.lock); 1776 } 1777 1778 ring->outstanding_lazy_request = false; 1779 1780 if (!dev_priv->mm.suspended) { 1781 mod_timer(&dev_priv->hangcheck_timer, 1782 jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); 1783 if (was_empty) 1784 queue_delayed_work(dev_priv->wq, 1785 &dev_priv->mm.retire_work, HZ); 1786 } 1787 return 0; 1788 } 1789 1790 static inline void 1791 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1792 { 1793 struct drm_i915_file_private *file_priv = request->file_priv; 1794 1795 if (!file_priv) 1796 return; 1797 1798 spin_lock(&file_priv->mm.lock); 1799 list_del(&request->client_list); 1800 request->file_priv = NULL; 1801 spin_unlock(&file_priv->mm.lock); 1802 } 1803 1804 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 1805 struct intel_ring_buffer *ring) 1806 { 1807 while (!list_empty(&ring->request_list)) { 1808 struct drm_i915_gem_request *request; 1809 1810 request = list_first_entry(&ring->request_list, 1811 struct drm_i915_gem_request, 1812 list); 1813 1814 list_del(&request->list); 1815 i915_gem_request_remove_from_client(request); 1816 kfree(request); 1817 } 1818 1819 while (!list_empty(&ring->active_list)) { 1820 struct drm_i915_gem_object *obj; 1821 1822 obj = list_first_entry(&ring->active_list, 1823 struct drm_i915_gem_object, 1824 ring_list); 1825 1826 obj->base.write_domain = 0; 1827 list_del_init(&obj->gpu_write_list); 1828 i915_gem_object_move_to_inactive(obj); 1829 } 1830 } 1831 1832 static void i915_gem_reset_fences(struct drm_device *dev) 1833 { 1834 struct 
drm_i915_private *dev_priv = dev->dev_private; 1835 int i; 1836 1837 for (i = 0; i < 16; i++) { 1838 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 1839 struct drm_i915_gem_object *obj = reg->obj; 1840 1841 if (!obj) 1842 continue; 1843 1844 if (obj->tiling_mode) 1845 i915_gem_release_mmap(obj); 1846 1847 reg->obj->fence_reg = I915_FENCE_REG_NONE; 1848 reg->obj->fenced_gpu_access = false; 1849 reg->obj->last_fenced_seqno = 0; 1850 reg->obj->last_fenced_ring = NULL; 1851 i915_gem_clear_fence_reg(dev, reg); 1852 } 1853 } 1854 1855 void i915_gem_reset(struct drm_device *dev) 1856 { 1857 struct drm_i915_private *dev_priv = dev->dev_private; 1858 struct drm_i915_gem_object *obj; 1859 int i; 1860 1861 for (i = 0; i < I915_NUM_RINGS; i++) 1862 i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]); 1863 1864 /* Remove anything from the flushing lists. The GPU cache is likely 1865 * to be lost on reset along with the data, so simply move the 1866 * lost bo to the inactive list. 1867 */ 1868 while (!list_empty(&dev_priv->mm.flushing_list)) { 1869 obj= list_first_entry(&dev_priv->mm.flushing_list, 1870 struct drm_i915_gem_object, 1871 mm_list); 1872 1873 obj->base.write_domain = 0; 1874 list_del_init(&obj->gpu_write_list); 1875 i915_gem_object_move_to_inactive(obj); 1876 } 1877 1878 /* Move everything out of the GPU domains to ensure we do any 1879 * necessary invalidation upon reuse. 1880 */ 1881 list_for_each_entry(obj, 1882 &dev_priv->mm.inactive_list, 1883 mm_list) 1884 { 1885 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 1886 } 1887 1888 /* The fence registers are invalidated so clear them out */ 1889 i915_gem_reset_fences(dev); 1890 } 1891 1892 /** 1893 * This function clears the request list as sequence numbers are passed. 1894 */ 1895 static void 1896 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 1897 { 1898 uint32_t seqno; 1899 int i; 1900 1901 if (list_empty(&ring->request_list)) 1902 return; 1903 1904 WARN_ON(i915_verify_lists(ring->dev)); 1905 1906 seqno = ring->get_seqno(ring); 1907 1908 for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) 1909 if (seqno >= ring->sync_seqno[i]) 1910 ring->sync_seqno[i] = 0; 1911 1912 while (!list_empty(&ring->request_list)) { 1913 struct drm_i915_gem_request *request; 1914 1915 request = list_first_entry(&ring->request_list, 1916 struct drm_i915_gem_request, 1917 list); 1918 1919 if (!i915_seqno_passed(seqno, request->seqno)) 1920 break; 1921 1922 trace_i915_gem_request_retire(ring, request->seqno); 1923 1924 list_del(&request->list); 1925 i915_gem_request_remove_from_client(request); 1926 kfree(request); 1927 } 1928 1929 /* Move any buffers on the active list that are no longer referenced 1930 * by the ringbuffer to the flushing/inactive lists as appropriate. 
1931 */ 1932 while (!list_empty(&ring->active_list)) { 1933 struct drm_i915_gem_object *obj; 1934 1935 obj= list_first_entry(&ring->active_list, 1936 struct drm_i915_gem_object, 1937 ring_list); 1938 1939 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) 1940 break; 1941 1942 if (obj->base.write_domain != 0) 1943 i915_gem_object_move_to_flushing(obj); 1944 else 1945 i915_gem_object_move_to_inactive(obj); 1946 } 1947 1948 if (unlikely(ring->trace_irq_seqno && 1949 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 1950 ring->irq_put(ring); 1951 ring->trace_irq_seqno = 0; 1952 } 1953 1954 WARN_ON(i915_verify_lists(ring->dev)); 1955 } 1956 1957 void 1958 i915_gem_retire_requests(struct drm_device *dev) 1959 { 1960 drm_i915_private_t *dev_priv = dev->dev_private; 1961 int i; 1962 1963 if (!list_empty(&dev_priv->mm.deferred_free_list)) { 1964 struct drm_i915_gem_object *obj, *next; 1965 1966 /* We must be careful that during unbind() we do not 1967 * accidentally infinitely recurse into retire requests. 1968 * Currently: 1969 * retire -> free -> unbind -> wait -> retire_ring 1970 */ 1971 list_for_each_entry_safe(obj, next, 1972 &dev_priv->mm.deferred_free_list, 1973 mm_list) 1974 i915_gem_free_object_tail(obj); 1975 } 1976 1977 for (i = 0; i < I915_NUM_RINGS; i++) 1978 i915_gem_retire_requests_ring(&dev_priv->ring[i]); 1979 } 1980 1981 static void 1982 i915_gem_retire_work_handler(struct work_struct *work) 1983 { 1984 drm_i915_private_t *dev_priv; 1985 struct drm_device *dev; 1986 bool idle; 1987 int i; 1988 1989 dev_priv = container_of(work, drm_i915_private_t, 1990 mm.retire_work.work); 1991 dev = dev_priv->dev; 1992 1993 /* Come back later if the device is busy... */ 1994 if (!mutex_trylock(&dev->struct_mutex)) { 1995 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1996 return; 1997 } 1998 1999 i915_gem_retire_requests(dev); 2000 2001 /* Send a periodic flush down the ring so we don't hold onto GEM 2002 * objects indefinitely. 2003 */ 2004 idle = true; 2005 for (i = 0; i < I915_NUM_RINGS; i++) { 2006 struct intel_ring_buffer *ring = &dev_priv->ring[i]; 2007 2008 if (!list_empty(&ring->gpu_write_list)) { 2009 struct drm_i915_gem_request *request; 2010 int ret; 2011 2012 ret = i915_gem_flush_ring(ring, 2013 0, I915_GEM_GPU_DOMAINS); 2014 request = kzalloc(sizeof(*request), GFP_KERNEL); 2015 if (ret || request == NULL || 2016 i915_add_request(ring, NULL, request)) 2017 kfree(request); 2018 } 2019 2020 idle &= list_empty(&ring->request_list); 2021 } 2022 2023 if (!dev_priv->mm.suspended && !idle) 2024 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 2025 2026 mutex_unlock(&dev->struct_mutex); 2027 } 2028 2029 /** 2030 * Waits for a sequence number to be signaled, and cleans up the 2031 * request and object lists appropriately for that event. 2032 */ 2033 int 2034 i915_wait_request(struct intel_ring_buffer *ring, 2035 uint32_t seqno) 2036 { 2037 drm_i915_private_t *dev_priv = ring->dev->dev_private; 2038 u32 ier; 2039 int ret = 0; 2040 2041 BUG_ON(seqno == 0); 2042 2043 if (atomic_read(&dev_priv->mm.wedged)) { 2044 struct completion *x = &dev_priv->error_completion; 2045 bool recovery_complete; 2046 unsigned long flags; 2047 2048 /* Give the error handler a chance to run. */ 2049 spin_lock_irqsave(&x->wait.lock, flags); 2050 recovery_complete = x->done > 0; 2051 spin_unlock_irqrestore(&x->wait.lock, flags); 2052 2053 return recovery_complete ? 
-EIO : -EAGAIN; 2054 } 2055 2056 if (seqno == ring->outstanding_lazy_request) { 2057 struct drm_i915_gem_request *request; 2058 2059 request = kzalloc(sizeof(*request), GFP_KERNEL); 2060 if (request == NULL) 2061 return -ENOMEM; 2062 2063 ret = i915_add_request(ring, NULL, request); 2064 if (ret) { 2065 kfree(request); 2066 return ret; 2067 } 2068 2069 seqno = request->seqno; 2070 } 2071 2072 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 2073 if (HAS_PCH_SPLIT(ring->dev)) 2074 ier = I915_READ(DEIER) | I915_READ(GTIER); 2075 else 2076 ier = I915_READ(IER); 2077 if (!ier) { 2078 DRM_ERROR("something (likely vbetool) disabled " 2079 "interrupts, re-enabling\n"); 2080 i915_driver_irq_preinstall(ring->dev); 2081 i915_driver_irq_postinstall(ring->dev); 2082 } 2083 2084 trace_i915_gem_request_wait_begin(ring, seqno); 2085 2086 ring->waiting_seqno = seqno; 2087 if (ring->irq_get(ring)) { 2088 if (dev_priv->mm.interruptible) 2089 ret = wait_event_interruptible(ring->irq_queue, 2090 i915_seqno_passed(ring->get_seqno(ring), seqno) 2091 || atomic_read(&dev_priv->mm.wedged)); 2092 else 2093 wait_event(ring->irq_queue, 2094 i915_seqno_passed(ring->get_seqno(ring), seqno) 2095 || atomic_read(&dev_priv->mm.wedged)); 2096 2097 ring->irq_put(ring); 2098 } else if (wait_for(i915_seqno_passed(ring->get_seqno(ring), 2099 seqno) || 2100 atomic_read(&dev_priv->mm.wedged), 3000)) 2101 ret = -EBUSY; 2102 ring->waiting_seqno = 0; 2103 2104 trace_i915_gem_request_wait_end(ring, seqno); 2105 } 2106 if (atomic_read(&dev_priv->mm.wedged)) 2107 ret = -EAGAIN; 2108 2109 if (ret && ret != -ERESTARTSYS) 2110 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n", 2111 __func__, ret, seqno, ring->get_seqno(ring), 2112 dev_priv->next_seqno); 2113 2114 /* Directly dispatch request retiring. While we have the work queue 2115 * to handle this, the waiter on a request often wants an associated 2116 * buffer to have made it to the inactive list, and we would need 2117 * a separate wait queue to handle that. 2118 */ 2119 if (ret == 0) 2120 i915_gem_retire_requests_ring(ring); 2121 2122 return ret; 2123 } 2124 2125 /** 2126 * Ensures that all rendering to the object has completed and the object is 2127 * safe to unbind from the GTT or access from the CPU. 2128 */ 2129 int 2130 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) 2131 { 2132 int ret; 2133 2134 /* This function only exists to support waiting for existing rendering, 2135 * not for emitting required flushes. 2136 */ 2137 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0); 2138 2139 /* If there is rendering queued on the buffer being evicted, wait for 2140 * it. 2141 */ 2142 if (obj->active) { 2143 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); 2144 if (ret) 2145 return ret; 2146 } 2147 2148 return 0; 2149 } 2150 2151 /** 2152 * Unbinds an object from the GTT aperture. 2153 */ 2154 int 2155 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2156 { 2157 int ret = 0; 2158 2159 if (obj->gtt_space == NULL) 2160 return 0; 2161 2162 if (obj->pin_count != 0) { 2163 DRM_ERROR("Attempting to unbind pinned buffer\n"); 2164 return -EINVAL; 2165 } 2166 2167 /* blow away mappings if mapped through GTT */ 2168 i915_gem_release_mmap(obj); 2169 2170 /* Move the object to the CPU domain to ensure that 2171 * any possible CPU writes while it's not in the GTT 2172 * are flushed when we go to remap it. This will 2173 * also ensure that all pending GPU writes are finished 2174 * before we unbind. 
2175 */ 2176 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2177 if (ret == -ERESTARTSYS) 2178 return ret; 2179 /* Continue on if we fail due to EIO, the GPU is hung so we 2180 * should be safe and we need to cleanup or else we might 2181 * cause memory corruption through use-after-free. 2182 */ 2183 if (ret) { 2184 i915_gem_clflush_object(obj); 2185 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2186 } 2187 2188 /* release the fence reg _after_ flushing */ 2189 ret = i915_gem_object_put_fence(obj); 2190 if (ret == -ERESTARTSYS) 2191 return ret; 2192 2193 trace_i915_gem_object_unbind(obj); 2194 2195 i915_gem_gtt_unbind_object(obj); 2196 i915_gem_object_put_pages_gtt(obj); 2197 2198 list_del_init(&obj->gtt_list); 2199 list_del_init(&obj->mm_list); 2200 /* Avoid an unnecessary call to unbind on rebind. */ 2201 obj->map_and_fenceable = true; 2202 2203 drm_mm_put_block(obj->gtt_space); 2204 obj->gtt_space = NULL; 2205 obj->gtt_offset = 0; 2206 2207 if (i915_gem_object_is_purgeable(obj)) 2208 i915_gem_object_truncate(obj); 2209 2210 return ret; 2211 } 2212 2213 int 2214 i915_gem_flush_ring(struct intel_ring_buffer *ring, 2215 uint32_t invalidate_domains, 2216 uint32_t flush_domains) 2217 { 2218 int ret; 2219 2220 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); 2221 2222 ret = ring->flush(ring, invalidate_domains, flush_domains); 2223 if (ret) 2224 return ret; 2225 2226 i915_gem_process_flushing_list(ring, flush_domains); 2227 return 0; 2228 } 2229 2230 static int i915_ring_idle(struct intel_ring_buffer *ring) 2231 { 2232 int ret; 2233 2234 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) 2235 return 0; 2236 2237 if (!list_empty(&ring->gpu_write_list)) { 2238 ret = i915_gem_flush_ring(ring, 2239 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2240 if (ret) 2241 return ret; 2242 } 2243 2244 return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); 2245 } 2246 2247 int 2248 i915_gpu_idle(struct drm_device *dev) 2249 { 2250 drm_i915_private_t *dev_priv = dev->dev_private; 2251 bool lists_empty; 2252 int ret, i; 2253 2254 lists_empty = (list_empty(&dev_priv->mm.flushing_list) && 2255 list_empty(&dev_priv->mm.active_list)); 2256 if (lists_empty) 2257 return 0; 2258 2259 /* Flush everything onto the inactive list. 
*/ 2260 for (i = 0; i < I915_NUM_RINGS; i++) { 2261 ret = i915_ring_idle(&dev_priv->ring[i]); 2262 if (ret) 2263 return ret; 2264 } 2265 2266 return 0; 2267 } 2268 2269 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, 2270 struct intel_ring_buffer *pipelined) 2271 { 2272 struct drm_device *dev = obj->base.dev; 2273 drm_i915_private_t *dev_priv = dev->dev_private; 2274 u32 size = obj->gtt_space->size; 2275 int regnum = obj->fence_reg; 2276 uint64_t val; 2277 2278 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2279 0xfffff000) << 32; 2280 val |= obj->gtt_offset & 0xfffff000; 2281 val |= (uint64_t)((obj->stride / 128) - 1) << 2282 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2283 2284 if (obj->tiling_mode == I915_TILING_Y) 2285 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2286 val |= I965_FENCE_REG_VALID; 2287 2288 if (pipelined) { 2289 int ret = intel_ring_begin(pipelined, 6); 2290 if (ret) 2291 return ret; 2292 2293 intel_ring_emit(pipelined, MI_NOOP); 2294 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2295 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8); 2296 intel_ring_emit(pipelined, (u32)val); 2297 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4); 2298 intel_ring_emit(pipelined, (u32)(val >> 32)); 2299 intel_ring_advance(pipelined); 2300 } else 2301 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); 2302 2303 return 0; 2304 } 2305 2306 static int i965_write_fence_reg(struct drm_i915_gem_object *obj, 2307 struct intel_ring_buffer *pipelined) 2308 { 2309 struct drm_device *dev = obj->base.dev; 2310 drm_i915_private_t *dev_priv = dev->dev_private; 2311 u32 size = obj->gtt_space->size; 2312 int regnum = obj->fence_reg; 2313 uint64_t val; 2314 2315 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2316 0xfffff000) << 32; 2317 val |= obj->gtt_offset & 0xfffff000; 2318 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2319 if (obj->tiling_mode == I915_TILING_Y) 2320 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2321 val |= I965_FENCE_REG_VALID; 2322 2323 if (pipelined) { 2324 int ret = intel_ring_begin(pipelined, 6); 2325 if (ret) 2326 return ret; 2327 2328 intel_ring_emit(pipelined, MI_NOOP); 2329 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2330 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8); 2331 intel_ring_emit(pipelined, (u32)val); 2332 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4); 2333 intel_ring_emit(pipelined, (u32)(val >> 32)); 2334 intel_ring_advance(pipelined); 2335 } else 2336 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); 2337 2338 return 0; 2339 } 2340 2341 static int i915_write_fence_reg(struct drm_i915_gem_object *obj, 2342 struct intel_ring_buffer *pipelined) 2343 { 2344 struct drm_device *dev = obj->base.dev; 2345 drm_i915_private_t *dev_priv = dev->dev_private; 2346 u32 size = obj->gtt_space->size; 2347 u32 fence_reg, val, pitch_val; 2348 int tile_width; 2349 2350 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2351 (size & -size) != size || 2352 (obj->gtt_offset & (size - 1)), 2353 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2354 obj->gtt_offset, obj->map_and_fenceable, size)) 2355 return -EINVAL; 2356 2357 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2358 tile_width = 128; 2359 else 2360 tile_width = 512; 2361 2362 /* Note: pitch better be a power of two tile widths */ 2363 pitch_val = obj->stride / tile_width; 2364 pitch_val = ffs(pitch_val) - 1; 2365 2366 val = obj->gtt_offset; 2367 if (obj->tiling_mode == I915_TILING_Y) 2368 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2369 val |= I915_FENCE_SIZE_BITS(size); 2370 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2371 val |= I830_FENCE_REG_VALID; 2372 2373 fence_reg = obj->fence_reg; 2374 if (fence_reg < 8) 2375 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2376 else 2377 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2378 2379 if (pipelined) { 2380 int ret = intel_ring_begin(pipelined, 4); 2381 if (ret) 2382 return ret; 2383 2384 intel_ring_emit(pipelined, MI_NOOP); 2385 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2386 intel_ring_emit(pipelined, fence_reg); 2387 intel_ring_emit(pipelined, val); 2388 intel_ring_advance(pipelined); 2389 } else 2390 I915_WRITE(fence_reg, val); 2391 2392 return 0; 2393 } 2394 2395 static int i830_write_fence_reg(struct drm_i915_gem_object *obj, 2396 struct intel_ring_buffer *pipelined) 2397 { 2398 struct drm_device *dev = obj->base.dev; 2399 drm_i915_private_t *dev_priv = dev->dev_private; 2400 u32 size = obj->gtt_space->size; 2401 int regnum = obj->fence_reg; 2402 uint32_t val; 2403 uint32_t pitch_val; 2404 2405 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2406 (size & -size) != size || 2407 (obj->gtt_offset & (size - 1)), 2408 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2409 obj->gtt_offset, size)) 2410 return -EINVAL; 2411 2412 pitch_val = obj->stride / 128; 2413 pitch_val = ffs(pitch_val) - 1; 2414 2415 val = obj->gtt_offset; 2416 if (obj->tiling_mode == I915_TILING_Y) 2417 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2418 val |= I830_FENCE_SIZE_BITS(size); 2419 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2420 val |= I830_FENCE_REG_VALID; 2421 2422 if (pipelined) { 2423 int ret = intel_ring_begin(pipelined, 4); 2424 if (ret) 2425 return ret; 2426 2427 intel_ring_emit(pipelined, MI_NOOP); 2428 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2429 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); 2430 intel_ring_emit(pipelined, val); 2431 intel_ring_advance(pipelined); 2432 } else 2433 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); 2434 2435 return 0; 2436 } 2437 2438 static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) 2439 { 2440 return i915_seqno_passed(ring->get_seqno(ring), seqno); 2441 } 2442 2443 static int 2444 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, 2445 struct intel_ring_buffer *pipelined) 2446 { 2447 int ret; 2448 2449 if (obj->fenced_gpu_access) { 2450 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2451 ret = i915_gem_flush_ring(obj->last_fenced_ring, 2452 0, obj->base.write_domain); 2453 if (ret) 2454 return ret; 2455 } 2456 2457 obj->fenced_gpu_access = false; 2458 } 2459 2460 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { 2461 if (!ring_passed_seqno(obj->last_fenced_ring, 2462 obj->last_fenced_seqno)) { 2463 ret = i915_wait_request(obj->last_fenced_ring, 2464 obj->last_fenced_seqno); 2465 if (ret) 2466 return ret; 2467 } 2468 2469 obj->last_fenced_seqno = 0; 2470 obj->last_fenced_ring = NULL; 2471 } 2472 2473 /* Ensure that all CPU reads are completed 
before installing a fence 2474 * and all writes before removing the fence. 2475 */ 2476 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2477 mb(); 2478 2479 return 0; 2480 } 2481 2482 int 2483 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2484 { 2485 int ret; 2486 2487 if (obj->tiling_mode) 2488 i915_gem_release_mmap(obj); 2489 2490 ret = i915_gem_object_flush_fence(obj, NULL); 2491 if (ret) 2492 return ret; 2493 2494 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2495 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2496 i915_gem_clear_fence_reg(obj->base.dev, 2497 &dev_priv->fence_regs[obj->fence_reg]); 2498 2499 obj->fence_reg = I915_FENCE_REG_NONE; 2500 } 2501 2502 return 0; 2503 } 2504 2505 static struct drm_i915_fence_reg * 2506 i915_find_fence_reg(struct drm_device *dev, 2507 struct intel_ring_buffer *pipelined) 2508 { 2509 struct drm_i915_private *dev_priv = dev->dev_private; 2510 struct drm_i915_fence_reg *reg, *first, *avail; 2511 int i; 2512 2513 /* First try to find a free reg */ 2514 avail = NULL; 2515 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2516 reg = &dev_priv->fence_regs[i]; 2517 if (!reg->obj) 2518 return reg; 2519 2520 if (!reg->obj->pin_count) 2521 avail = reg; 2522 } 2523 2524 if (avail == NULL) 2525 return NULL; 2526 2527 /* None available, try to steal one or wait for a user to finish */ 2528 avail = first = NULL; 2529 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2530 if (reg->obj->pin_count) 2531 continue; 2532 2533 if (first == NULL) 2534 first = reg; 2535 2536 if (!pipelined || 2537 !reg->obj->last_fenced_ring || 2538 reg->obj->last_fenced_ring == pipelined) { 2539 avail = reg; 2540 break; 2541 } 2542 } 2543 2544 if (avail == NULL) 2545 avail = first; 2546 2547 return avail; 2548 } 2549 2550 /** 2551 * i915_gem_object_get_fence - set up a fence reg for an object 2552 * @obj: object to map through a fence reg 2553 * @pipelined: ring on which to queue the change, or NULL for CPU access 2554 * @interruptible: must we wait uninterruptibly for the register to retire? 2555 * 2556 * When mapping objects through the GTT, userspace wants to be able to write 2557 * to them without having to worry about swizzling if the object is tiled. 2558 * 2559 * This function walks the fence regs looking for a free one for @obj, 2560 * stealing one if it can't find any. 2561 * 2562 * It then sets up the reg based on the object's properties: address, pitch 2563 * and tiling format. 2564 */ 2565 int 2566 i915_gem_object_get_fence(struct drm_i915_gem_object *obj, 2567 struct intel_ring_buffer *pipelined) 2568 { 2569 struct drm_device *dev = obj->base.dev; 2570 struct drm_i915_private *dev_priv = dev->dev_private; 2571 struct drm_i915_fence_reg *reg; 2572 int ret; 2573 2574 /* XXX disable pipelining. There are bugs. Shocking. */ 2575 pipelined = NULL; 2576 2577 /* Just update our place in the LRU if our fence is getting reused. 
*/ 2578 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2579 reg = &dev_priv->fence_regs[obj->fence_reg]; 2580 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2581 2582 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) 2583 pipelined = NULL; 2584 2585 if (!pipelined) { 2586 if (reg->setup_seqno) { 2587 if (!ring_passed_seqno(obj->last_fenced_ring, 2588 reg->setup_seqno)) { 2589 ret = i915_wait_request(obj->last_fenced_ring, 2590 reg->setup_seqno); 2591 if (ret) 2592 return ret; 2593 } 2594 2595 reg->setup_seqno = 0; 2596 } 2597 } else if (obj->last_fenced_ring && 2598 obj->last_fenced_ring != pipelined) { 2599 ret = i915_gem_object_flush_fence(obj, pipelined); 2600 if (ret) 2601 return ret; 2602 } else if (obj->tiling_changed) { 2603 if (obj->fenced_gpu_access) { 2604 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2605 ret = i915_gem_flush_ring(obj->ring, 2606 0, obj->base.write_domain); 2607 if (ret) 2608 return ret; 2609 } 2610 2611 obj->fenced_gpu_access = false; 2612 } 2613 } 2614 2615 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) 2616 pipelined = NULL; 2617 BUG_ON(!pipelined && reg->setup_seqno); 2618 2619 if (obj->tiling_changed) { 2620 if (pipelined) { 2621 reg->setup_seqno = 2622 i915_gem_next_request_seqno(pipelined); 2623 obj->last_fenced_seqno = reg->setup_seqno; 2624 obj->last_fenced_ring = pipelined; 2625 } 2626 goto update; 2627 } 2628 2629 return 0; 2630 } 2631 2632 reg = i915_find_fence_reg(dev, pipelined); 2633 if (reg == NULL) 2634 return -ENOSPC; 2635 2636 ret = i915_gem_object_flush_fence(obj, pipelined); 2637 if (ret) 2638 return ret; 2639 2640 if (reg->obj) { 2641 struct drm_i915_gem_object *old = reg->obj; 2642 2643 drm_gem_object_reference(&old->base); 2644 2645 if (old->tiling_mode) 2646 i915_gem_release_mmap(old); 2647 2648 ret = i915_gem_object_flush_fence(old, pipelined); 2649 if (ret) { 2650 drm_gem_object_unreference(&old->base); 2651 return ret; 2652 } 2653 2654 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0) 2655 pipelined = NULL; 2656 2657 old->fence_reg = I915_FENCE_REG_NONE; 2658 old->last_fenced_ring = pipelined; 2659 old->last_fenced_seqno = 2660 pipelined ? i915_gem_next_request_seqno(pipelined) : 0; 2661 2662 drm_gem_object_unreference(&old->base); 2663 } else if (obj->last_fenced_seqno == 0) 2664 pipelined = NULL; 2665 2666 reg->obj = obj; 2667 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2668 obj->fence_reg = reg - dev_priv->fence_regs; 2669 obj->last_fenced_ring = pipelined; 2670 2671 reg->setup_seqno = 2672 pipelined ? i915_gem_next_request_seqno(pipelined) : 0; 2673 obj->last_fenced_seqno = reg->setup_seqno; 2674 2675 update: 2676 obj->tiling_changed = false; 2677 switch (INTEL_INFO(dev)->gen) { 2678 case 6: 2679 ret = sandybridge_write_fence_reg(obj, pipelined); 2680 break; 2681 case 5: 2682 case 4: 2683 ret = i965_write_fence_reg(obj, pipelined); 2684 break; 2685 case 3: 2686 ret = i915_write_fence_reg(obj, pipelined); 2687 break; 2688 case 2: 2689 ret = i830_write_fence_reg(obj, pipelined); 2690 break; 2691 } 2692 2693 return ret; 2694 } 2695 2696 /** 2697 * i915_gem_clear_fence_reg - clear out fence register info 2698 * @obj: object to clear 2699 * 2700 * Zeroes out the fence register itself and clears out the associated 2701 * data structures in dev_priv and obj. 
2702 */ 2703 static void 2704 i915_gem_clear_fence_reg(struct drm_device *dev, 2705 struct drm_i915_fence_reg *reg) 2706 { 2707 drm_i915_private_t *dev_priv = dev->dev_private; 2708 uint32_t fence_reg = reg - dev_priv->fence_regs; 2709 2710 switch (INTEL_INFO(dev)->gen) { 2711 case 6: 2712 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0); 2713 break; 2714 case 5: 2715 case 4: 2716 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0); 2717 break; 2718 case 3: 2719 if (fence_reg >= 8) 2720 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2721 else 2722 case 2: 2723 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2724 2725 I915_WRITE(fence_reg, 0); 2726 break; 2727 } 2728 2729 list_del_init(®->lru_list); 2730 reg->obj = NULL; 2731 reg->setup_seqno = 0; 2732 } 2733 2734 /** 2735 * Finds free space in the GTT aperture and binds the object there. 2736 */ 2737 static int 2738 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2739 unsigned alignment, 2740 bool map_and_fenceable) 2741 { 2742 struct drm_device *dev = obj->base.dev; 2743 drm_i915_private_t *dev_priv = dev->dev_private; 2744 struct drm_mm_node *free_space; 2745 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2746 u32 size, fence_size, fence_alignment, unfenced_alignment; 2747 bool mappable, fenceable; 2748 int ret; 2749 2750 if (obj->madv != I915_MADV_WILLNEED) { 2751 DRM_ERROR("Attempting to bind a purgeable object\n"); 2752 return -EINVAL; 2753 } 2754 2755 fence_size = i915_gem_get_gtt_size(obj); 2756 fence_alignment = i915_gem_get_gtt_alignment(obj); 2757 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj); 2758 2759 if (alignment == 0) 2760 alignment = map_and_fenceable ? fence_alignment : 2761 unfenced_alignment; 2762 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 2763 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2764 return -EINVAL; 2765 } 2766 2767 size = map_and_fenceable ? fence_size : obj->base.size; 2768 2769 /* If the object is bigger than the entire aperture, reject it early 2770 * before evicting everything in a vain attempt to find space. 2771 */ 2772 if (obj->base.size > 2773 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 2774 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2775 return -E2BIG; 2776 } 2777 2778 search_free: 2779 if (map_and_fenceable) 2780 free_space = 2781 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, 2782 size, alignment, 0, 2783 dev_priv->mm.gtt_mappable_end, 2784 0); 2785 else 2786 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2787 size, alignment, 0); 2788 2789 if (free_space != NULL) { 2790 if (map_and_fenceable) 2791 obj->gtt_space = 2792 drm_mm_get_block_range_generic(free_space, 2793 size, alignment, 0, 2794 dev_priv->mm.gtt_mappable_end, 2795 0); 2796 else 2797 obj->gtt_space = 2798 drm_mm_get_block(free_space, size, alignment); 2799 } 2800 if (obj->gtt_space == NULL) { 2801 /* If the gtt is empty and we're still having trouble 2802 * fitting our object in, we're out of memory. 
2803 */ 2804 ret = i915_gem_evict_something(dev, size, alignment, 2805 map_and_fenceable); 2806 if (ret) 2807 return ret; 2808 2809 goto search_free; 2810 } 2811 2812 ret = i915_gem_object_get_pages_gtt(obj, gfpmask); 2813 if (ret) { 2814 drm_mm_put_block(obj->gtt_space); 2815 obj->gtt_space = NULL; 2816 2817 if (ret == -ENOMEM) { 2818 /* first try to reclaim some memory by clearing the GTT */ 2819 ret = i915_gem_evict_everything(dev, false); 2820 if (ret) { 2821 /* now try to shrink everyone else */ 2822 if (gfpmask) { 2823 gfpmask = 0; 2824 goto search_free; 2825 } 2826 2827 return -ENOMEM; 2828 } 2829 2830 goto search_free; 2831 } 2832 2833 return ret; 2834 } 2835 2836 ret = i915_gem_gtt_bind_object(obj); 2837 if (ret) { 2838 i915_gem_object_put_pages_gtt(obj); 2839 drm_mm_put_block(obj->gtt_space); 2840 obj->gtt_space = NULL; 2841 2842 if (i915_gem_evict_everything(dev, false)) 2843 return ret; 2844 2845 goto search_free; 2846 } 2847 2848 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); 2849 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2850 2851 /* Assert that the object is not currently in any GPU domain. As it 2852 * wasn't in the GTT, there shouldn't be any way it could have been in 2853 * a GPU cache 2854 */ 2855 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2856 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2857 2858 obj->gtt_offset = obj->gtt_space->start; 2859 2860 fenceable = 2861 obj->gtt_space->size == fence_size && 2862 (obj->gtt_space->start & (fence_alignment -1)) == 0; 2863 2864 mappable = 2865 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 2866 2867 obj->map_and_fenceable = mappable && fenceable; 2868 2869 trace_i915_gem_object_bind(obj, map_and_fenceable); 2870 return 0; 2871 } 2872 2873 void 2874 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2875 { 2876 /* If we don't have a page list set up, then we're not pinned 2877 * to GPU, and we can ignore the cache flush because it'll happen 2878 * again at bind time. 2879 */ 2880 if (obj->pages == NULL) 2881 return; 2882 2883 trace_i915_gem_object_clflush(obj); 2884 2885 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2886 } 2887 2888 /** Flushes any GPU write domain for the object if it's dirty. */ 2889 static int 2890 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) 2891 { 2892 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) 2893 return 0; 2894 2895 /* Queue the GPU write cache flushing we need. */ 2896 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2897 } 2898 2899 /** Flushes the GTT write domain for the object if it's dirty. */ 2900 static void 2901 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2902 { 2903 uint32_t old_write_domain; 2904 2905 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2906 return; 2907 2908 /* No actual flushing is required for the GTT write domain. Writes 2909 * to it immediately go to main memory as far as we know, so there's 2910 * no chipset flush. It also doesn't land in render cache. 2911 * 2912 * However, we do have to enforce the order so that all writes through 2913 * the GTT land before any writes to the device, such as updates to 2914 * the GATT itself. 
2915 */ 2916 wmb(); 2917 2918 i915_gem_release_mmap(obj); 2919 2920 old_write_domain = obj->base.write_domain; 2921 obj->base.write_domain = 0; 2922 2923 trace_i915_gem_object_change_domain(obj, 2924 obj->base.read_domains, 2925 old_write_domain); 2926 } 2927 2928 /** Flushes the CPU write domain for the object if it's dirty. */ 2929 static void 2930 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2931 { 2932 uint32_t old_write_domain; 2933 2934 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2935 return; 2936 2937 i915_gem_clflush_object(obj); 2938 intel_gtt_chipset_flush(); 2939 old_write_domain = obj->base.write_domain; 2940 obj->base.write_domain = 0; 2941 2942 trace_i915_gem_object_change_domain(obj, 2943 obj->base.read_domains, 2944 old_write_domain); 2945 } 2946 2947 /** 2948 * Moves a single object to the GTT read, and possibly write domain. 2949 * 2950 * This function returns when the move is complete, including waiting on 2951 * flushes to occur. 2952 */ 2953 int 2954 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2955 { 2956 uint32_t old_write_domain, old_read_domains; 2957 int ret; 2958 2959 /* Not valid to be called on unbound objects. */ 2960 if (obj->gtt_space == NULL) 2961 return -EINVAL; 2962 2963 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2964 return 0; 2965 2966 ret = i915_gem_object_flush_gpu_write_domain(obj); 2967 if (ret) 2968 return ret; 2969 2970 if (obj->pending_gpu_write || write) { 2971 ret = i915_gem_object_wait_rendering(obj); 2972 if (ret) 2973 return ret; 2974 } 2975 2976 i915_gem_object_flush_cpu_write_domain(obj); 2977 2978 old_write_domain = obj->base.write_domain; 2979 old_read_domains = obj->base.read_domains; 2980 2981 /* It should now be out of any other write domains, and we can update 2982 * the domain values for our changes. 2983 */ 2984 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2985 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2986 if (write) { 2987 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2988 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2989 obj->dirty = 1; 2990 } 2991 2992 trace_i915_gem_object_change_domain(obj, 2993 old_read_domains, 2994 old_write_domain); 2995 2996 return 0; 2997 } 2998 2999 /* 3000 * Prepare buffer for display plane. Use uninterruptible for possible flush 3001 * wait, as in modesetting process we're not supposed to be interrupted. 3002 */ 3003 int 3004 i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj, 3005 struct intel_ring_buffer *pipelined) 3006 { 3007 uint32_t old_read_domains; 3008 int ret; 3009 3010 /* Not valid to be called on unbound objects. */ 3011 if (obj->gtt_space == NULL) 3012 return -EINVAL; 3013 3014 ret = i915_gem_object_flush_gpu_write_domain(obj); 3015 if (ret) 3016 return ret; 3017 3018 3019 /* Currently, we are always called from an non-interruptible context. 
*/ 3020 if (pipelined != obj->ring) { 3021 ret = i915_gem_object_wait_rendering(obj); 3022 if (ret) 3023 return ret; 3024 } 3025 3026 i915_gem_object_flush_cpu_write_domain(obj); 3027 3028 old_read_domains = obj->base.read_domains; 3029 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3030 3031 trace_i915_gem_object_change_domain(obj, 3032 old_read_domains, 3033 obj->base.write_domain); 3034 3035 return 0; 3036 } 3037 3038 int 3039 i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj) 3040 { 3041 int ret; 3042 3043 if (!obj->active) 3044 return 0; 3045 3046 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3047 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 3048 if (ret) 3049 return ret; 3050 } 3051 3052 return i915_gem_object_wait_rendering(obj); 3053 } 3054 3055 /** 3056 * Moves a single object to the CPU read, and possibly write domain. 3057 * 3058 * This function returns when the move is complete, including waiting on 3059 * flushes to occur. 3060 */ 3061 static int 3062 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3063 { 3064 uint32_t old_write_domain, old_read_domains; 3065 int ret; 3066 3067 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3068 return 0; 3069 3070 ret = i915_gem_object_flush_gpu_write_domain(obj); 3071 if (ret) 3072 return ret; 3073 3074 ret = i915_gem_object_wait_rendering(obj); 3075 if (ret) 3076 return ret; 3077 3078 i915_gem_object_flush_gtt_write_domain(obj); 3079 3080 /* If we have a partially-valid cache of the object in the CPU, 3081 * finish invalidating it and free the per-page flags. 3082 */ 3083 i915_gem_object_set_to_full_cpu_read_domain(obj); 3084 3085 old_write_domain = obj->base.write_domain; 3086 old_read_domains = obj->base.read_domains; 3087 3088 /* Flush the CPU cache if it's still invalid. */ 3089 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3090 i915_gem_clflush_object(obj); 3091 3092 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3093 } 3094 3095 /* It should now be out of any other write domains, and we can update 3096 * the domain values for our changes. 3097 */ 3098 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3099 3100 /* If we're writing through the CPU, then the GPU read domains will 3101 * need to be invalidated at next use. 3102 */ 3103 if (write) { 3104 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3105 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3106 } 3107 3108 trace_i915_gem_object_change_domain(obj, 3109 old_read_domains, 3110 old_write_domain); 3111 3112 return 0; 3113 } 3114 3115 /** 3116 * Moves the object from a partially CPU read to a full one. 3117 * 3118 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3119 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3120 */ 3121 static void 3122 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) 3123 { 3124 if (!obj->page_cpu_valid) 3125 return; 3126 3127 /* If we're partially in the CPU read domain, finish moving it in. 3128 */ 3129 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) { 3130 int i; 3131 3132 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) { 3133 if (obj->page_cpu_valid[i]) 3134 continue; 3135 drm_clflush_pages(obj->pages + i, 1); 3136 } 3137 } 3138 3139 /* Free the page_cpu_valid mappings which are now stale, whether 3140 * or not we've got I915_GEM_DOMAIN_CPU. 
3141 */ 3142 kfree(obj->page_cpu_valid); 3143 obj->page_cpu_valid = NULL; 3144 } 3145 3146 /** 3147 * Set the CPU read domain on a range of the object. 3148 * 3149 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3150 * not entirely valid. The page_cpu_valid member of the object flags which 3151 * pages have been flushed, and will be respected by 3152 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3153 * of the whole object. 3154 * 3155 * This function returns when the move is complete, including waiting on 3156 * flushes to occur. 3157 */ 3158 static int 3159 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, 3160 uint64_t offset, uint64_t size) 3161 { 3162 uint32_t old_read_domains; 3163 int i, ret; 3164 3165 if (offset == 0 && size == obj->base.size) 3166 return i915_gem_object_set_to_cpu_domain(obj, 0); 3167 3168 ret = i915_gem_object_flush_gpu_write_domain(obj); 3169 if (ret) 3170 return ret; 3171 3172 ret = i915_gem_object_wait_rendering(obj); 3173 if (ret) 3174 return ret; 3175 3176 i915_gem_object_flush_gtt_write_domain(obj); 3177 3178 /* If we're already fully in the CPU read domain, we're done. */ 3179 if (obj->page_cpu_valid == NULL && 3180 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0) 3181 return 0; 3182 3183 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3184 * newly adding I915_GEM_DOMAIN_CPU 3185 */ 3186 if (obj->page_cpu_valid == NULL) { 3187 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE, 3188 GFP_KERNEL); 3189 if (obj->page_cpu_valid == NULL) 3190 return -ENOMEM; 3191 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 3192 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE); 3193 3194 /* Flush the cache on any pages that are still invalid from the CPU's 3195 * perspective. 3196 */ 3197 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3198 i++) { 3199 if (obj->page_cpu_valid[i]) 3200 continue; 3201 3202 drm_clflush_pages(obj->pages + i, 1); 3203 3204 obj->page_cpu_valid[i] = 1; 3205 } 3206 3207 /* It should now be out of any other write domains, and we can update 3208 * the domain values for our changes. 3209 */ 3210 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3211 3212 old_read_domains = obj->base.read_domains; 3213 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3214 3215 trace_i915_gem_object_change_domain(obj, 3216 old_read_domains, 3217 obj->base.write_domain); 3218 3219 return 0; 3220 } 3221 3222 /* Throttle our rendering by waiting until the ring has completed our requests 3223 * emitted over 20 msec ago. 3224 * 3225 * Note that if we were to use the current jiffies each time around the loop, 3226 * we wouldn't escape the function with any frames outstanding if the time to 3227 * render a frame was over 20ms. 3228 * 3229 * This should get us reasonable parallelism between CPU and GPU but also 3230 * relatively low latency when blocking on a particular request to finish. 
3231 */ 3232 static int 3233 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3234 { 3235 struct drm_i915_private *dev_priv = dev->dev_private; 3236 struct drm_i915_file_private *file_priv = file->driver_priv; 3237 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3238 struct drm_i915_gem_request *request; 3239 struct intel_ring_buffer *ring = NULL; 3240 u32 seqno = 0; 3241 int ret; 3242 3243 if (atomic_read(&dev_priv->mm.wedged)) 3244 return -EIO; 3245 3246 spin_lock(&file_priv->mm.lock); 3247 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3248 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3249 break; 3250 3251 ring = request->ring; 3252 seqno = request->seqno; 3253 } 3254 spin_unlock(&file_priv->mm.lock); 3255 3256 if (seqno == 0) 3257 return 0; 3258 3259 ret = 0; 3260 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 3261 /* And wait for the seqno passing without holding any locks and 3262 * causing extra latency for others. This is safe as the irq 3263 * generation is designed to be run atomically and so is 3264 * lockless. 3265 */ 3266 if (ring->irq_get(ring)) { 3267 ret = wait_event_interruptible(ring->irq_queue, 3268 i915_seqno_passed(ring->get_seqno(ring), seqno) 3269 || atomic_read(&dev_priv->mm.wedged)); 3270 ring->irq_put(ring); 3271 3272 if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) 3273 ret = -EIO; 3274 } 3275 } 3276 3277 if (ret == 0) 3278 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3279 3280 return ret; 3281 } 3282 3283 int 3284 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3285 uint32_t alignment, 3286 bool map_and_fenceable) 3287 { 3288 struct drm_device *dev = obj->base.dev; 3289 struct drm_i915_private *dev_priv = dev->dev_private; 3290 int ret; 3291 3292 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 3293 WARN_ON(i915_verify_lists(dev)); 3294 3295 if (obj->gtt_space != NULL) { 3296 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3297 (map_and_fenceable && !obj->map_and_fenceable)) { 3298 WARN(obj->pin_count, 3299 "bo is already pinned with incorrect alignment:" 3300 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3301 " obj->map_and_fenceable=%d\n", 3302 obj->gtt_offset, alignment, 3303 map_and_fenceable, 3304 obj->map_and_fenceable); 3305 ret = i915_gem_object_unbind(obj); 3306 if (ret) 3307 return ret; 3308 } 3309 } 3310 3311 if (obj->gtt_space == NULL) { 3312 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3313 map_and_fenceable); 3314 if (ret) 3315 return ret; 3316 } 3317 3318 if (obj->pin_count++ == 0) { 3319 if (!obj->active) 3320 list_move_tail(&obj->mm_list, 3321 &dev_priv->mm.pinned_list); 3322 } 3323 obj->pin_mappable |= map_and_fenceable; 3324 3325 WARN_ON(i915_verify_lists(dev)); 3326 return 0; 3327 } 3328 3329 void 3330 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3331 { 3332 struct drm_device *dev = obj->base.dev; 3333 drm_i915_private_t *dev_priv = dev->dev_private; 3334 3335 WARN_ON(i915_verify_lists(dev)); 3336 BUG_ON(obj->pin_count == 0); 3337 BUG_ON(obj->gtt_space == NULL); 3338 3339 if (--obj->pin_count == 0) { 3340 if (!obj->active) 3341 list_move_tail(&obj->mm_list, 3342 &dev_priv->mm.inactive_list); 3343 obj->pin_mappable = false; 3344 } 3345 WARN_ON(i915_verify_lists(dev)); 3346 } 3347 3348 int 3349 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3350 struct drm_file *file) 3351 { 3352 struct drm_i915_gem_pin *args = data; 3353 struct drm_i915_gem_object *obj; 3354 int ret; 3355 3356 ret 
= i915_mutex_lock_interruptible(dev); 3357 if (ret) 3358 return ret; 3359 3360 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3361 if (&obj->base == NULL) { 3362 ret = -ENOENT; 3363 goto unlock; 3364 } 3365 3366 if (obj->madv != I915_MADV_WILLNEED) { 3367 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3368 ret = -EINVAL; 3369 goto out; 3370 } 3371 3372 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3373 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3374 args->handle); 3375 ret = -EINVAL; 3376 goto out; 3377 } 3378 3379 obj->user_pin_count++; 3380 obj->pin_filp = file; 3381 if (obj->user_pin_count == 1) { 3382 ret = i915_gem_object_pin(obj, args->alignment, true); 3383 if (ret) 3384 goto out; 3385 } 3386 3387 /* XXX - flush the CPU caches for pinned objects 3388 * as the X server doesn't manage domains yet 3389 */ 3390 i915_gem_object_flush_cpu_write_domain(obj); 3391 args->offset = obj->gtt_offset; 3392 out: 3393 drm_gem_object_unreference(&obj->base); 3394 unlock: 3395 mutex_unlock(&dev->struct_mutex); 3396 return ret; 3397 } 3398 3399 int 3400 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3401 struct drm_file *file) 3402 { 3403 struct drm_i915_gem_pin *args = data; 3404 struct drm_i915_gem_object *obj; 3405 int ret; 3406 3407 ret = i915_mutex_lock_interruptible(dev); 3408 if (ret) 3409 return ret; 3410 3411 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3412 if (&obj->base == NULL) { 3413 ret = -ENOENT; 3414 goto unlock; 3415 } 3416 3417 if (obj->pin_filp != file) { 3418 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3419 args->handle); 3420 ret = -EINVAL; 3421 goto out; 3422 } 3423 obj->user_pin_count--; 3424 if (obj->user_pin_count == 0) { 3425 obj->pin_filp = NULL; 3426 i915_gem_object_unpin(obj); 3427 } 3428 3429 out: 3430 drm_gem_object_unreference(&obj->base); 3431 unlock: 3432 mutex_unlock(&dev->struct_mutex); 3433 return ret; 3434 } 3435 3436 int 3437 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3438 struct drm_file *file) 3439 { 3440 struct drm_i915_gem_busy *args = data; 3441 struct drm_i915_gem_object *obj; 3442 int ret; 3443 3444 ret = i915_mutex_lock_interruptible(dev); 3445 if (ret) 3446 return ret; 3447 3448 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3449 if (&obj->base == NULL) { 3450 ret = -ENOENT; 3451 goto unlock; 3452 } 3453 3454 /* Count all active objects as busy, even if they are currently not used 3455 * by the gpu. Users of this interface expect objects to eventually 3456 * become non-busy without any further actions, therefore emit any 3457 * necessary flushes here. 3458 */ 3459 args->busy = obj->active; 3460 if (args->busy) { 3461 /* Unconditionally flush objects, even when the gpu still uses this 3462 * object. Userspace calling this function indicates that it wants to 3463 * use this buffer rather sooner than later, so issuing the required 3464 * flush earlier is beneficial. 3465 */ 3466 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3467 ret = i915_gem_flush_ring(obj->ring, 3468 0, obj->base.write_domain); 3469 } else if (obj->ring->outstanding_lazy_request == 3470 obj->last_rendering_seqno) { 3471 struct drm_i915_gem_request *request; 3472 3473 /* This ring is not being cleared by active usage, 3474 * so emit a request to do so. 
3475 */ 3476 request = kzalloc(sizeof(*request), GFP_KERNEL); 3477 if (request) 3478 ret = i915_add_request(obj->ring, NULL,request); 3479 else 3480 ret = -ENOMEM; 3481 } 3482 3483 /* Update the active list for the hardware's current position. 3484 * Otherwise this only updates on a delayed timer or when irqs 3485 * are actually unmasked, and our working set ends up being 3486 * larger than required. 3487 */ 3488 i915_gem_retire_requests_ring(obj->ring); 3489 3490 args->busy = obj->active; 3491 } 3492 3493 drm_gem_object_unreference(&obj->base); 3494 unlock: 3495 mutex_unlock(&dev->struct_mutex); 3496 return ret; 3497 } 3498 3499 int 3500 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3501 struct drm_file *file_priv) 3502 { 3503 return i915_gem_ring_throttle(dev, file_priv); 3504 } 3505 3506 int 3507 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3508 struct drm_file *file_priv) 3509 { 3510 struct drm_i915_gem_madvise *args = data; 3511 struct drm_i915_gem_object *obj; 3512 int ret; 3513 3514 switch (args->madv) { 3515 case I915_MADV_DONTNEED: 3516 case I915_MADV_WILLNEED: 3517 break; 3518 default: 3519 return -EINVAL; 3520 } 3521 3522 ret = i915_mutex_lock_interruptible(dev); 3523 if (ret) 3524 return ret; 3525 3526 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3527 if (&obj->base == NULL) { 3528 ret = -ENOENT; 3529 goto unlock; 3530 } 3531 3532 if (obj->pin_count) { 3533 ret = -EINVAL; 3534 goto out; 3535 } 3536 3537 if (obj->madv != __I915_MADV_PURGED) 3538 obj->madv = args->madv; 3539 3540 /* if the object is no longer bound, discard its backing storage */ 3541 if (i915_gem_object_is_purgeable(obj) && 3542 obj->gtt_space == NULL) 3543 i915_gem_object_truncate(obj); 3544 3545 args->retained = obj->madv != __I915_MADV_PURGED; 3546 3547 out: 3548 drm_gem_object_unreference(&obj->base); 3549 unlock: 3550 mutex_unlock(&dev->struct_mutex); 3551 return ret; 3552 } 3553 3554 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3555 size_t size) 3556 { 3557 struct drm_i915_private *dev_priv = dev->dev_private; 3558 struct drm_i915_gem_object *obj; 3559 3560 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 3561 if (obj == NULL) 3562 return NULL; 3563 3564 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3565 kfree(obj); 3566 return NULL; 3567 } 3568 3569 i915_gem_info_add_obj(dev_priv, size); 3570 3571 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3572 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3573 3574 obj->agp_type = AGP_USER_MEMORY; 3575 obj->base.driver_private = NULL; 3576 obj->fence_reg = I915_FENCE_REG_NONE; 3577 INIT_LIST_HEAD(&obj->mm_list); 3578 INIT_LIST_HEAD(&obj->gtt_list); 3579 INIT_LIST_HEAD(&obj->ring_list); 3580 INIT_LIST_HEAD(&obj->exec_list); 3581 INIT_LIST_HEAD(&obj->gpu_write_list); 3582 obj->madv = I915_MADV_WILLNEED; 3583 /* Avoid an unnecessary call to unbind on the first bind. 
*/ 3584 obj->map_and_fenceable = true; 3585 3586 return obj; 3587 } 3588 3589 int i915_gem_init_object(struct drm_gem_object *obj) 3590 { 3591 BUG(); 3592 3593 return 0; 3594 } 3595 3596 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) 3597 { 3598 struct drm_device *dev = obj->base.dev; 3599 drm_i915_private_t *dev_priv = dev->dev_private; 3600 int ret; 3601 3602 ret = i915_gem_object_unbind(obj); 3603 if (ret == -ERESTARTSYS) { 3604 list_move(&obj->mm_list, 3605 &dev_priv->mm.deferred_free_list); 3606 return; 3607 } 3608 3609 if (obj->base.map_list.map) 3610 i915_gem_free_mmap_offset(obj); 3611 3612 drm_gem_object_release(&obj->base); 3613 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3614 3615 kfree(obj->page_cpu_valid); 3616 kfree(obj->bit_17); 3617 kfree(obj); 3618 3619 trace_i915_gem_object_destroy(obj); 3620 } 3621 3622 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3623 { 3624 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3625 struct drm_device *dev = obj->base.dev; 3626 3627 while (obj->pin_count > 0) 3628 i915_gem_object_unpin(obj); 3629 3630 if (obj->phys_obj) 3631 i915_gem_detach_phys_object(dev, obj); 3632 3633 i915_gem_free_object_tail(obj); 3634 } 3635 3636 int 3637 i915_gem_idle(struct drm_device *dev) 3638 { 3639 drm_i915_private_t *dev_priv = dev->dev_private; 3640 int ret; 3641 3642 mutex_lock(&dev->struct_mutex); 3643 3644 if (dev_priv->mm.suspended) { 3645 mutex_unlock(&dev->struct_mutex); 3646 return 0; 3647 } 3648 3649 ret = i915_gpu_idle(dev); 3650 if (ret) { 3651 mutex_unlock(&dev->struct_mutex); 3652 return ret; 3653 } 3654 3655 /* Under UMS, be paranoid and evict. */ 3656 if (!drm_core_check_feature(dev, DRIVER_MODESET)) { 3657 ret = i915_gem_evict_inactive(dev, false); 3658 if (ret) { 3659 mutex_unlock(&dev->struct_mutex); 3660 return ret; 3661 } 3662 } 3663 3664 i915_gem_reset_fences(dev); 3665 3666 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3667 * We need to replace this with a semaphore, or something. 3668 * And not confound mm.suspended! 3669 */ 3670 dev_priv->mm.suspended = 1; 3671 del_timer_sync(&dev_priv->hangcheck_timer); 3672 3673 i915_kernel_lost_context(dev); 3674 i915_gem_cleanup_ringbuffer(dev); 3675 3676 mutex_unlock(&dev->struct_mutex); 3677 3678 /* Cancel the retire work handler, which should be idle now. 
*/ 3679 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3680 3681 return 0; 3682 } 3683 3684 int 3685 i915_gem_init_ringbuffer(struct drm_device *dev) 3686 { 3687 drm_i915_private_t *dev_priv = dev->dev_private; 3688 int ret; 3689 3690 ret = intel_init_render_ring_buffer(dev); 3691 if (ret) 3692 return ret; 3693 3694 if (HAS_BSD(dev)) { 3695 ret = intel_init_bsd_ring_buffer(dev); 3696 if (ret) 3697 goto cleanup_render_ring; 3698 } 3699 3700 if (HAS_BLT(dev)) { 3701 ret = intel_init_blt_ring_buffer(dev); 3702 if (ret) 3703 goto cleanup_bsd_ring; 3704 } 3705 3706 dev_priv->next_seqno = 1; 3707 3708 return 0; 3709 3710 cleanup_bsd_ring: 3711 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3712 cleanup_render_ring: 3713 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3714 return ret; 3715 } 3716 3717 void 3718 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3719 { 3720 drm_i915_private_t *dev_priv = dev->dev_private; 3721 int i; 3722 3723 for (i = 0; i < I915_NUM_RINGS; i++) 3724 intel_cleanup_ring_buffer(&dev_priv->ring[i]); 3725 } 3726 3727 int 3728 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3729 struct drm_file *file_priv) 3730 { 3731 drm_i915_private_t *dev_priv = dev->dev_private; 3732 int ret, i; 3733 3734 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3735 return 0; 3736 3737 if (atomic_read(&dev_priv->mm.wedged)) { 3738 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3739 atomic_set(&dev_priv->mm.wedged, 0); 3740 } 3741 3742 mutex_lock(&dev->struct_mutex); 3743 dev_priv->mm.suspended = 0; 3744 3745 ret = i915_gem_init_ringbuffer(dev); 3746 if (ret != 0) { 3747 mutex_unlock(&dev->struct_mutex); 3748 return ret; 3749 } 3750 3751 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 3752 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 3753 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 3754 for (i = 0; i < I915_NUM_RINGS; i++) { 3755 BUG_ON(!list_empty(&dev_priv->ring[i].active_list)); 3756 BUG_ON(!list_empty(&dev_priv->ring[i].request_list)); 3757 } 3758 mutex_unlock(&dev->struct_mutex); 3759 3760 ret = drm_irq_install(dev); 3761 if (ret) 3762 goto cleanup_ringbuffer; 3763 3764 return 0; 3765 3766 cleanup_ringbuffer: 3767 mutex_lock(&dev->struct_mutex); 3768 i915_gem_cleanup_ringbuffer(dev); 3769 dev_priv->mm.suspended = 1; 3770 mutex_unlock(&dev->struct_mutex); 3771 3772 return ret; 3773 } 3774 3775 int 3776 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3777 struct drm_file *file_priv) 3778 { 3779 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3780 return 0; 3781 3782 drm_irq_uninstall(dev); 3783 return i915_gem_idle(dev); 3784 } 3785 3786 void 3787 i915_gem_lastclose(struct drm_device *dev) 3788 { 3789 int ret; 3790 3791 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3792 return; 3793 3794 ret = i915_gem_idle(dev); 3795 if (ret) 3796 DRM_ERROR("failed to idle hardware: %d\n", ret); 3797 } 3798 3799 static void 3800 init_ring_lists(struct intel_ring_buffer *ring) 3801 { 3802 INIT_LIST_HEAD(&ring->active_list); 3803 INIT_LIST_HEAD(&ring->request_list); 3804 INIT_LIST_HEAD(&ring->gpu_write_list); 3805 } 3806 3807 void 3808 i915_gem_load(struct drm_device *dev) 3809 { 3810 int i; 3811 drm_i915_private_t *dev_priv = dev->dev_private; 3812 3813 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3814 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 3815 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3816 INIT_LIST_HEAD(&dev_priv->mm.pinned_list); 3817 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3818 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); 3819 
INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3820 for (i = 0; i < I915_NUM_RINGS; i++) 3821 init_ring_lists(&dev_priv->ring[i]); 3822 for (i = 0; i < 16; i++) 3823 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3824 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3825 i915_gem_retire_work_handler); 3826 init_completion(&dev_priv->error_completion); 3827 3828 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3829 if (IS_GEN3(dev)) { 3830 u32 tmp = I915_READ(MI_ARB_STATE); 3831 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { 3832 /* arb state is a masked write, so set bit + bit in mask */ 3833 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT); 3834 I915_WRITE(MI_ARB_STATE, tmp); 3835 } 3836 } 3837 3838 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3839 3840 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3841 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3842 dev_priv->fence_reg_start = 3; 3843 3844 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3845 dev_priv->num_fence_regs = 16; 3846 else 3847 dev_priv->num_fence_regs = 8; 3848 3849 /* Initialize fence registers to zero */ 3850 switch (INTEL_INFO(dev)->gen) { 3851 case 6: 3852 for (i = 0; i < 16; i++) 3853 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0); 3854 break; 3855 case 5: 3856 case 4: 3857 for (i = 0; i < 16; i++) 3858 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0); 3859 break; 3860 case 3: 3861 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3862 for (i = 0; i < 8; i++) 3863 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0); 3864 case 2: 3865 for (i = 0; i < 8; i++) 3866 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0); 3867 break; 3868 } 3869 i915_gem_detect_bit_6_swizzle(dev); 3870 init_waitqueue_head(&dev_priv->pending_flip_queue); 3871 3872 dev_priv->mm.interruptible = true; 3873 3874 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3875 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3876 register_shrinker(&dev_priv->mm.inactive_shrinker); 3877 } 3878 3879 /* 3880 * Create a physically contiguous memory object for this object 3881 * e.g. 
for cursor + overlay regs 3882 */ 3883 static int i915_gem_init_phys_object(struct drm_device *dev, 3884 int id, int size, int align) 3885 { 3886 drm_i915_private_t *dev_priv = dev->dev_private; 3887 struct drm_i915_gem_phys_object *phys_obj; 3888 int ret; 3889 3890 if (dev_priv->mm.phys_objs[id - 1] || !size) 3891 return 0; 3892 3893 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 3894 if (!phys_obj) 3895 return -ENOMEM; 3896 3897 phys_obj->id = id; 3898 3899 phys_obj->handle = drm_pci_alloc(dev, size, align); 3900 if (!phys_obj->handle) { 3901 ret = -ENOMEM; 3902 goto kfree_obj; 3903 } 3904 #ifdef CONFIG_X86 3905 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3906 #endif 3907 3908 dev_priv->mm.phys_objs[id - 1] = phys_obj; 3909 3910 return 0; 3911 kfree_obj: 3912 kfree(phys_obj); 3913 return ret; 3914 } 3915 3916 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 3917 { 3918 drm_i915_private_t *dev_priv = dev->dev_private; 3919 struct drm_i915_gem_phys_object *phys_obj; 3920 3921 if (!dev_priv->mm.phys_objs[id - 1]) 3922 return; 3923 3924 phys_obj = dev_priv->mm.phys_objs[id - 1]; 3925 if (phys_obj->cur_obj) { 3926 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 3927 } 3928 3929 #ifdef CONFIG_X86 3930 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3931 #endif 3932 drm_pci_free(dev, phys_obj->handle); 3933 kfree(phys_obj); 3934 dev_priv->mm.phys_objs[id - 1] = NULL; 3935 } 3936 3937 void i915_gem_free_all_phys_object(struct drm_device *dev) 3938 { 3939 int i; 3940 3941 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 3942 i915_gem_free_phys_object(dev, i); 3943 } 3944 3945 void i915_gem_detach_phys_object(struct drm_device *dev, 3946 struct drm_i915_gem_object *obj) 3947 { 3948 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3949 char *vaddr; 3950 int i; 3951 int page_count; 3952 3953 if (!obj->phys_obj) 3954 return; 3955 vaddr = obj->phys_obj->handle->vaddr; 3956 3957 page_count = obj->base.size / PAGE_SIZE; 3958 for (i = 0; i < page_count; i++) { 3959 struct page *page = read_cache_page_gfp(mapping, i, 3960 GFP_HIGHUSER | __GFP_RECLAIMABLE); 3961 if (!IS_ERR(page)) { 3962 char *dst = kmap_atomic(page); 3963 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 3964 kunmap_atomic(dst); 3965 3966 drm_clflush_pages(&page, 1); 3967 3968 set_page_dirty(page); 3969 mark_page_accessed(page); 3970 page_cache_release(page); 3971 } 3972 } 3973 intel_gtt_chipset_flush(); 3974 3975 obj->phys_obj->cur_obj = NULL; 3976 obj->phys_obj = NULL; 3977 } 3978 3979 int 3980 i915_gem_attach_phys_object(struct drm_device *dev, 3981 struct drm_i915_gem_object *obj, 3982 int id, 3983 int align) 3984 { 3985 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3986 drm_i915_private_t *dev_priv = dev->dev_private; 3987 int ret = 0; 3988 int page_count; 3989 int i; 3990 3991 if (id > I915_MAX_PHYS_OBJECT) 3992 return -EINVAL; 3993 3994 if (obj->phys_obj) { 3995 if (obj->phys_obj->id == id) 3996 return 0; 3997 i915_gem_detach_phys_object(dev, obj); 3998 } 3999 4000 /* create a new object */ 4001 if (!dev_priv->mm.phys_objs[id - 1]) { 4002 ret = i915_gem_init_phys_object(dev, id, 4003 obj->base.size, align); 4004 if (ret) { 4005 DRM_ERROR("failed to init phys object %d size: %zu\n", 4006 id, obj->base.size); 4007 return ret; 4008 } 4009 } 4010 4011 /* bind to the object */ 4012 obj->phys_obj = 
dev_priv->mm.phys_objs[id - 1]; 4013 obj->phys_obj->cur_obj = obj; 4014 4015 page_count = obj->base.size / PAGE_SIZE; 4016 4017 for (i = 0; i < page_count; i++) { 4018 struct page *page; 4019 char *dst, *src; 4020 4021 page = read_cache_page_gfp(mapping, i, 4022 GFP_HIGHUSER | __GFP_RECLAIMABLE); 4023 if (IS_ERR(page)) 4024 return PTR_ERR(page); 4025 4026 src = kmap_atomic(page); 4027 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4028 memcpy(dst, src, PAGE_SIZE); 4029 kunmap_atomic(src); 4030 4031 mark_page_accessed(page); 4032 page_cache_release(page); 4033 } 4034 4035 return 0; 4036 } 4037 4038 static int 4039 i915_gem_phys_pwrite(struct drm_device *dev, 4040 struct drm_i915_gem_object *obj, 4041 struct drm_i915_gem_pwrite *args, 4042 struct drm_file *file_priv) 4043 { 4044 void *vaddr = obj->phys_obj->handle->vaddr + args->offset; 4045 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4046 4047 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 4048 unsigned long unwritten; 4049 4050 /* The physical object once assigned is fixed for the lifetime 4051 * of the obj, so we can safely drop the lock and continue 4052 * to access vaddr. 4053 */ 4054 mutex_unlock(&dev->struct_mutex); 4055 unwritten = copy_from_user(vaddr, user_data, args->size); 4056 mutex_lock(&dev->struct_mutex); 4057 if (unwritten) 4058 return -EFAULT; 4059 } 4060 4061 intel_gtt_chipset_flush(); 4062 return 0; 4063 } 4064 4065 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4066 { 4067 struct drm_i915_file_private *file_priv = file->driver_priv; 4068 4069 /* Clean up our request list when the client is going away, so that 4070 * later retire_requests won't dereference our soon-to-be-gone 4071 * file_priv. 4072 */ 4073 spin_lock(&file_priv->mm.lock); 4074 while (!list_empty(&file_priv->mm.request_list)) { 4075 struct drm_i915_gem_request *request; 4076 4077 request = list_first_entry(&file_priv->mm.request_list, 4078 struct drm_i915_gem_request, 4079 client_list); 4080 list_del(&request->client_list); 4081 request->file_priv = NULL; 4082 } 4083 spin_unlock(&file_priv->mm.lock); 4084 } 4085 4086 static int 4087 i915_gpu_is_active(struct drm_device *dev) 4088 { 4089 drm_i915_private_t *dev_priv = dev->dev_private; 4090 int lists_empty; 4091 4092 lists_empty = list_empty(&dev_priv->mm.flushing_list) && 4093 list_empty(&dev_priv->mm.active_list); 4094 4095 return !lists_empty; 4096 } 4097 4098 static int 4099 i915_gem_inactive_shrink(struct shrinker *shrinker, 4100 int nr_to_scan, 4101 gfp_t gfp_mask) 4102 { 4103 struct drm_i915_private *dev_priv = 4104 container_of(shrinker, 4105 struct drm_i915_private, 4106 mm.inactive_shrinker); 4107 struct drm_device *dev = dev_priv->dev; 4108 struct drm_i915_gem_object *obj, *next; 4109 int cnt; 4110 4111 if (!mutex_trylock(&dev->struct_mutex)) 4112 return 0; 4113 4114 /* "fast-path" to count number of available objects */ 4115 if (nr_to_scan == 0) { 4116 cnt = 0; 4117 list_for_each_entry(obj, 4118 &dev_priv->mm.inactive_list, 4119 mm_list) 4120 cnt++; 4121 mutex_unlock(&dev->struct_mutex); 4122 return cnt / 100 * sysctl_vfs_cache_pressure; 4123 } 4124 4125 rescan: 4126 /* first scan for clean buffers */ 4127 i915_gem_retire_requests(dev); 4128 4129 list_for_each_entry_safe(obj, next, 4130 &dev_priv->mm.inactive_list, 4131 mm_list) { 4132 if (i915_gem_object_is_purgeable(obj)) { 4133 if (i915_gem_object_unbind(obj) == 0 && 4134 --nr_to_scan == 0) 4135 break; 4136 } 4137 } 4138 4139 /* second pass, evict/count anything 
still on the inactive list */ 4140 cnt = 0; 4141 list_for_each_entry_safe(obj, next, 4142 &dev_priv->mm.inactive_list, 4143 mm_list) { 4144 if (nr_to_scan && 4145 i915_gem_object_unbind(obj) == 0) 4146 nr_to_scan--; 4147 else 4148 cnt++; 4149 } 4150 4151 if (nr_to_scan && i915_gpu_is_active(dev)) { 4152 /* 4153 * We are desperate for pages, so as a last resort, wait 4154 * for the GPU to finish and discard whatever we can. 4155 * This has a dramatic impact to reduce the number of 4156 * OOM-killer events whilst running the GPU aggressively. 4157 */ 4158 if (i915_gpu_idle(dev) == 0) 4159 goto rescan; 4160 } 4161 mutex_unlock(&dev->struct_mutex); 4162 return cnt / 100 * sysctl_vfs_cache_pressure; 4163 } 4164