/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
							   bool write);
static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
								   uint64_t offset,
								   uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						     unsigned alignment,
						     bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_device *dev,
				     struct drm_i915_fence_reg *reg);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

void i915_gem_do_init(struct drm_device *dev,
		      unsigned long start,
		      unsigned long mappable_end,
		      unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);

	dev_priv->mm.gtt_start = start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
	dev_priv->mm.gtt_end = end;
	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

	/* Take over this portion of the GTT */
	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
					      vaddr + page_offset,
					      page_length);
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset,
			  const char *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;
		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pread path. It drops the struct_mutex while
 * copying to the user's address space, so page faults on the user buffer
 * can be serviced without holding the lock, and it handles per-page
 * bit-17 swizzling of the object's backing store.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

	mutex_unlock(&dev->struct_mutex);

	while (remain > 0) {
		struct page *page;
		char *vaddr;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		vaddr = kmap(page);
		if (page_do_bit17_swizzling)
			ret = __copy_to_user_swizzled(user_data,
						      vaddr, shmem_page_offset,
						      page_length);
		else
			ret = __copy_to_user(user_data,
					     vaddr + shmem_page_offset,
					     page_length);
		kunmap(page);

		mark_page_accessed(page);
		page_cache_release(page);

		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	mutex_lock(&dev->struct_mutex);
	/* Fixup: Kill any reinstated backing storage pages */
	if (obj->madv == __I915_MADV_PURGED)
		i915_gem_object_truncate(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
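	/* The check below is written as size > obj->base.size - offset
	 * (rather than offset + size > obj->base.size) so that a huge
	 * args->size cannot wrap the addition past the end of the object.
	 */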
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and then copies it into the GTT aperture with
 * kmap()/memcpy_toio().
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
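 *
 * The user pages are pinned up front with get_user_pages() while the
 * struct_mutex is dropped; a copy whose source spans a page boundary pins
 * two pages, since num_pages = last_data_page - first_data_page + 1 with
 * first/last derived from data_ptr and data_ptr + size - 1.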
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin_pages;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
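 *
 * It runs with the struct_mutex held, using kmap_atomic() and
 * __copy_from_user_inatomic(), so a fault on the user address aborts the
 * copy and the caller falls back to i915_gem_shmem_pwrite_slow().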
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
						user_data,
						page_length);
		kunmap_atomic(vaddr);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pwrite path, which drops the struct_mutex and
 * copies straight from the user's address space with kmap()/__copy_from_user(),
 * handling per-page bit-17 swizzling along the way.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
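 *
 * Pages whose physical address has bit 17 set are written through
 * __copy_from_user_swizzled(), which swaps adjacent 64-byte cachelines
 * (gpu_offset ^ 64) within each page.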
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;
	obj->dirty = 1;

	mutex_unlock(&dev->struct_mutex);

	while (remain > 0) {
		struct page *page;
		char *vaddr;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		vaddr = kmap(page);
		if (page_do_bit17_swizzling)
			ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
							user_data,
							page_length);
		else
			ret = __copy_from_user(vaddr + shmem_page_offset,
					       user_data,
					       page_length);
		kunmap(page);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	mutex_lock(&dev->struct_mutex);
	/* Fixup: Kill any reinstated backing storage pages */
	if (obj->madv == __I915_MADV_PURGED)
		i915_gem_object_truncate(obj);
	/* and flush dirty cachelines in case the object isn't in the cpu write
	 * domain anymore. */
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		i915_gem_clflush_object(obj);
		intel_gtt_chipset_flush();
	}

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->gtt_space &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);
		if (ret)
			goto out;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			goto out_unpin;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

out_unpin:
		i915_gem_object_unpin(obj);

		if (ret != -EFAULT)
			goto out;
		/* Fall through to the shmfs paths because the gtt paths might
		 * fail with non-page-backed user pointers (e.g. gtt mappings
		 * when moving data between textures). */
	}

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	addr = vm_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
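 *
 * Errors from the binding and fencing steps are translated into VM_FAULT
 * codes at the bottom of the handler: -EIO/-EAGAIN and signals retry the
 * fault (VM_FAULT_NOPAGE), -ENOMEM becomes VM_FAULT_OOM and anything else
 * is reported as VM_FAULT_SIGBUS.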
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Now bind it into the GTT if needed */
	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);
		if (ret)
			goto unlock;
	}
	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
		if (ret)
			goto unlock;

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	if (obj->tiling_mode == I915_TILING_NONE)
		ret = i915_gem_object_put_fence(obj);
	else
		ret = i915_gem_object_get_fence(obj, NULL);
	if (ret)
		goto unlock;

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->fault_mappable = true;

	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
		set_need_resched();
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
				    obj->base.size, 1);

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-two fence region size from i915_gem_get_gtt_size().
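	 *
	 * For example, on gen3 the smallest fence region is 1MB, so a 1.5MB
	 * tiled object must be aligned to the next power of two, 2MB; on
	 * gen2 the minimum region is 512KB.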
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	if (!obj->base.map_list.map) {
		ret = drm_gem_create_mmap_offset(&obj->base);
		if (ret)
			goto out;
	}

	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}


static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
			      gfp_t gfpmask)
{
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
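	 *
	 * Each page returned by shmem_read_mapping_page_gfp() carries a
	 * reference, which is dropped either in the error path below or in
	 * i915_gem_object_put_pages_gtt().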
	 */
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)
		return -ENOMEM;

	inode = obj->base.filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	gfpmask |= mapping_gfp_mask(mapping);

	for (i = 0; i < page_count; i++) {
		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
		if (IS_ERR(page))
			goto err_pages;

		obj->pages[i] = page;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	while (i--)
		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);
	obj->pages = NULL;
	return PTR_ERR(page);
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	int i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj->dirty)
			set_page_dirty(obj->pages[i]);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);

		page_cache_release(obj->pages[i]);
	}
	obj->dirty = 0;

	drm_free_large(obj->pages);
	obj->pages = NULL;
}

void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring,
			       u32 seqno)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(ring == NULL);
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution. */
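	/* (Both lists are kept in submission order: retirement in
	 * i915_gem_retire_requests_ring() walks from the head, so the most
	 * recently used entries end up at the tail.)
	 */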
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_rendering_seqno = seqno;

	if (obj->fenced_gpu_access) {
		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		/* Bump MRU to take account of the delayed flush */
		if (obj->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg;

			reg = &dev_priv->fence_regs[obj->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}
	}
}

static void
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
	obj->last_fenced_seqno = 0;
}

static void
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);

	i915_gem_object_move_off_active(obj);
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);
	obj->ring = NULL;
	obj->last_fenced_ring = NULL;

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;

	obj->active = 0;
	obj->pending_gpu_write = false;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
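	 *
	 * (The pread/pwrite slow paths check for __I915_MADV_PURGED on their
	 * way back in and re-truncate, so any pages they reinstated while
	 * the struct_mutex was dropped are thrown away again.)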
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	shmem_truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
			       uint32_t flush_domains)
{
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next,
				 &ring->gpu_write_list,
				 gpu_write_list) {
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;

			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
			i915_gem_object_move_to_active(obj, ring,
						       i915_gem_next_request_seqno(ring));

			trace_i915_gem_object_change_domain(obj,
							    obj->base.read_domains,
							    old_write_domain);
		}
	}
}

static u32
i915_gem_get_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 seqno = dev_priv->next_seqno;

	/* reserve 0 for non-seqno */
	if (++dev_priv->next_seqno == 0)
		dev_priv->next_seqno = 1;

	return seqno;
}

u32
i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
{
	if (ring->outstanding_lazy_request == 0)
		ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);

	return ring->outstanding_lazy_request;
}

int
i915_add_request(struct intel_ring_buffer *ring,
		 struct drm_file *file,
		 struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t seqno;
	u32 request_ring_position;
	int was_empty;
	int ret;

	BUG_ON(request == NULL);
	seqno = i915_gem_next_request_seqno(ring);

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
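	 *
	 * (The tail recorded here is what i915_gem_retire_requests_ring()
	 * later copies into ring->last_retired_head once the request's
	 * seqno has passed.)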
	 */
	request_ring_position = intel_ring_get_tail(ring);

	ret = ring->add_request(ring, &seqno);
	if (ret)
		return ret;

	trace_i915_gem_request_add(ring, seqno);

	request->seqno = seqno;
	request->ring = ring;
	request->tail = request_ring_position;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	ring->outstanding_lazy_request = 0;

	if (!dev_priv->mm.suspended) {
		if (i915_enable_hangcheck) {
			mod_timer(&dev_priv->hangcheck_timer,
				  jiffies +
				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
		}
		if (was_empty)
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
	}
	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
{
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}
}

static void i915_gem_reset_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct drm_i915_gem_object *obj = reg->obj;

		if (!obj)
			continue;

		if (obj->tiling_mode)
			i915_gem_release_mmap(obj);

		reg->obj->fence_reg = I915_FENCE_REG_NONE;
		reg->obj->fenced_gpu_access = false;
		reg->obj->last_fenced_seqno = 0;
		reg->obj->last_fenced_ring = NULL;
		i915_gem_clear_fence_reg(dev, reg);
	}
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int i;

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj = list_first_entry(&dev_priv->mm.flushing_list,
				       struct drm_i915_gem_object,
				       mm_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
			    mm_list)
	{
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}

	/* The fence registers are invalidated so clear them out */
	i915_gem_reset_fences(dev);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
{
	uint32_t seqno;
	int i;

	if (list_empty(&ring->request_list))
		return;

	WARN_ON(i915_verify_lists(ring->dev));

	seqno = ring->get_seqno(ring);

	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
		if (seqno >= ring->sync_seqno[i])
			ring->sync_seqno[i] = 0;

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		if (!i915_seqno_passed(seqno, request->seqno))
			break;

		trace_i915_gem_request_retire(ring, request->seqno);
		/* We know the GPU must have read the request to have
		 * sent us the seqno + interrupt, so use the position
		 * of the tail of the request to update the last known
		 * position of the GPU head.
		 */
		ring->last_retired_head = request->tail;

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
			break;

		if (obj->base.write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);
	}

	if (unlikely(ring->trace_irq_seqno &&
		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
		ring->irq_put(ring);
		ring->trace_irq_seqno = 0;
	}

	WARN_ON(i915_verify_lists(ring->dev));
}

void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i;

	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
		struct drm_i915_gem_object *obj, *next;

		/* We must be careful that during unbind() we do not
		 * accidentally infinitely recurse into retire requests.
		 * Currently:
		 *   retire -> free -> unbind -> wait -> retire_ring
		 */
		list_for_each_entry_safe(obj, next,
					 &dev_priv->mm.deferred_free_list,
					 mm_list)
			i915_gem_free_object_tail(obj);
	}

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_retire_requests_ring(&dev_priv->ring[i]);
}

static void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;
	bool idle;
	int i;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
		return;
	}

	i915_gem_retire_requests(dev);

	/* Send a periodic flush down the ring so we don't hold onto GEM
	 * objects indefinitely.
	 */
	idle = true;
	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct intel_ring_buffer *ring = &dev_priv->ring[i];

		if (!list_empty(&ring->gpu_write_list)) {
			struct drm_i915_gem_request *request;
			int ret;

			ret = i915_gem_flush_ring(ring,
						  0, I915_GEM_GPU_DOMAINS);
			request = kzalloc(sizeof(*request), GFP_KERNEL);
			if (ret || request == NULL ||
			    i915_add_request(ring, NULL, request))
				kfree(request);
		}

		idle &= list_empty(&ring->request_list);
	}

	if (!dev_priv->mm.suspended && !idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);

	mutex_unlock(&dev->struct_mutex);
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct intel_ring_buffer *ring,
		  uint32_t seqno,
		  bool do_retire)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		return recovery_complete ? -EIO : -EAGAIN;
	}

	if (seqno == ring->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
			return -ENOMEM;

		ret = i915_add_request(ring, NULL, request);
		if (ret) {
			kfree(request);
			return ret;
		}

		seqno = request->seqno;
	}

	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
		if (HAS_PCH_SPLIT(ring->dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			ring->dev->driver->irq_preinstall(ring->dev);
			ring->dev->driver->irq_postinstall(ring->dev);
		}

		trace_i915_gem_request_wait_begin(ring, seqno);

		ring->waiting_seqno = seqno;
		if (ring->irq_get(ring)) {
			if (dev_priv->mm.interruptible)
				ret = wait_event_interruptible(ring->irq_queue,
							       i915_seqno_passed(ring->get_seqno(ring), seqno)
							       || atomic_read(&dev_priv->mm.wedged));
			else
				wait_event(ring->irq_queue,
					   i915_seqno_passed(ring->get_seqno(ring), seqno)
					   || atomic_read(&dev_priv->mm.wedged));

			ring->irq_put(ring);
		} else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
							     seqno) ||
					   atomic_read(&dev_priv->mm.wedged), 3000))
			ret = -EBUSY;
		ring->waiting_seqno = 0;

		trace_i915_gem_request_wait_end(ring, seqno);
	}
	if (atomic_read(&dev_priv->mm.wedged))
		ret = -EAGAIN;

	/* Directly dispatch request retiring. While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0 && do_retire)
		i915_gem_retire_requests_ring(ring);

	return ret;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
{
	int ret;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj->active) {
		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno,
					true);
		if (ret)
			return ret;
	}

	return 0;
}

static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
{
	u32 old_write_domain, old_read_domains;

	/* Act as a barrier for all accesses through the GTT */
	mb();

	/* Force a pagefault for domain tracking on next user access */
	i915_gem_release_mmap(obj);

	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		return;

	old_read_domains = obj->base.read_domains;
	old_write_domain = obj->base.write_domain;

	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);
}

/**
 * Unbinds an object from the GTT aperture.
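 *
 * Any outstanding rendering is waited upon, the object is moved back to the
 * CPU domain, its fence register (if any) is released and the GTT and
 * aliasing-PPGTT mappings are torn down before the drm_mm node is freed.
 * Pinned objects cannot be unbound.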
2055 */ 2056 int 2057 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2058 { 2059 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2060 int ret = 0; 2061 2062 if (obj->gtt_space == NULL) 2063 return 0; 2064 2065 if (obj->pin_count != 0) { 2066 DRM_ERROR("Attempting to unbind pinned buffer\n"); 2067 return -EINVAL; 2068 } 2069 2070 ret = i915_gem_object_finish_gpu(obj); 2071 if (ret == -ERESTARTSYS) 2072 return ret; 2073 /* Continue on if we fail due to EIO, the GPU is hung so we 2074 * should be safe and we need to cleanup or else we might 2075 * cause memory corruption through use-after-free. 2076 */ 2077 2078 i915_gem_object_finish_gtt(obj); 2079 2080 /* Move the object to the CPU domain to ensure that 2081 * any possible CPU writes while it's not in the GTT 2082 * are flushed when we go to remap it. 2083 */ 2084 if (ret == 0) 2085 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2086 if (ret == -ERESTARTSYS) 2087 return ret; 2088 if (ret) { 2089 /* In the event of a disaster, abandon all caches and 2090 * hope for the best. 2091 */ 2092 i915_gem_clflush_object(obj); 2093 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2094 } 2095 2096 /* release the fence reg _after_ flushing */ 2097 ret = i915_gem_object_put_fence(obj); 2098 if (ret == -ERESTARTSYS) 2099 return ret; 2100 2101 trace_i915_gem_object_unbind(obj); 2102 2103 i915_gem_gtt_unbind_object(obj); 2104 if (obj->has_aliasing_ppgtt_mapping) { 2105 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2106 obj->has_aliasing_ppgtt_mapping = 0; 2107 } 2108 2109 i915_gem_object_put_pages_gtt(obj); 2110 2111 list_del_init(&obj->gtt_list); 2112 list_del_init(&obj->mm_list); 2113 /* Avoid an unnecessary call to unbind on rebind. */ 2114 obj->map_and_fenceable = true; 2115 2116 drm_mm_put_block(obj->gtt_space); 2117 obj->gtt_space = NULL; 2118 obj->gtt_offset = 0; 2119 2120 if (i915_gem_object_is_purgeable(obj)) 2121 i915_gem_object_truncate(obj); 2122 2123 return ret; 2124 } 2125 2126 int 2127 i915_gem_flush_ring(struct intel_ring_buffer *ring, 2128 uint32_t invalidate_domains, 2129 uint32_t flush_domains) 2130 { 2131 int ret; 2132 2133 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) 2134 return 0; 2135 2136 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); 2137 2138 ret = ring->flush(ring, invalidate_domains, flush_domains); 2139 if (ret) 2140 return ret; 2141 2142 if (flush_domains & I915_GEM_GPU_DOMAINS) 2143 i915_gem_process_flushing_list(ring, flush_domains); 2144 2145 return 0; 2146 } 2147 2148 static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire) 2149 { 2150 int ret; 2151 2152 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) 2153 return 0; 2154 2155 if (!list_empty(&ring->gpu_write_list)) { 2156 ret = i915_gem_flush_ring(ring, 2157 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2158 if (ret) 2159 return ret; 2160 } 2161 2162 return i915_wait_request(ring, i915_gem_next_request_seqno(ring), 2163 do_retire); 2164 } 2165 2166 int i915_gpu_idle(struct drm_device *dev, bool do_retire) 2167 { 2168 drm_i915_private_t *dev_priv = dev->dev_private; 2169 int ret, i; 2170 2171 /* Flush everything onto the inactive list. 
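 * For each ring this flushes any outstanding GPU writes and then waits
 * for the ring to drain, optionally retiring the completed requests
 * (do_retire).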
*/ 2172 for (i = 0; i < I915_NUM_RINGS; i++) { 2173 ret = i915_ring_idle(&dev_priv->ring[i], do_retire); 2174 if (ret) 2175 return ret; 2176 } 2177 2178 return 0; 2179 } 2180 2181 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, 2182 struct intel_ring_buffer *pipelined) 2183 { 2184 struct drm_device *dev = obj->base.dev; 2185 drm_i915_private_t *dev_priv = dev->dev_private; 2186 u32 size = obj->gtt_space->size; 2187 int regnum = obj->fence_reg; 2188 uint64_t val; 2189 2190 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2191 0xfffff000) << 32; 2192 val |= obj->gtt_offset & 0xfffff000; 2193 val |= (uint64_t)((obj->stride / 128) - 1) << 2194 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2195 2196 if (obj->tiling_mode == I915_TILING_Y) 2197 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2198 val |= I965_FENCE_REG_VALID; 2199 2200 if (pipelined) { 2201 int ret = intel_ring_begin(pipelined, 6); 2202 if (ret) 2203 return ret; 2204 2205 intel_ring_emit(pipelined, MI_NOOP); 2206 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2207 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8); 2208 intel_ring_emit(pipelined, (u32)val); 2209 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4); 2210 intel_ring_emit(pipelined, (u32)(val >> 32)); 2211 intel_ring_advance(pipelined); 2212 } else 2213 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); 2214 2215 return 0; 2216 } 2217 2218 static int i965_write_fence_reg(struct drm_i915_gem_object *obj, 2219 struct intel_ring_buffer *pipelined) 2220 { 2221 struct drm_device *dev = obj->base.dev; 2222 drm_i915_private_t *dev_priv = dev->dev_private; 2223 u32 size = obj->gtt_space->size; 2224 int regnum = obj->fence_reg; 2225 uint64_t val; 2226 2227 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2228 0xfffff000) << 32; 2229 val |= obj->gtt_offset & 0xfffff000; 2230 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2231 if (obj->tiling_mode == I915_TILING_Y) 2232 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2233 val |= I965_FENCE_REG_VALID; 2234 2235 if (pipelined) { 2236 int ret = intel_ring_begin(pipelined, 6); 2237 if (ret) 2238 return ret; 2239 2240 intel_ring_emit(pipelined, MI_NOOP); 2241 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2242 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8); 2243 intel_ring_emit(pipelined, (u32)val); 2244 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4); 2245 intel_ring_emit(pipelined, (u32)(val >> 32)); 2246 intel_ring_advance(pipelined); 2247 } else 2248 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); 2249 2250 return 0; 2251 } 2252 2253 static int i915_write_fence_reg(struct drm_i915_gem_object *obj, 2254 struct intel_ring_buffer *pipelined) 2255 { 2256 struct drm_device *dev = obj->base.dev; 2257 drm_i915_private_t *dev_priv = dev->dev_private; 2258 u32 size = obj->gtt_space->size; 2259 u32 fence_reg, val, pitch_val; 2260 int tile_width; 2261 2262 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2263 (size & -size) != size || 2264 (obj->gtt_offset & (size - 1)), 2265 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2266 obj->gtt_offset, obj->map_and_fenceable, size)) 2267 return -EINVAL; 2268 2269 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2270 tile_width = 128; 2271 else 2272 tile_width = 512; 2273 2274 /* Note: pitch better be a power of two tile widths */ 2275 pitch_val = obj->stride / tile_width; 2276 pitch_val = ffs(pitch_val) - 1; 2277 2278 val = obj->gtt_offset; 2279 if (obj->tiling_mode == I915_TILING_Y) 2280 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2281 val |= I915_FENCE_SIZE_BITS(size); 2282 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2283 val |= I830_FENCE_REG_VALID; 2284 2285 fence_reg = obj->fence_reg; 2286 if (fence_reg < 8) 2287 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2288 else 2289 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2290 2291 if (pipelined) { 2292 int ret = intel_ring_begin(pipelined, 4); 2293 if (ret) 2294 return ret; 2295 2296 intel_ring_emit(pipelined, MI_NOOP); 2297 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2298 intel_ring_emit(pipelined, fence_reg); 2299 intel_ring_emit(pipelined, val); 2300 intel_ring_advance(pipelined); 2301 } else 2302 I915_WRITE(fence_reg, val); 2303 2304 return 0; 2305 } 2306 2307 static int i830_write_fence_reg(struct drm_i915_gem_object *obj, 2308 struct intel_ring_buffer *pipelined) 2309 { 2310 struct drm_device *dev = obj->base.dev; 2311 drm_i915_private_t *dev_priv = dev->dev_private; 2312 u32 size = obj->gtt_space->size; 2313 int regnum = obj->fence_reg; 2314 uint32_t val; 2315 uint32_t pitch_val; 2316 2317 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2318 (size & -size) != size || 2319 (obj->gtt_offset & (size - 1)), 2320 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2321 obj->gtt_offset, size)) 2322 return -EINVAL; 2323 2324 pitch_val = obj->stride / 128; 2325 pitch_val = ffs(pitch_val) - 1; 2326 2327 val = obj->gtt_offset; 2328 if (obj->tiling_mode == I915_TILING_Y) 2329 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2330 val |= I830_FENCE_SIZE_BITS(size); 2331 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2332 val |= I830_FENCE_REG_VALID; 2333 2334 if (pipelined) { 2335 int ret = intel_ring_begin(pipelined, 4); 2336 if (ret) 2337 return ret; 2338 2339 intel_ring_emit(pipelined, MI_NOOP); 2340 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2341 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); 2342 intel_ring_emit(pipelined, val); 2343 intel_ring_advance(pipelined); 2344 } else 2345 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); 2346 2347 return 0; 2348 } 2349 2350 static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) 2351 { 2352 return i915_seqno_passed(ring->get_seqno(ring), seqno); 2353 } 2354 2355 static int 2356 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, 2357 struct intel_ring_buffer *pipelined) 2358 { 2359 int ret; 2360 2361 if (obj->fenced_gpu_access) { 2362 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2363 ret = i915_gem_flush_ring(obj->last_fenced_ring, 2364 0, obj->base.write_domain); 2365 if (ret) 2366 return ret; 2367 } 2368 2369 obj->fenced_gpu_access = false; 2370 } 2371 2372 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { 2373 if (!ring_passed_seqno(obj->last_fenced_ring, 2374 obj->last_fenced_seqno)) { 2375 ret = i915_wait_request(obj->last_fenced_ring, 2376 obj->last_fenced_seqno, 2377 true); 2378 if (ret) 2379 return ret; 2380 } 2381 2382 obj->last_fenced_seqno = 0; 2383 obj->last_fenced_ring = NULL; 2384 } 2385 2386 /* Ensure that all CPU reads are 
completed before installing a fence
2387 * and all writes before removing the fence.
2388 */
2389 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2390 mb();
2391
2392 return 0;
2393 }
2394
2395 int
2396 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2397 {
2398 int ret;
2399
2400 if (obj->tiling_mode)
2401 i915_gem_release_mmap(obj);
2402
2403 ret = i915_gem_object_flush_fence(obj, NULL);
2404 if (ret)
2405 return ret;
2406
2407 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2408 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2409
2410 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count);
2411 i915_gem_clear_fence_reg(obj->base.dev,
2412 &dev_priv->fence_regs[obj->fence_reg]);
2413
2414 obj->fence_reg = I915_FENCE_REG_NONE;
2415 }
2416
2417 return 0;
2418 }
2419
2420 static struct drm_i915_fence_reg *
2421 i915_find_fence_reg(struct drm_device *dev,
2422 struct intel_ring_buffer *pipelined)
2423 {
2424 struct drm_i915_private *dev_priv = dev->dev_private;
2425 struct drm_i915_fence_reg *reg, *first, *avail;
2426 int i;
2427
2428 /* First try to find a free reg */
2429 avail = NULL;
2430 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2431 reg = &dev_priv->fence_regs[i];
2432 if (!reg->obj)
2433 return reg;
2434
2435 if (!reg->pin_count)
2436 avail = reg;
2437 }
2438
2439 if (avail == NULL)
2440 return NULL;
2441
2442 /* None available, try to steal one or wait for a user to finish */
2443 avail = first = NULL;
2444 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2445 if (reg->pin_count)
2446 continue;
2447
2448 if (first == NULL)
2449 first = reg;
2450
2451 if (!pipelined ||
2452 !reg->obj->last_fenced_ring ||
2453 reg->obj->last_fenced_ring == pipelined) {
2454 avail = reg;
2455 break;
2456 }
2457 }
2458
2459 if (avail == NULL)
2460 avail = first;
2461
2462 return avail;
2463 }
2464
2465 /**
2466 * i915_gem_object_get_fence - set up a fence reg for an object
2467 * @obj: object to map through a fence reg
2468 * @pipelined: ring on which to queue the change, or NULL for CPU access
2470 *
2471 * When mapping objects through the GTT, userspace wants to be able to write
2472 * to them without having to worry about swizzling if the object is tiled.
2473 *
2474 * This function walks the fence regs looking for a free one for @obj,
2475 * stealing one if it can't find any.
2476 *
2477 * It then sets up the reg based on the object's properties: address, pitch
2478 * and tiling format.
2479 */
2480 int
2481 i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
2482 struct intel_ring_buffer *pipelined)
2483 {
2484 struct drm_device *dev = obj->base.dev;
2485 struct drm_i915_private *dev_priv = dev->dev_private;
2486 struct drm_i915_fence_reg *reg;
2487 int ret;
2488
2489 /* XXX disable pipelining. There are bugs. Shocking. */
2490 pipelined = NULL;
2491
2492 /* Just update our place in the LRU if our fence is getting reused.
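 * (If the tiling parameters changed we still have to rewrite the register
 * contents below before returning.)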
*/
2493 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2494 reg = &dev_priv->fence_regs[obj->fence_reg];
2495 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2496
2497 if (obj->tiling_changed) {
2498 ret = i915_gem_object_flush_fence(obj, pipelined);
2499 if (ret)
2500 return ret;
2501
2502 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
2503 pipelined = NULL;
2504
2505 if (pipelined) {
2506 reg->setup_seqno =
2507 i915_gem_next_request_seqno(pipelined);
2508 obj->last_fenced_seqno = reg->setup_seqno;
2509 obj->last_fenced_ring = pipelined;
2510 }
2511
2512 goto update;
2513 }
2514
2515 if (!pipelined) {
2516 if (reg->setup_seqno) {
2517 if (!ring_passed_seqno(obj->last_fenced_ring,
2518 reg->setup_seqno)) {
2519 ret = i915_wait_request(obj->last_fenced_ring,
2520 reg->setup_seqno,
2521 true);
2522 if (ret)
2523 return ret;
2524 }
2525
2526 reg->setup_seqno = 0;
2527 }
2528 } else if (obj->last_fenced_ring &&
2529 obj->last_fenced_ring != pipelined) {
2530 ret = i915_gem_object_flush_fence(obj, pipelined);
2531 if (ret)
2532 return ret;
2533 }
2534
2535 return 0;
2536 }
2537
2538 reg = i915_find_fence_reg(dev, pipelined);
2539 if (reg == NULL)
2540 return -EDEADLK;
2541
2542 ret = i915_gem_object_flush_fence(obj, pipelined);
2543 if (ret)
2544 return ret;
2545
2546 if (reg->obj) {
2547 struct drm_i915_gem_object *old = reg->obj;
2548
2549 drm_gem_object_reference(&old->base);
2550
2551 if (old->tiling_mode)
2552 i915_gem_release_mmap(old);
2553
2554 ret = i915_gem_object_flush_fence(old, pipelined);
2555 if (ret) {
2556 drm_gem_object_unreference(&old->base);
2557 return ret;
2558 }
2559
2560 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
2561 pipelined = NULL;
2562
2563 old->fence_reg = I915_FENCE_REG_NONE;
2564 old->last_fenced_ring = pipelined;
2565 old->last_fenced_seqno =
2566 pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
2567
2568 drm_gem_object_unreference(&old->base);
2569 } else if (obj->last_fenced_seqno == 0)
2570 pipelined = NULL;
2571
2572 reg->obj = obj;
2573 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2574 obj->fence_reg = reg - dev_priv->fence_regs;
2575 obj->last_fenced_ring = pipelined;
2576
2577 reg->setup_seqno =
2578 pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
2579 obj->last_fenced_seqno = reg->setup_seqno;
2580
2581 update:
2582 obj->tiling_changed = false;
2583 switch (INTEL_INFO(dev)->gen) {
2584 case 7:
2585 case 6:
2586 ret = sandybridge_write_fence_reg(obj, pipelined);
2587 break;
2588 case 5:
2589 case 4:
2590 ret = i965_write_fence_reg(obj, pipelined);
2591 break;
2592 case 3:
2593 ret = i915_write_fence_reg(obj, pipelined);
2594 break;
2595 case 2:
2596 ret = i830_write_fence_reg(obj, pipelined);
2597 break;
2598 }
2599
2600 return ret;
2601 }
2602
2603 /**
2604 * i915_gem_clear_fence_reg - clear out fence register info
2605 * @dev: DRM device
 * @reg: fence register to clear
2606 *
2607 * Zeroes out the fence register itself and clears out the associated
2608 * data structures in dev_priv and obj.
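 *
 * Called with struct_mutex held, e.g. when an object drops its fence in
 * i915_gem_object_put_fence() or when the registers are initialised in
 * i915_gem_load().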
2609 */
2610 static void
2611 i915_gem_clear_fence_reg(struct drm_device *dev,
2612 struct drm_i915_fence_reg *reg)
2613 {
2614 drm_i915_private_t *dev_priv = dev->dev_private;
2615 uint32_t fence_reg = reg - dev_priv->fence_regs;
2616
2617 switch (INTEL_INFO(dev)->gen) {
2618 case 7:
2619 case 6:
2620 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
2621 break;
2622 case 5:
2623 case 4:
2624 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
2625 break;
2626 case 3:
2627 if (fence_reg >= 8)
2628 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2629 else
2630 case 2:
2631 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2632
2633 I915_WRITE(fence_reg, 0);
2634 break;
2635 }
2636
2637 list_del_init(&reg->lru_list);
2638 reg->obj = NULL;
2639 reg->setup_seqno = 0;
2640 reg->pin_count = 0;
2641 }
2642
2643 /**
2644 * Finds free space in the GTT aperture and binds the object there.
2645 */
2646 static int
2647 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2648 unsigned alignment,
2649 bool map_and_fenceable)
2650 {
2651 struct drm_device *dev = obj->base.dev;
2652 drm_i915_private_t *dev_priv = dev->dev_private;
2653 struct drm_mm_node *free_space;
2654 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2655 u32 size, fence_size, fence_alignment, unfenced_alignment;
2656 bool mappable, fenceable;
2657 int ret;
2658
2659 if (obj->madv != I915_MADV_WILLNEED) {
2660 DRM_ERROR("Attempting to bind a purgeable object\n");
2661 return -EINVAL;
2662 }
2663
2664 fence_size = i915_gem_get_gtt_size(dev,
2665 obj->base.size,
2666 obj->tiling_mode);
2667 fence_alignment = i915_gem_get_gtt_alignment(dev,
2668 obj->base.size,
2669 obj->tiling_mode);
2670 unfenced_alignment =
2671 i915_gem_get_unfenced_gtt_alignment(dev,
2672 obj->base.size,
2673 obj->tiling_mode);
2674
2675 if (alignment == 0)
2676 alignment = map_and_fenceable ? fence_alignment :
2677 unfenced_alignment;
2678 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2679 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2680 return -EINVAL;
2681 }
2682
2683 size = map_and_fenceable ? fence_size : obj->base.size;
2684
2685 /* If the object is bigger than the entire aperture, reject it early
2686 * before evicting everything in a vain attempt to find space.
2687 */
2688 if (obj->base.size >
2689 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2690 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2691 return -E2BIG;
2692 }
2693
2694 search_free:
2695 if (map_and_fenceable)
2696 free_space =
2697 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2698 size, alignment, 0,
2699 dev_priv->mm.gtt_mappable_end,
2700 0);
2701 else
2702 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2703 size, alignment, 0);
2704
2705 if (free_space != NULL) {
2706 if (map_and_fenceable)
2707 obj->gtt_space =
2708 drm_mm_get_block_range_generic(free_space,
2709 size, alignment, 0,
2710 dev_priv->mm.gtt_mappable_end,
2711 0);
2712 else
2713 obj->gtt_space =
2714 drm_mm_get_block(free_space, size, alignment);
2715 }
2716 if (obj->gtt_space == NULL) {
2717 /* If the gtt is empty and we're still having trouble
2718 * fitting our object in, we're out of memory.
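 * Otherwise evict something of a suitable size and alignment and retry
 * the search; a later page-allocation failure falls back to evicting
 * everything and, as a last resort, retrying without the NORETRY gfp
 * mask.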
2719 */ 2720 ret = i915_gem_evict_something(dev, size, alignment, 2721 map_and_fenceable); 2722 if (ret) 2723 return ret; 2724 2725 goto search_free; 2726 } 2727 2728 ret = i915_gem_object_get_pages_gtt(obj, gfpmask); 2729 if (ret) { 2730 drm_mm_put_block(obj->gtt_space); 2731 obj->gtt_space = NULL; 2732 2733 if (ret == -ENOMEM) { 2734 /* first try to reclaim some memory by clearing the GTT */ 2735 ret = i915_gem_evict_everything(dev, false); 2736 if (ret) { 2737 /* now try to shrink everyone else */ 2738 if (gfpmask) { 2739 gfpmask = 0; 2740 goto search_free; 2741 } 2742 2743 return -ENOMEM; 2744 } 2745 2746 goto search_free; 2747 } 2748 2749 return ret; 2750 } 2751 2752 ret = i915_gem_gtt_bind_object(obj); 2753 if (ret) { 2754 i915_gem_object_put_pages_gtt(obj); 2755 drm_mm_put_block(obj->gtt_space); 2756 obj->gtt_space = NULL; 2757 2758 if (i915_gem_evict_everything(dev, false)) 2759 return ret; 2760 2761 goto search_free; 2762 } 2763 2764 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); 2765 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2766 2767 /* Assert that the object is not currently in any GPU domain. As it 2768 * wasn't in the GTT, there shouldn't be any way it could have been in 2769 * a GPU cache 2770 */ 2771 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2772 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2773 2774 obj->gtt_offset = obj->gtt_space->start; 2775 2776 fenceable = 2777 obj->gtt_space->size == fence_size && 2778 (obj->gtt_space->start & (fence_alignment - 1)) == 0; 2779 2780 mappable = 2781 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 2782 2783 obj->map_and_fenceable = mappable && fenceable; 2784 2785 trace_i915_gem_object_bind(obj, map_and_fenceable); 2786 return 0; 2787 } 2788 2789 void 2790 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2791 { 2792 /* If we don't have a page list set up, then we're not pinned 2793 * to GPU, and we can ignore the cache flush because it'll happen 2794 * again at bind time. 2795 */ 2796 if (obj->pages == NULL) 2797 return; 2798 2799 /* If the GPU is snooping the contents of the CPU cache, 2800 * we do not need to manually clear the CPU cache lines. However, 2801 * the caches are only snooped when the render cache is 2802 * flushed/invalidated. As we always have to emit invalidations 2803 * and flushes when moving into and out of the RENDER domain, correct 2804 * snooping behaviour occurs naturally as the result of our domain 2805 * tracking. 2806 */ 2807 if (obj->cache_level != I915_CACHE_NONE) 2808 return; 2809 2810 trace_i915_gem_object_clflush(obj); 2811 2812 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2813 } 2814 2815 /** Flushes any GPU write domain for the object if it's dirty. */ 2816 static int 2817 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) 2818 { 2819 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) 2820 return 0; 2821 2822 /* Queue the GPU write cache flushing we need. */ 2823 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2824 } 2825 2826 /** Flushes the GTT write domain for the object if it's dirty. */ 2827 static void 2828 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2829 { 2830 uint32_t old_write_domain; 2831 2832 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2833 return; 2834 2835 /* No actual flushing is required for the GTT write domain. Writes 2836 * to it immediately go to main memory as far as we know, so there's 2837 * no chipset flush. 
It also doesn't land in render cache. 2838 * 2839 * However, we do have to enforce the order so that all writes through 2840 * the GTT land before any writes to the device, such as updates to 2841 * the GATT itself. 2842 */ 2843 wmb(); 2844 2845 old_write_domain = obj->base.write_domain; 2846 obj->base.write_domain = 0; 2847 2848 trace_i915_gem_object_change_domain(obj, 2849 obj->base.read_domains, 2850 old_write_domain); 2851 } 2852 2853 /** Flushes the CPU write domain for the object if it's dirty. */ 2854 static void 2855 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2856 { 2857 uint32_t old_write_domain; 2858 2859 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2860 return; 2861 2862 i915_gem_clflush_object(obj); 2863 intel_gtt_chipset_flush(); 2864 old_write_domain = obj->base.write_domain; 2865 obj->base.write_domain = 0; 2866 2867 trace_i915_gem_object_change_domain(obj, 2868 obj->base.read_domains, 2869 old_write_domain); 2870 } 2871 2872 /** 2873 * Moves a single object to the GTT read, and possibly write domain. 2874 * 2875 * This function returns when the move is complete, including waiting on 2876 * flushes to occur. 2877 */ 2878 int 2879 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2880 { 2881 uint32_t old_write_domain, old_read_domains; 2882 int ret; 2883 2884 /* Not valid to be called on unbound objects. */ 2885 if (obj->gtt_space == NULL) 2886 return -EINVAL; 2887 2888 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2889 return 0; 2890 2891 ret = i915_gem_object_flush_gpu_write_domain(obj); 2892 if (ret) 2893 return ret; 2894 2895 if (obj->pending_gpu_write || write) { 2896 ret = i915_gem_object_wait_rendering(obj); 2897 if (ret) 2898 return ret; 2899 } 2900 2901 i915_gem_object_flush_cpu_write_domain(obj); 2902 2903 old_write_domain = obj->base.write_domain; 2904 old_read_domains = obj->base.read_domains; 2905 2906 /* It should now be out of any other write domains, and we can update 2907 * the domain values for our changes. 2908 */ 2909 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2910 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2911 if (write) { 2912 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2913 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2914 obj->dirty = 1; 2915 } 2916 2917 trace_i915_gem_object_change_domain(obj, 2918 old_read_domains, 2919 old_write_domain); 2920 2921 return 0; 2922 } 2923 2924 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 2925 enum i915_cache_level cache_level) 2926 { 2927 struct drm_device *dev = obj->base.dev; 2928 drm_i915_private_t *dev_priv = dev->dev_private; 2929 int ret; 2930 2931 if (obj->cache_level == cache_level) 2932 return 0; 2933 2934 if (obj->pin_count) { 2935 DRM_DEBUG("can not change the cache level of pinned objects\n"); 2936 return -EBUSY; 2937 } 2938 2939 if (obj->gtt_space) { 2940 ret = i915_gem_object_finish_gpu(obj); 2941 if (ret) 2942 return ret; 2943 2944 i915_gem_object_finish_gtt(obj); 2945 2946 /* Before SandyBridge, you could not use tiling or fence 2947 * registers with snooped memory, so relinquish any fences 2948 * currently pointing to our region in the aperture. 
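 * (On gen6+ any fence is left in place; only the PTEs are rewritten for
 * the new cache level.)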
2949 */ 2950 if (INTEL_INFO(obj->base.dev)->gen < 6) { 2951 ret = i915_gem_object_put_fence(obj); 2952 if (ret) 2953 return ret; 2954 } 2955 2956 i915_gem_gtt_rebind_object(obj, cache_level); 2957 if (obj->has_aliasing_ppgtt_mapping) 2958 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 2959 obj, cache_level); 2960 } 2961 2962 if (cache_level == I915_CACHE_NONE) { 2963 u32 old_read_domains, old_write_domain; 2964 2965 /* If we're coming from LLC cached, then we haven't 2966 * actually been tracking whether the data is in the 2967 * CPU cache or not, since we only allow one bit set 2968 * in obj->write_domain and have been skipping the clflushes. 2969 * Just set it to the CPU cache for now. 2970 */ 2971 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 2972 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 2973 2974 old_read_domains = obj->base.read_domains; 2975 old_write_domain = obj->base.write_domain; 2976 2977 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2978 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2979 2980 trace_i915_gem_object_change_domain(obj, 2981 old_read_domains, 2982 old_write_domain); 2983 } 2984 2985 obj->cache_level = cache_level; 2986 return 0; 2987 } 2988 2989 /* 2990 * Prepare buffer for display plane (scanout, cursors, etc). 2991 * Can be called from an uninterruptible phase (modesetting) and allows 2992 * any flushes to be pipelined (for pageflips). 2993 * 2994 * For the display plane, we want to be in the GTT but out of any write 2995 * domains. So in many ways this looks like set_to_gtt_domain() apart from the 2996 * ability to pipeline the waits, pinning and any additional subtleties 2997 * that may differentiate the display plane from ordinary buffers. 2998 */ 2999 int 3000 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3001 u32 alignment, 3002 struct intel_ring_buffer *pipelined) 3003 { 3004 u32 old_read_domains, old_write_domain; 3005 int ret; 3006 3007 ret = i915_gem_object_flush_gpu_write_domain(obj); 3008 if (ret) 3009 return ret; 3010 3011 if (pipelined != obj->ring) { 3012 ret = i915_gem_object_wait_rendering(obj); 3013 if (ret == -ERESTARTSYS) 3014 return ret; 3015 } 3016 3017 /* The display engine is not coherent with the LLC cache on gen6. As 3018 * a result, we make sure that the pinning that is about to occur is 3019 * done with uncached PTEs. This is lowest common denominator for all 3020 * chipsets. 3021 * 3022 * However for gen6+, we could do better by using the GFDT bit instead 3023 * of uncaching, which would allow us to flush all the LLC-cached data 3024 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3025 */ 3026 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3027 if (ret) 3028 return ret; 3029 3030 /* As the user may map the buffer once pinned in the display plane 3031 * (e.g. libkms for the bootup splash), we have to ensure that we 3032 * always use map_and_fenceable for all scanout buffers. 3033 */ 3034 ret = i915_gem_object_pin(obj, alignment, true); 3035 if (ret) 3036 return ret; 3037 3038 i915_gem_object_flush_cpu_write_domain(obj); 3039 3040 old_write_domain = obj->base.write_domain; 3041 old_read_domains = obj->base.read_domains; 3042 3043 /* It should now be out of any other write domains, and we can update 3044 * the domain values for our changes. 
3045 */ 3046 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3047 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3048 3049 trace_i915_gem_object_change_domain(obj, 3050 old_read_domains, 3051 old_write_domain); 3052 3053 return 0; 3054 } 3055 3056 int 3057 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3058 { 3059 int ret; 3060 3061 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3062 return 0; 3063 3064 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3065 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 3066 if (ret) 3067 return ret; 3068 } 3069 3070 ret = i915_gem_object_wait_rendering(obj); 3071 if (ret) 3072 return ret; 3073 3074 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3075 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3076 return 0; 3077 } 3078 3079 /** 3080 * Moves a single object to the CPU read, and possibly write domain. 3081 * 3082 * This function returns when the move is complete, including waiting on 3083 * flushes to occur. 3084 */ 3085 static int 3086 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3087 { 3088 uint32_t old_write_domain, old_read_domains; 3089 int ret; 3090 3091 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3092 return 0; 3093 3094 ret = i915_gem_object_flush_gpu_write_domain(obj); 3095 if (ret) 3096 return ret; 3097 3098 ret = i915_gem_object_wait_rendering(obj); 3099 if (ret) 3100 return ret; 3101 3102 i915_gem_object_flush_gtt_write_domain(obj); 3103 3104 /* If we have a partially-valid cache of the object in the CPU, 3105 * finish invalidating it and free the per-page flags. 3106 */ 3107 i915_gem_object_set_to_full_cpu_read_domain(obj); 3108 3109 old_write_domain = obj->base.write_domain; 3110 old_read_domains = obj->base.read_domains; 3111 3112 /* Flush the CPU cache if it's still invalid. */ 3113 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3114 i915_gem_clflush_object(obj); 3115 3116 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3117 } 3118 3119 /* It should now be out of any other write domains, and we can update 3120 * the domain values for our changes. 3121 */ 3122 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3123 3124 /* If we're writing through the CPU, then the GPU read domains will 3125 * need to be invalidated at next use. 3126 */ 3127 if (write) { 3128 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3129 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3130 } 3131 3132 trace_i915_gem_object_change_domain(obj, 3133 old_read_domains, 3134 old_write_domain); 3135 3136 return 0; 3137 } 3138 3139 /** 3140 * Moves the object from a partially CPU read to a full one. 3141 * 3142 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3143 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3144 */ 3145 static void 3146 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) 3147 { 3148 if (!obj->page_cpu_valid) 3149 return; 3150 3151 /* If we're partially in the CPU read domain, finish moving it in. 3152 */ 3153 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) { 3154 int i; 3155 3156 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) { 3157 if (obj->page_cpu_valid[i]) 3158 continue; 3159 drm_clflush_pages(obj->pages + i, 1); 3160 } 3161 } 3162 3163 /* Free the page_cpu_valid mappings which are now stale, whether 3164 * or not we've got I915_GEM_DOMAIN_CPU. 
3165 */ 3166 kfree(obj->page_cpu_valid); 3167 obj->page_cpu_valid = NULL; 3168 } 3169 3170 /** 3171 * Set the CPU read domain on a range of the object. 3172 * 3173 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3174 * not entirely valid. The page_cpu_valid member of the object flags which 3175 * pages have been flushed, and will be respected by 3176 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3177 * of the whole object. 3178 * 3179 * This function returns when the move is complete, including waiting on 3180 * flushes to occur. 3181 */ 3182 static int 3183 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, 3184 uint64_t offset, uint64_t size) 3185 { 3186 uint32_t old_read_domains; 3187 int i, ret; 3188 3189 if (offset == 0 && size == obj->base.size) 3190 return i915_gem_object_set_to_cpu_domain(obj, 0); 3191 3192 ret = i915_gem_object_flush_gpu_write_domain(obj); 3193 if (ret) 3194 return ret; 3195 3196 ret = i915_gem_object_wait_rendering(obj); 3197 if (ret) 3198 return ret; 3199 3200 i915_gem_object_flush_gtt_write_domain(obj); 3201 3202 /* If we're already fully in the CPU read domain, we're done. */ 3203 if (obj->page_cpu_valid == NULL && 3204 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0) 3205 return 0; 3206 3207 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3208 * newly adding I915_GEM_DOMAIN_CPU 3209 */ 3210 if (obj->page_cpu_valid == NULL) { 3211 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE, 3212 GFP_KERNEL); 3213 if (obj->page_cpu_valid == NULL) 3214 return -ENOMEM; 3215 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 3216 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE); 3217 3218 /* Flush the cache on any pages that are still invalid from the CPU's 3219 * perspective. 3220 */ 3221 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3222 i++) { 3223 if (obj->page_cpu_valid[i]) 3224 continue; 3225 3226 drm_clflush_pages(obj->pages + i, 1); 3227 3228 obj->page_cpu_valid[i] = 1; 3229 } 3230 3231 /* It should now be out of any other write domains, and we can update 3232 * the domain values for our changes. 3233 */ 3234 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3235 3236 old_read_domains = obj->base.read_domains; 3237 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3238 3239 trace_i915_gem_object_change_domain(obj, 3240 old_read_domains, 3241 obj->base.write_domain); 3242 3243 return 0; 3244 } 3245 3246 /* Throttle our rendering by waiting until the ring has completed our requests 3247 * emitted over 20 msec ago. 3248 * 3249 * Note that if we were to use the current jiffies each time around the loop, 3250 * we wouldn't escape the function with any frames outstanding if the time to 3251 * render a frame was over 20ms. 3252 * 3253 * This should get us reasonable parallelism between CPU and GPU but also 3254 * relatively low latency when blocking on a particular request to finish. 
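 * For example, with requests emitted at t-30ms, t-25ms and t-5ms, we pick
 * the newest request that is at least 20ms old (t-25ms) and wait for its
 * seqno, leaving the t-5ms request outstanding.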
3255 */ 3256 static int 3257 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3258 { 3259 struct drm_i915_private *dev_priv = dev->dev_private; 3260 struct drm_i915_file_private *file_priv = file->driver_priv; 3261 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3262 struct drm_i915_gem_request *request; 3263 struct intel_ring_buffer *ring = NULL; 3264 u32 seqno = 0; 3265 int ret; 3266 3267 if (atomic_read(&dev_priv->mm.wedged)) 3268 return -EIO; 3269 3270 spin_lock(&file_priv->mm.lock); 3271 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3272 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3273 break; 3274 3275 ring = request->ring; 3276 seqno = request->seqno; 3277 } 3278 spin_unlock(&file_priv->mm.lock); 3279 3280 if (seqno == 0) 3281 return 0; 3282 3283 ret = 0; 3284 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 3285 /* And wait for the seqno passing without holding any locks and 3286 * causing extra latency for others. This is safe as the irq 3287 * generation is designed to be run atomically and so is 3288 * lockless. 3289 */ 3290 if (ring->irq_get(ring)) { 3291 ret = wait_event_interruptible(ring->irq_queue, 3292 i915_seqno_passed(ring->get_seqno(ring), seqno) 3293 || atomic_read(&dev_priv->mm.wedged)); 3294 ring->irq_put(ring); 3295 3296 if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) 3297 ret = -EIO; 3298 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring), 3299 seqno) || 3300 atomic_read(&dev_priv->mm.wedged), 3000)) { 3301 ret = -EBUSY; 3302 } 3303 } 3304 3305 if (ret == 0) 3306 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3307 3308 return ret; 3309 } 3310 3311 int 3312 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3313 uint32_t alignment, 3314 bool map_and_fenceable) 3315 { 3316 struct drm_device *dev = obj->base.dev; 3317 struct drm_i915_private *dev_priv = dev->dev_private; 3318 int ret; 3319 3320 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 3321 WARN_ON(i915_verify_lists(dev)); 3322 3323 if (obj->gtt_space != NULL) { 3324 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3325 (map_and_fenceable && !obj->map_and_fenceable)) { 3326 WARN(obj->pin_count, 3327 "bo is already pinned with incorrect alignment:" 3328 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3329 " obj->map_and_fenceable=%d\n", 3330 obj->gtt_offset, alignment, 3331 map_and_fenceable, 3332 obj->map_and_fenceable); 3333 ret = i915_gem_object_unbind(obj); 3334 if (ret) 3335 return ret; 3336 } 3337 } 3338 3339 if (obj->gtt_space == NULL) { 3340 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3341 map_and_fenceable); 3342 if (ret) 3343 return ret; 3344 } 3345 3346 if (obj->pin_count++ == 0) { 3347 if (!obj->active) 3348 list_move_tail(&obj->mm_list, 3349 &dev_priv->mm.pinned_list); 3350 } 3351 obj->pin_mappable |= map_and_fenceable; 3352 3353 WARN_ON(i915_verify_lists(dev)); 3354 return 0; 3355 } 3356 3357 void 3358 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3359 { 3360 struct drm_device *dev = obj->base.dev; 3361 drm_i915_private_t *dev_priv = dev->dev_private; 3362 3363 WARN_ON(i915_verify_lists(dev)); 3364 BUG_ON(obj->pin_count == 0); 3365 BUG_ON(obj->gtt_space == NULL); 3366 3367 if (--obj->pin_count == 0) { 3368 if (!obj->active) 3369 list_move_tail(&obj->mm_list, 3370 &dev_priv->mm.inactive_list); 3371 obj->pin_mappable = false; 3372 } 3373 WARN_ON(i915_verify_lists(dev)); 3374 } 3375 3376 int 3377 i915_gem_pin_ioctl(struct drm_device *dev, 
void *data, 3378 struct drm_file *file) 3379 { 3380 struct drm_i915_gem_pin *args = data; 3381 struct drm_i915_gem_object *obj; 3382 int ret; 3383 3384 ret = i915_mutex_lock_interruptible(dev); 3385 if (ret) 3386 return ret; 3387 3388 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3389 if (&obj->base == NULL) { 3390 ret = -ENOENT; 3391 goto unlock; 3392 } 3393 3394 if (obj->madv != I915_MADV_WILLNEED) { 3395 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3396 ret = -EINVAL; 3397 goto out; 3398 } 3399 3400 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3401 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3402 args->handle); 3403 ret = -EINVAL; 3404 goto out; 3405 } 3406 3407 obj->user_pin_count++; 3408 obj->pin_filp = file; 3409 if (obj->user_pin_count == 1) { 3410 ret = i915_gem_object_pin(obj, args->alignment, true); 3411 if (ret) 3412 goto out; 3413 } 3414 3415 /* XXX - flush the CPU caches for pinned objects 3416 * as the X server doesn't manage domains yet 3417 */ 3418 i915_gem_object_flush_cpu_write_domain(obj); 3419 args->offset = obj->gtt_offset; 3420 out: 3421 drm_gem_object_unreference(&obj->base); 3422 unlock: 3423 mutex_unlock(&dev->struct_mutex); 3424 return ret; 3425 } 3426 3427 int 3428 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3429 struct drm_file *file) 3430 { 3431 struct drm_i915_gem_pin *args = data; 3432 struct drm_i915_gem_object *obj; 3433 int ret; 3434 3435 ret = i915_mutex_lock_interruptible(dev); 3436 if (ret) 3437 return ret; 3438 3439 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3440 if (&obj->base == NULL) { 3441 ret = -ENOENT; 3442 goto unlock; 3443 } 3444 3445 if (obj->pin_filp != file) { 3446 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3447 args->handle); 3448 ret = -EINVAL; 3449 goto out; 3450 } 3451 obj->user_pin_count--; 3452 if (obj->user_pin_count == 0) { 3453 obj->pin_filp = NULL; 3454 i915_gem_object_unpin(obj); 3455 } 3456 3457 out: 3458 drm_gem_object_unreference(&obj->base); 3459 unlock: 3460 mutex_unlock(&dev->struct_mutex); 3461 return ret; 3462 } 3463 3464 int 3465 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3466 struct drm_file *file) 3467 { 3468 struct drm_i915_gem_busy *args = data; 3469 struct drm_i915_gem_object *obj; 3470 int ret; 3471 3472 ret = i915_mutex_lock_interruptible(dev); 3473 if (ret) 3474 return ret; 3475 3476 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3477 if (&obj->base == NULL) { 3478 ret = -ENOENT; 3479 goto unlock; 3480 } 3481 3482 /* Count all active objects as busy, even if they are currently not used 3483 * by the gpu. Users of this interface expect objects to eventually 3484 * become non-busy without any further actions, therefore emit any 3485 * necessary flushes here. 3486 */ 3487 args->busy = obj->active; 3488 if (args->busy) { 3489 /* Unconditionally flush objects, even when the gpu still uses this 3490 * object. Userspace calling this function indicates that it wants to 3491 * use this buffer rather sooner than later, so issuing the required 3492 * flush earlier is beneficial. 3493 */ 3494 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3495 ret = i915_gem_flush_ring(obj->ring, 3496 0, obj->base.write_domain); 3497 } else if (obj->ring->outstanding_lazy_request == 3498 obj->last_rendering_seqno) { 3499 struct drm_i915_gem_request *request; 3500 3501 /* This ring is not being cleared by active usage, 3502 * so emit a request to do so. 
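 * Until a request is emitted the lazy seqno is never signalled, so
 * without this the object would appear busy indefinitely.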
3503 */ 3504 request = kzalloc(sizeof(*request), GFP_KERNEL); 3505 if (request) { 3506 ret = i915_add_request(obj->ring, NULL, request); 3507 if (ret) 3508 kfree(request); 3509 } else 3510 ret = -ENOMEM; 3511 } 3512 3513 /* Update the active list for the hardware's current position. 3514 * Otherwise this only updates on a delayed timer or when irqs 3515 * are actually unmasked, and our working set ends up being 3516 * larger than required. 3517 */ 3518 i915_gem_retire_requests_ring(obj->ring); 3519 3520 args->busy = obj->active; 3521 } 3522 3523 drm_gem_object_unreference(&obj->base); 3524 unlock: 3525 mutex_unlock(&dev->struct_mutex); 3526 return ret; 3527 } 3528 3529 int 3530 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3531 struct drm_file *file_priv) 3532 { 3533 return i915_gem_ring_throttle(dev, file_priv); 3534 } 3535 3536 int 3537 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3538 struct drm_file *file_priv) 3539 { 3540 struct drm_i915_gem_madvise *args = data; 3541 struct drm_i915_gem_object *obj; 3542 int ret; 3543 3544 switch (args->madv) { 3545 case I915_MADV_DONTNEED: 3546 case I915_MADV_WILLNEED: 3547 break; 3548 default: 3549 return -EINVAL; 3550 } 3551 3552 ret = i915_mutex_lock_interruptible(dev); 3553 if (ret) 3554 return ret; 3555 3556 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3557 if (&obj->base == NULL) { 3558 ret = -ENOENT; 3559 goto unlock; 3560 } 3561 3562 if (obj->pin_count) { 3563 ret = -EINVAL; 3564 goto out; 3565 } 3566 3567 if (obj->madv != __I915_MADV_PURGED) 3568 obj->madv = args->madv; 3569 3570 /* if the object is no longer bound, discard its backing storage */ 3571 if (i915_gem_object_is_purgeable(obj) && 3572 obj->gtt_space == NULL) 3573 i915_gem_object_truncate(obj); 3574 3575 args->retained = obj->madv != __I915_MADV_PURGED; 3576 3577 out: 3578 drm_gem_object_unreference(&obj->base); 3579 unlock: 3580 mutex_unlock(&dev->struct_mutex); 3581 return ret; 3582 } 3583 3584 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3585 size_t size) 3586 { 3587 struct drm_i915_private *dev_priv = dev->dev_private; 3588 struct drm_i915_gem_object *obj; 3589 struct address_space *mapping; 3590 3591 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 3592 if (obj == NULL) 3593 return NULL; 3594 3595 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3596 kfree(obj); 3597 return NULL; 3598 } 3599 3600 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3601 mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); 3602 3603 i915_gem_info_add_obj(dev_priv, size); 3604 3605 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3606 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3607 3608 if (HAS_LLC(dev)) { 3609 /* On some devices, we can have the GPU use the LLC (the CPU 3610 * cache) for about a 10% performance improvement 3611 * compared to uncached. Graphics requests other than 3612 * display scanout are coherent with the CPU in 3613 * accessing this cache. This means in this mode we 3614 * don't need to clflush on the CPU side, and on the 3615 * GPU side we only need to flush internal caches to 3616 * get data visible to the CPU. 3617 * 3618 * However, we maintain the display planes as UC, and so 3619 * need to rebind when first used as such. 
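 * (i915_gem_object_pin_to_display_plane() handles that by switching the
 * object to I915_CACHE_NONE before pinning it for scanout.)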
3620 */ 3621 obj->cache_level = I915_CACHE_LLC; 3622 } else 3623 obj->cache_level = I915_CACHE_NONE; 3624 3625 obj->base.driver_private = NULL; 3626 obj->fence_reg = I915_FENCE_REG_NONE; 3627 INIT_LIST_HEAD(&obj->mm_list); 3628 INIT_LIST_HEAD(&obj->gtt_list); 3629 INIT_LIST_HEAD(&obj->ring_list); 3630 INIT_LIST_HEAD(&obj->exec_list); 3631 INIT_LIST_HEAD(&obj->gpu_write_list); 3632 obj->madv = I915_MADV_WILLNEED; 3633 /* Avoid an unnecessary call to unbind on the first bind. */ 3634 obj->map_and_fenceable = true; 3635 3636 return obj; 3637 } 3638 3639 int i915_gem_init_object(struct drm_gem_object *obj) 3640 { 3641 BUG(); 3642 3643 return 0; 3644 } 3645 3646 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) 3647 { 3648 struct drm_device *dev = obj->base.dev; 3649 drm_i915_private_t *dev_priv = dev->dev_private; 3650 int ret; 3651 3652 ret = i915_gem_object_unbind(obj); 3653 if (ret == -ERESTARTSYS) { 3654 list_move(&obj->mm_list, 3655 &dev_priv->mm.deferred_free_list); 3656 return; 3657 } 3658 3659 trace_i915_gem_object_destroy(obj); 3660 3661 if (obj->base.map_list.map) 3662 drm_gem_free_mmap_offset(&obj->base); 3663 3664 drm_gem_object_release(&obj->base); 3665 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3666 3667 kfree(obj->page_cpu_valid); 3668 kfree(obj->bit_17); 3669 kfree(obj); 3670 } 3671 3672 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3673 { 3674 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3675 struct drm_device *dev = obj->base.dev; 3676 3677 while (obj->pin_count > 0) 3678 i915_gem_object_unpin(obj); 3679 3680 if (obj->phys_obj) 3681 i915_gem_detach_phys_object(dev, obj); 3682 3683 i915_gem_free_object_tail(obj); 3684 } 3685 3686 int 3687 i915_gem_idle(struct drm_device *dev) 3688 { 3689 drm_i915_private_t *dev_priv = dev->dev_private; 3690 int ret; 3691 3692 mutex_lock(&dev->struct_mutex); 3693 3694 if (dev_priv->mm.suspended) { 3695 mutex_unlock(&dev->struct_mutex); 3696 return 0; 3697 } 3698 3699 ret = i915_gpu_idle(dev, true); 3700 if (ret) { 3701 mutex_unlock(&dev->struct_mutex); 3702 return ret; 3703 } 3704 3705 /* Under UMS, be paranoid and evict. */ 3706 if (!drm_core_check_feature(dev, DRIVER_MODESET)) { 3707 ret = i915_gem_evict_inactive(dev, false); 3708 if (ret) { 3709 mutex_unlock(&dev->struct_mutex); 3710 return ret; 3711 } 3712 } 3713 3714 i915_gem_reset_fences(dev); 3715 3716 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3717 * We need to replace this with a semaphore, or something. 3718 * And not confound mm.suspended! 3719 */ 3720 dev_priv->mm.suspended = 1; 3721 del_timer_sync(&dev_priv->hangcheck_timer); 3722 3723 i915_kernel_lost_context(dev); 3724 i915_gem_cleanup_ringbuffer(dev); 3725 3726 mutex_unlock(&dev->struct_mutex); 3727 3728 /* Cancel the retire work handler, which should be idle now. 
*/ 3729 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3730 3731 return 0; 3732 } 3733 3734 void i915_gem_init_swizzling(struct drm_device *dev) 3735 { 3736 drm_i915_private_t *dev_priv = dev->dev_private; 3737 3738 if (INTEL_INFO(dev)->gen < 5 || 3739 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 3740 return; 3741 3742 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 3743 DISP_TILE_SURFACE_SWIZZLING); 3744 3745 if (IS_GEN5(dev)) 3746 return; 3747 3748 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 3749 if (IS_GEN6(dev)) 3750 I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB)); 3751 else 3752 I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB)); 3753 } 3754 3755 void i915_gem_init_ppgtt(struct drm_device *dev) 3756 { 3757 drm_i915_private_t *dev_priv = dev->dev_private; 3758 uint32_t pd_offset; 3759 struct intel_ring_buffer *ring; 3760 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 3761 uint32_t __iomem *pd_addr; 3762 uint32_t pd_entry; 3763 int i; 3764 3765 if (!dev_priv->mm.aliasing_ppgtt) 3766 return; 3767 3768 3769 pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t); 3770 for (i = 0; i < ppgtt->num_pd_entries; i++) { 3771 dma_addr_t pt_addr; 3772 3773 if (dev_priv->mm.gtt->needs_dmar) 3774 pt_addr = ppgtt->pt_dma_addr[i]; 3775 else 3776 pt_addr = page_to_phys(ppgtt->pt_pages[i]); 3777 3778 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); 3779 pd_entry |= GEN6_PDE_VALID; 3780 3781 writel(pd_entry, pd_addr + i); 3782 } 3783 readl(pd_addr); 3784 3785 pd_offset = ppgtt->pd_offset; 3786 pd_offset /= 64; /* in cachelines, */ 3787 pd_offset <<= 16; 3788 3789 if (INTEL_INFO(dev)->gen == 6) { 3790 uint32_t ecochk = I915_READ(GAM_ECOCHK); 3791 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | 3792 ECOCHK_PPGTT_CACHE64B); 3793 I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); 3794 } else if (INTEL_INFO(dev)->gen >= 7) { 3795 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); 3796 /* GFX_MODE is per-ring on gen7+ */ 3797 } 3798 3799 for (i = 0; i < I915_NUM_RINGS; i++) { 3800 ring = &dev_priv->ring[i]; 3801 3802 if (INTEL_INFO(dev)->gen >= 7) 3803 I915_WRITE(RING_MODE_GEN7(ring), 3804 GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); 3805 3806 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 3807 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); 3808 } 3809 } 3810 3811 int 3812 i915_gem_init_hw(struct drm_device *dev) 3813 { 3814 drm_i915_private_t *dev_priv = dev->dev_private; 3815 int ret; 3816 3817 i915_gem_init_swizzling(dev); 3818 3819 ret = intel_init_render_ring_buffer(dev); 3820 if (ret) 3821 return ret; 3822 3823 if (HAS_BSD(dev)) { 3824 ret = intel_init_bsd_ring_buffer(dev); 3825 if (ret) 3826 goto cleanup_render_ring; 3827 } 3828 3829 if (HAS_BLT(dev)) { 3830 ret = intel_init_blt_ring_buffer(dev); 3831 if (ret) 3832 goto cleanup_bsd_ring; 3833 } 3834 3835 dev_priv->next_seqno = 1; 3836 3837 i915_gem_init_ppgtt(dev); 3838 3839 return 0; 3840 3841 cleanup_bsd_ring: 3842 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3843 cleanup_render_ring: 3844 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3845 return ret; 3846 } 3847 3848 void 3849 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3850 { 3851 drm_i915_private_t *dev_priv = dev->dev_private; 3852 int i; 3853 3854 for (i = 0; i < I915_NUM_RINGS; i++) 3855 intel_cleanup_ring_buffer(&dev_priv->ring[i]); 3856 } 3857 3858 int 3859 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3860 struct drm_file *file_priv) 3861 { 3862 drm_i915_private_t *dev_priv = dev->dev_private; 
3863 int ret, i; 3864 3865 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3866 return 0; 3867 3868 if (atomic_read(&dev_priv->mm.wedged)) { 3869 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3870 atomic_set(&dev_priv->mm.wedged, 0); 3871 } 3872 3873 mutex_lock(&dev->struct_mutex); 3874 dev_priv->mm.suspended = 0; 3875 3876 ret = i915_gem_init_hw(dev); 3877 if (ret != 0) { 3878 mutex_unlock(&dev->struct_mutex); 3879 return ret; 3880 } 3881 3882 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 3883 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 3884 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 3885 for (i = 0; i < I915_NUM_RINGS; i++) { 3886 BUG_ON(!list_empty(&dev_priv->ring[i].active_list)); 3887 BUG_ON(!list_empty(&dev_priv->ring[i].request_list)); 3888 } 3889 mutex_unlock(&dev->struct_mutex); 3890 3891 ret = drm_irq_install(dev); 3892 if (ret) 3893 goto cleanup_ringbuffer; 3894 3895 return 0; 3896 3897 cleanup_ringbuffer: 3898 mutex_lock(&dev->struct_mutex); 3899 i915_gem_cleanup_ringbuffer(dev); 3900 dev_priv->mm.suspended = 1; 3901 mutex_unlock(&dev->struct_mutex); 3902 3903 return ret; 3904 } 3905 3906 int 3907 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3908 struct drm_file *file_priv) 3909 { 3910 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3911 return 0; 3912 3913 drm_irq_uninstall(dev); 3914 return i915_gem_idle(dev); 3915 } 3916 3917 void 3918 i915_gem_lastclose(struct drm_device *dev) 3919 { 3920 int ret; 3921 3922 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3923 return; 3924 3925 ret = i915_gem_idle(dev); 3926 if (ret) 3927 DRM_ERROR("failed to idle hardware: %d\n", ret); 3928 } 3929 3930 static void 3931 init_ring_lists(struct intel_ring_buffer *ring) 3932 { 3933 INIT_LIST_HEAD(&ring->active_list); 3934 INIT_LIST_HEAD(&ring->request_list); 3935 INIT_LIST_HEAD(&ring->gpu_write_list); 3936 } 3937 3938 void 3939 i915_gem_load(struct drm_device *dev) 3940 { 3941 int i; 3942 drm_i915_private_t *dev_priv = dev->dev_private; 3943 3944 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3945 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 3946 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3947 INIT_LIST_HEAD(&dev_priv->mm.pinned_list); 3948 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3949 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); 3950 INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3951 for (i = 0; i < I915_NUM_RINGS; i++) 3952 init_ring_lists(&dev_priv->ring[i]); 3953 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 3954 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3955 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3956 i915_gem_retire_work_handler); 3957 init_completion(&dev_priv->error_completion); 3958 3959 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3960 if (IS_GEN3(dev)) { 3961 u32 tmp = I915_READ(MI_ARB_STATE); 3962 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { 3963 /* arb state is a masked write, so set bit + bit in mask */ 3964 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT); 3965 I915_WRITE(MI_ARB_STATE, tmp); 3966 } 3967 } 3968 3969 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3970 3971 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3972 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3973 dev_priv->fence_reg_start = 3; 3974 3975 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3976 dev_priv->num_fence_regs = 16; 3977 else 3978 dev_priv->num_fence_regs = 8; 3979 3980 /* Initialize fence registers to zero */ 3981 for (i = 
0; i < dev_priv->num_fence_regs; i++) { 3982 i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]); 3983 } 3984 3985 i915_gem_detect_bit_6_swizzle(dev); 3986 init_waitqueue_head(&dev_priv->pending_flip_queue); 3987 3988 dev_priv->mm.interruptible = true; 3989 3990 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3991 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3992 register_shrinker(&dev_priv->mm.inactive_shrinker); 3993 } 3994 3995 /* 3996 * Create a physically contiguous memory object for this object 3997 * e.g. for cursor + overlay regs 3998 */ 3999 static int i915_gem_init_phys_object(struct drm_device *dev, 4000 int id, int size, int align) 4001 { 4002 drm_i915_private_t *dev_priv = dev->dev_private; 4003 struct drm_i915_gem_phys_object *phys_obj; 4004 int ret; 4005 4006 if (dev_priv->mm.phys_objs[id - 1] || !size) 4007 return 0; 4008 4009 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4010 if (!phys_obj) 4011 return -ENOMEM; 4012 4013 phys_obj->id = id; 4014 4015 phys_obj->handle = drm_pci_alloc(dev, size, align); 4016 if (!phys_obj->handle) { 4017 ret = -ENOMEM; 4018 goto kfree_obj; 4019 } 4020 #ifdef CONFIG_X86 4021 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4022 #endif 4023 4024 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4025 4026 return 0; 4027 kfree_obj: 4028 kfree(phys_obj); 4029 return ret; 4030 } 4031 4032 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 4033 { 4034 drm_i915_private_t *dev_priv = dev->dev_private; 4035 struct drm_i915_gem_phys_object *phys_obj; 4036 4037 if (!dev_priv->mm.phys_objs[id - 1]) 4038 return; 4039 4040 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4041 if (phys_obj->cur_obj) { 4042 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4043 } 4044 4045 #ifdef CONFIG_X86 4046 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4047 #endif 4048 drm_pci_free(dev, phys_obj->handle); 4049 kfree(phys_obj); 4050 dev_priv->mm.phys_objs[id - 1] = NULL; 4051 } 4052 4053 void i915_gem_free_all_phys_object(struct drm_device *dev) 4054 { 4055 int i; 4056 4057 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4058 i915_gem_free_phys_object(dev, i); 4059 } 4060 4061 void i915_gem_detach_phys_object(struct drm_device *dev, 4062 struct drm_i915_gem_object *obj) 4063 { 4064 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4065 char *vaddr; 4066 int i; 4067 int page_count; 4068 4069 if (!obj->phys_obj) 4070 return; 4071 vaddr = obj->phys_obj->handle->vaddr; 4072 4073 page_count = obj->base.size / PAGE_SIZE; 4074 for (i = 0; i < page_count; i++) { 4075 struct page *page = shmem_read_mapping_page(mapping, i); 4076 if (!IS_ERR(page)) { 4077 char *dst = kmap_atomic(page); 4078 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 4079 kunmap_atomic(dst); 4080 4081 drm_clflush_pages(&page, 1); 4082 4083 set_page_dirty(page); 4084 mark_page_accessed(page); 4085 page_cache_release(page); 4086 } 4087 } 4088 intel_gtt_chipset_flush(); 4089 4090 obj->phys_obj->cur_obj = NULL; 4091 obj->phys_obj = NULL; 4092 } 4093 4094 int 4095 i915_gem_attach_phys_object(struct drm_device *dev, 4096 struct drm_i915_gem_object *obj, 4097 int id, 4098 int align) 4099 { 4100 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4101 drm_i915_private_t *dev_priv = dev->dev_private; 4102 int ret = 0; 4103 int page_count; 4104 int i; 4105 4106 if (id > I915_MAX_PHYS_OBJECT) 
static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	intel_gtt_chipset_flush();
	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}
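
/*
 * Inactive-list shrinker: when called with nr_to_scan == 0 it only reports
 * how many inactive objects could be reclaimed.  Otherwise it retires
 * completed requests, unbinds purgeable inactive objects first, then any
 * remaining inactive objects, and as a last resort idles the GPU and
 * rescans.
 */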
4186 */ 4187 spin_lock(&file_priv->mm.lock); 4188 while (!list_empty(&file_priv->mm.request_list)) { 4189 struct drm_i915_gem_request *request; 4190 4191 request = list_first_entry(&file_priv->mm.request_list, 4192 struct drm_i915_gem_request, 4193 client_list); 4194 list_del(&request->client_list); 4195 request->file_priv = NULL; 4196 } 4197 spin_unlock(&file_priv->mm.lock); 4198 } 4199 4200 static int 4201 i915_gpu_is_active(struct drm_device *dev) 4202 { 4203 drm_i915_private_t *dev_priv = dev->dev_private; 4204 int lists_empty; 4205 4206 lists_empty = list_empty(&dev_priv->mm.flushing_list) && 4207 list_empty(&dev_priv->mm.active_list); 4208 4209 return !lists_empty; 4210 } 4211 4212 static int 4213 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4214 { 4215 struct drm_i915_private *dev_priv = 4216 container_of(shrinker, 4217 struct drm_i915_private, 4218 mm.inactive_shrinker); 4219 struct drm_device *dev = dev_priv->dev; 4220 struct drm_i915_gem_object *obj, *next; 4221 int nr_to_scan = sc->nr_to_scan; 4222 int cnt; 4223 4224 if (!mutex_trylock(&dev->struct_mutex)) 4225 return 0; 4226 4227 /* "fast-path" to count number of available objects */ 4228 if (nr_to_scan == 0) { 4229 cnt = 0; 4230 list_for_each_entry(obj, 4231 &dev_priv->mm.inactive_list, 4232 mm_list) 4233 cnt++; 4234 mutex_unlock(&dev->struct_mutex); 4235 return cnt / 100 * sysctl_vfs_cache_pressure; 4236 } 4237 4238 rescan: 4239 /* first scan for clean buffers */ 4240 i915_gem_retire_requests(dev); 4241 4242 list_for_each_entry_safe(obj, next, 4243 &dev_priv->mm.inactive_list, 4244 mm_list) { 4245 if (i915_gem_object_is_purgeable(obj)) { 4246 if (i915_gem_object_unbind(obj) == 0 && 4247 --nr_to_scan == 0) 4248 break; 4249 } 4250 } 4251 4252 /* second pass, evict/count anything still on the inactive list */ 4253 cnt = 0; 4254 list_for_each_entry_safe(obj, next, 4255 &dev_priv->mm.inactive_list, 4256 mm_list) { 4257 if (nr_to_scan && 4258 i915_gem_object_unbind(obj) == 0) 4259 nr_to_scan--; 4260 else 4261 cnt++; 4262 } 4263 4264 if (nr_to_scan && i915_gpu_is_active(dev)) { 4265 /* 4266 * We are desperate for pages, so as a last resort, wait 4267 * for the GPU to finish and discard whatever we can. 4268 * This has a dramatic impact to reduce the number of 4269 * OOM-killer events whilst running the GPU aggressively. 4270 */ 4271 if (i915_gpu_idle(dev, true) == 0) 4272 goto rescan; 4273 } 4274 mutex_unlock(&dev->struct_mutex); 4275 return cnt / 100 * sysctl_vfs_cache_pressure; 4276 } 4277