/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
							   bool write);
static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
								   uint64_t offset,
								   uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						     unsigned alignment,
						     bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_device *dev,
				     struct drm_i915_fence_reg *reg);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token
		 * we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

void i915_gem_do_init(struct drm_device *dev,
		      unsigned long start,
		      unsigned long mappable_end,
		      unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);

	dev_priv->mm.gtt_start = start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
	dev_priv->mm.gtt_end = end;
	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

	/* Take over this portion of the GTT */
	intel_gtt_clear_range(start / PAGE_SIZE, (end - start) / PAGE_SIZE);
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
					      vaddr + page_offset,
					      page_length);
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					page,
					shmem_page_offset,
					page_length);
		}

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		mark_page_accessed(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin_pages;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
						user_data,
						page_length);
		kunmap_atomic(vaddr);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      0);
		} else {
			slow_shmem_copy(page,
					shmem_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);
		}

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
	else if (obj->gtt_space &&
		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);
		if (ret)
			goto out;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			goto out_unpin;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

out_unpin:
		i915_gem_object_unpin(obj);
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			goto out;

		ret = -EFAULT;
		if (!i915_gem_object_needs_bit17_swizzle(obj))
			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		drm_gem_object_unreference_unlocked(obj);
		return -E2BIG;
	}

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Now bind it into the GTT if needed */
	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);
		if (ret)
			goto unlock;
	}
	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
		if (ret)
			goto unlock;

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	if (obj->tiling_mode == I915_TILING_NONE)
		ret = i915_gem_object_put_fence(obj);
	else
		ret = i915_gem_object_get_fence(obj, NULL);
	if (ret)
		goto unlock;

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->fault_mappable = true;

	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
		set_need_resched();
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call. The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;
	struct drm_local_map *map;
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->base.map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->base.size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->base.size / PAGE_SIZE,
						    0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n",
			  obj->base.name);
		ret = -ENOSPC;
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->base.size / PAGE_SIZE,
						  0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
	if (ret) {
		DRM_ERROR("failed to add to map hash\n");
		goto out_free_mm;
	}

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
	kfree(list->map);
	list->map = NULL;

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
				    obj->base.size, 1);

	obj->fault_mappable = false;
}

static void
i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list = &obj->base.map_list;

	drm_ht_remove_item(&mm->offset_hash, &list->hash);
	drm_mm_put_block(list->file_offset_node);
	kfree(list->map);
	list->map = NULL;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-two tiled object size.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto unlock;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	if (!obj->base.map_list.map) {
		ret = i915_gem_create_mmap_offset(obj);
		if (ret)
			goto out;
	}

	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}


static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
			      gfp_t gfpmask)
{
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)
		return -ENOMEM;

	inode = obj->base.filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	gfpmask |= mapping_gfp_mask(mapping);

	for (i = 0; i < page_count; i++) {
		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
		if (IS_ERR(page))
			goto err_pages;

		obj->pages[i] = page;
	}

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	while (i--)
		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);
	obj->pages = NULL;
	return PTR_ERR(page);
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	int i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj->dirty)
			set_page_dirty(obj->pages[i]);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);

		page_cache_release(obj->pages[i]);
	}
	obj->dirty = 0;

	drm_free_large(obj->pages);
	obj->pages = NULL;
}

void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring,
			       u32 seqno)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(ring == NULL);
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_rendering_seqno = seqno;
	if (obj->fenced_gpu_access) {
		struct drm_i915_fence_reg *reg;

		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);

		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
	}
}

static void
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
}

static void
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);

	i915_gem_object_move_off_active(obj);
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);
	obj->ring = NULL;

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;

	obj->active = 0;
	obj->pending_gpu_write = false;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	shmem_truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
			       uint32_t flush_domains)
{
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next,
				 &ring->gpu_write_list,
				 gpu_write_list) {
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;

			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
			i915_gem_object_move_to_active(obj, ring,
						       i915_gem_next_request_seqno(ring));

			trace_i915_gem_object_change_domain(obj,
							    obj->base.read_domains,
							    old_write_domain);
		}
	}
}

int
i915_add_request(struct intel_ring_buffer *ring,
		 struct drm_file *file,
		 struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t seqno;
	int was_empty;
	int ret;

	BUG_ON(request == NULL);

	ret = ring->add_request(ring, &seqno);
	if (ret)
		return ret;

	trace_i915_gem_request_add(ring, seqno);

	request->seqno = seqno;
	request->ring = ring;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	ring->outstanding_lazy_request = false;

	if (!dev_priv->mm.suspended) {
		if (i915_enable_hangcheck) {
			mod_timer(&dev_priv->hangcheck_timer,
				  jiffies +
				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
		}
		if (was_empty)
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
	}
	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
{
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}
}

static void i915_gem_reset_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < 16; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct drm_i915_gem_object *obj = reg->obj;

		if (!obj)
			continue;

		if (obj->tiling_mode)
			i915_gem_release_mmap(obj);

		reg->obj->fence_reg = I915_FENCE_REG_NONE;
		reg->obj->fenced_gpu_access = false;
		reg->obj->last_fenced_seqno = 0;
		reg->obj->last_fenced_ring = NULL;
		i915_gem_clear_fence_reg(dev, reg);
	}
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int i;

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj = list_first_entry(&dev_priv->mm.flushing_list,
				       struct drm_i915_gem_object,
				       mm_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
			    mm_list)
	{
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}

	/* The fence registers are invalidated so clear them out */
	i915_gem_reset_fences(dev);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
static void
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
{
	uint32_t seqno;
	int i;

	if (list_empty(&ring->request_list))
		return;

	WARN_ON(i915_verify_lists(ring->dev));

	seqno = ring->get_seqno(ring);

	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
		if (seqno >= ring->sync_seqno[i])
			ring->sync_seqno[i] = 0;

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		if (!i915_seqno_passed(seqno, request->seqno))
			break;

		trace_i915_gem_request_retire(ring, request->seqno);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
			break;

		if (obj->base.write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);
	}

	if (unlikely(ring->trace_irq_seqno &&
		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
		ring->irq_put(ring);
		ring->trace_irq_seqno = 0;
	}

	WARN_ON(i915_verify_lists(ring->dev));
}

void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i;

	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
		struct drm_i915_gem_object *obj, *next;

		/* We must be careful that during unbind() we do not
		 * accidentally infinitely recurse into retire requests.
		 * Currently:
		 *   retire -> free -> unbind -> wait -> retire_ring
		 */
		list_for_each_entry_safe(obj, next,
					 &dev_priv->mm.deferred_free_list,
					 mm_list)
			i915_gem_free_object_tail(obj);
	}

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_retire_requests_ring(&dev_priv->ring[i]);
}

static void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;
	bool idle;
	int i;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
		return;
	}

	i915_gem_retire_requests(dev);

	/* Send a periodic flush down the ring so we don't hold onto GEM
	 * objects indefinitely.
	 */
	idle = true;
	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct intel_ring_buffer *ring = &dev_priv->ring[i];

		if (!list_empty(&ring->gpu_write_list)) {
			struct drm_i915_gem_request *request;
			int ret;

			ret = i915_gem_flush_ring(ring,
						  0, I915_GEM_GPU_DOMAINS);
			request = kzalloc(sizeof(*request), GFP_KERNEL);
			if (ret || request == NULL ||
			    i915_add_request(ring, NULL, request))
				kfree(request);
		}

		idle &= list_empty(&ring->request_list);
	}

	if (!dev_priv->mm.suspended && !idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);

	mutex_unlock(&dev->struct_mutex);
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct intel_ring_buffer *ring,
		  uint32_t seqno)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		return recovery_complete ?
-EIO : -EAGAIN; 2044 } 2045 2046 if (seqno == ring->outstanding_lazy_request) { 2047 struct drm_i915_gem_request *request; 2048 2049 request = kzalloc(sizeof(*request), GFP_KERNEL); 2050 if (request == NULL) 2051 return -ENOMEM; 2052 2053 ret = i915_add_request(ring, NULL, request); 2054 if (ret) { 2055 kfree(request); 2056 return ret; 2057 } 2058 2059 seqno = request->seqno; 2060 } 2061 2062 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 2063 if (HAS_PCH_SPLIT(ring->dev)) 2064 ier = I915_READ(DEIER) | I915_READ(GTIER); 2065 else 2066 ier = I915_READ(IER); 2067 if (!ier) { 2068 DRM_ERROR("something (likely vbetool) disabled " 2069 "interrupts, re-enabling\n"); 2070 ring->dev->driver->irq_preinstall(ring->dev); 2071 ring->dev->driver->irq_postinstall(ring->dev); 2072 } 2073 2074 trace_i915_gem_request_wait_begin(ring, seqno); 2075 2076 ring->waiting_seqno = seqno; 2077 if (ring->irq_get(ring)) { 2078 if (dev_priv->mm.interruptible) 2079 ret = wait_event_interruptible(ring->irq_queue, 2080 i915_seqno_passed(ring->get_seqno(ring), seqno) 2081 || atomic_read(&dev_priv->mm.wedged)); 2082 else 2083 wait_event(ring->irq_queue, 2084 i915_seqno_passed(ring->get_seqno(ring), seqno) 2085 || atomic_read(&dev_priv->mm.wedged)); 2086 2087 ring->irq_put(ring); 2088 } else if (wait_for(i915_seqno_passed(ring->get_seqno(ring), 2089 seqno) || 2090 atomic_read(&dev_priv->mm.wedged), 3000)) 2091 ret = -EBUSY; 2092 ring->waiting_seqno = 0; 2093 2094 trace_i915_gem_request_wait_end(ring, seqno); 2095 } 2096 if (atomic_read(&dev_priv->mm.wedged)) 2097 ret = -EAGAIN; 2098 2099 if (ret && ret != -ERESTARTSYS) 2100 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n", 2101 __func__, ret, seqno, ring->get_seqno(ring), 2102 dev_priv->next_seqno); 2103 2104 /* Directly dispatch request retiring. While we have the work queue 2105 * to handle this, the waiter on a request often wants an associated 2106 * buffer to have made it to the inactive list, and we would need 2107 * a separate wait queue to handle that. 2108 */ 2109 if (ret == 0) 2110 i915_gem_retire_requests_ring(ring); 2111 2112 return ret; 2113 } 2114 2115 /** 2116 * Ensures that all rendering to the object has completed and the object is 2117 * safe to unbind from the GTT or access from the CPU. 2118 */ 2119 int 2120 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) 2121 { 2122 int ret; 2123 2124 /* This function only exists to support waiting for existing rendering, 2125 * not for emitting required flushes. 2126 */ 2127 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0); 2128 2129 /* If there is rendering queued on the buffer being evicted, wait for 2130 * it. 
2131 */ 2132 if (obj->active) { 2133 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); 2134 if (ret) 2135 return ret; 2136 } 2137 2138 return 0; 2139 } 2140 2141 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2142 { 2143 u32 old_write_domain, old_read_domains; 2144 2145 /* Act a barrier for all accesses through the GTT */ 2146 mb(); 2147 2148 /* Force a pagefault for domain tracking on next user access */ 2149 i915_gem_release_mmap(obj); 2150 2151 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2152 return; 2153 2154 old_read_domains = obj->base.read_domains; 2155 old_write_domain = obj->base.write_domain; 2156 2157 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2158 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2159 2160 trace_i915_gem_object_change_domain(obj, 2161 old_read_domains, 2162 old_write_domain); 2163 } 2164 2165 /** 2166 * Unbinds an object from the GTT aperture. 2167 */ 2168 int 2169 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2170 { 2171 int ret = 0; 2172 2173 if (obj->gtt_space == NULL) 2174 return 0; 2175 2176 if (obj->pin_count != 0) { 2177 DRM_ERROR("Attempting to unbind pinned buffer\n"); 2178 return -EINVAL; 2179 } 2180 2181 ret = i915_gem_object_finish_gpu(obj); 2182 if (ret == -ERESTARTSYS) 2183 return ret; 2184 /* Continue on if we fail due to EIO, the GPU is hung so we 2185 * should be safe and we need to cleanup or else we might 2186 * cause memory corruption through use-after-free. 2187 */ 2188 2189 i915_gem_object_finish_gtt(obj); 2190 2191 /* Move the object to the CPU domain to ensure that 2192 * any possible CPU writes while it's not in the GTT 2193 * are flushed when we go to remap it. 2194 */ 2195 if (ret == 0) 2196 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2197 if (ret == -ERESTARTSYS) 2198 return ret; 2199 if (ret) { 2200 /* In the event of a disaster, abandon all caches and 2201 * hope for the best. 2202 */ 2203 i915_gem_clflush_object(obj); 2204 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2205 } 2206 2207 /* release the fence reg _after_ flushing */ 2208 ret = i915_gem_object_put_fence(obj); 2209 if (ret == -ERESTARTSYS) 2210 return ret; 2211 2212 trace_i915_gem_object_unbind(obj); 2213 2214 i915_gem_gtt_unbind_object(obj); 2215 i915_gem_object_put_pages_gtt(obj); 2216 2217 list_del_init(&obj->gtt_list); 2218 list_del_init(&obj->mm_list); 2219 /* Avoid an unnecessary call to unbind on rebind. 
*/ 2220 obj->map_and_fenceable = true; 2221 2222 drm_mm_put_block(obj->gtt_space); 2223 obj->gtt_space = NULL; 2224 obj->gtt_offset = 0; 2225 2226 if (i915_gem_object_is_purgeable(obj)) 2227 i915_gem_object_truncate(obj); 2228 2229 return ret; 2230 } 2231 2232 int 2233 i915_gem_flush_ring(struct intel_ring_buffer *ring, 2234 uint32_t invalidate_domains, 2235 uint32_t flush_domains) 2236 { 2237 int ret; 2238 2239 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) 2240 return 0; 2241 2242 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); 2243 2244 ret = ring->flush(ring, invalidate_domains, flush_domains); 2245 if (ret) 2246 return ret; 2247 2248 if (flush_domains & I915_GEM_GPU_DOMAINS) 2249 i915_gem_process_flushing_list(ring, flush_domains); 2250 2251 return 0; 2252 } 2253 2254 static int i915_ring_idle(struct intel_ring_buffer *ring) 2255 { 2256 int ret; 2257 2258 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) 2259 return 0; 2260 2261 if (!list_empty(&ring->gpu_write_list)) { 2262 ret = i915_gem_flush_ring(ring, 2263 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2264 if (ret) 2265 return ret; 2266 } 2267 2268 return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); 2269 } 2270 2271 int 2272 i915_gpu_idle(struct drm_device *dev) 2273 { 2274 drm_i915_private_t *dev_priv = dev->dev_private; 2275 bool lists_empty; 2276 int ret, i; 2277 2278 lists_empty = (list_empty(&dev_priv->mm.flushing_list) && 2279 list_empty(&dev_priv->mm.active_list)); 2280 if (lists_empty) 2281 return 0; 2282 2283 /* Flush everything onto the inactive list. */ 2284 for (i = 0; i < I915_NUM_RINGS; i++) { 2285 ret = i915_ring_idle(&dev_priv->ring[i]); 2286 if (ret) 2287 return ret; 2288 } 2289 2290 return 0; 2291 } 2292 2293 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, 2294 struct intel_ring_buffer *pipelined) 2295 { 2296 struct drm_device *dev = obj->base.dev; 2297 drm_i915_private_t *dev_priv = dev->dev_private; 2298 u32 size = obj->gtt_space->size; 2299 int regnum = obj->fence_reg; 2300 uint64_t val; 2301 2302 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2303 0xfffff000) << 32; 2304 val |= obj->gtt_offset & 0xfffff000; 2305 val |= (uint64_t)((obj->stride / 128) - 1) << 2306 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2307 2308 if (obj->tiling_mode == I915_TILING_Y) 2309 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2310 val |= I965_FENCE_REG_VALID; 2311 2312 if (pipelined) { 2313 int ret = intel_ring_begin(pipelined, 6); 2314 if (ret) 2315 return ret; 2316 2317 intel_ring_emit(pipelined, MI_NOOP); 2318 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2319 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8); 2320 intel_ring_emit(pipelined, (u32)val); 2321 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4); 2322 intel_ring_emit(pipelined, (u32)(val >> 32)); 2323 intel_ring_advance(pipelined); 2324 } else 2325 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); 2326 2327 return 0; 2328 } 2329 2330 static int i965_write_fence_reg(struct drm_i915_gem_object *obj, 2331 struct intel_ring_buffer *pipelined) 2332 { 2333 struct drm_device *dev = obj->base.dev; 2334 drm_i915_private_t *dev_priv = dev->dev_private; 2335 u32 size = obj->gtt_space->size; 2336 int regnum = obj->fence_reg; 2337 uint64_t val; 2338 2339 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2340 0xfffff000) << 32; 2341 val |= obj->gtt_offset & 0xfffff000; 2342 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2343 if 
(obj->tiling_mode == I915_TILING_Y) 2344 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2345 val |= I965_FENCE_REG_VALID; 2346 2347 if (pipelined) { 2348 int ret = intel_ring_begin(pipelined, 6); 2349 if (ret) 2350 return ret; 2351 2352 intel_ring_emit(pipelined, MI_NOOP); 2353 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2354 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8); 2355 intel_ring_emit(pipelined, (u32)val); 2356 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4); 2357 intel_ring_emit(pipelined, (u32)(val >> 32)); 2358 intel_ring_advance(pipelined); 2359 } else 2360 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); 2361 2362 return 0; 2363 } 2364 2365 static int i915_write_fence_reg(struct drm_i915_gem_object *obj, 2366 struct intel_ring_buffer *pipelined) 2367 { 2368 struct drm_device *dev = obj->base.dev; 2369 drm_i915_private_t *dev_priv = dev->dev_private; 2370 u32 size = obj->gtt_space->size; 2371 u32 fence_reg, val, pitch_val; 2372 int tile_width; 2373 2374 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2375 (size & -size) != size || 2376 (obj->gtt_offset & (size - 1)), 2377 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", 2378 obj->gtt_offset, obj->map_and_fenceable, size)) 2379 return -EINVAL; 2380 2381 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2382 tile_width = 128; 2383 else 2384 tile_width = 512; 2385 2386 /* Note: pitch better be a power of two tile widths */ 2387 pitch_val = obj->stride / tile_width; 2388 pitch_val = ffs(pitch_val) - 1; 2389 2390 val = obj->gtt_offset; 2391 if (obj->tiling_mode == I915_TILING_Y) 2392 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2393 val |= I915_FENCE_SIZE_BITS(size); 2394 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2395 val |= I830_FENCE_REG_VALID; 2396 2397 fence_reg = obj->fence_reg; 2398 if (fence_reg < 8) 2399 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2400 else 2401 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2402 2403 if (pipelined) { 2404 int ret = intel_ring_begin(pipelined, 4); 2405 if (ret) 2406 return ret; 2407 2408 intel_ring_emit(pipelined, MI_NOOP); 2409 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2410 intel_ring_emit(pipelined, fence_reg); 2411 intel_ring_emit(pipelined, val); 2412 intel_ring_advance(pipelined); 2413 } else 2414 I915_WRITE(fence_reg, val); 2415 2416 return 0; 2417 } 2418 2419 static int i830_write_fence_reg(struct drm_i915_gem_object *obj, 2420 struct intel_ring_buffer *pipelined) 2421 { 2422 struct drm_device *dev = obj->base.dev; 2423 drm_i915_private_t *dev_priv = dev->dev_private; 2424 u32 size = obj->gtt_space->size; 2425 int regnum = obj->fence_reg; 2426 uint32_t val; 2427 uint32_t pitch_val; 2428 2429 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2430 (size & -size) != size || 2431 (obj->gtt_offset & (size - 1)), 2432 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2433 obj->gtt_offset, size)) 2434 return -EINVAL; 2435 2436 pitch_val = obj->stride / 128; 2437 pitch_val = ffs(pitch_val) - 1; 2438 2439 val = obj->gtt_offset; 2440 if (obj->tiling_mode == I915_TILING_Y) 2441 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2442 val |= I830_FENCE_SIZE_BITS(size); 2443 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2444 val |= I830_FENCE_REG_VALID; 2445 2446 if (pipelined) { 2447 int ret = intel_ring_begin(pipelined, 4); 2448 if (ret) 2449 return ret; 2450 2451 intel_ring_emit(pipelined, MI_NOOP); 2452 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2453 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); 
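/*
 * [Editor's illustrative sketch, not part of the driver] The pre-965 fence
 * setup above requires the fenced region to be power-of-two sized and
 * naturally aligned, and encodes the pitch as a power-of-two count of tile
 * rows (ffs(stride / tile_width) - 1).  This standalone sketch restates that
 * arithmetic with hypothetical helper names so it can be read and compiled
 * in isolation; it is a simplified model, not the driver's API.
 */
#if 0	/* standalone illustration; deliberately kept out of the build */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_pow2(uint32_t v)
{
	return v != 0 && (v & (v - 1)) == 0;	/* same idea as (v & -v) == v */
}

/* Return the log2 pitch field, or -1 if the stride cannot be encoded. */
static int fence_pitch_field(uint32_t stride, uint32_t tile_width)
{
	uint32_t rows = stride / tile_width;
	int shift = 0;

	if (stride == 0 || stride % tile_width || !is_pow2(rows))
		return -1;
	while (rows > 1) {
		rows >>= 1;
		shift++;
	}
	return shift;			/* equivalent to ffs(rows) - 1 */
}

int main(void)
{
	uint32_t offset = 1u << 20, size = 1u << 20;

	if (!is_pow2(size) || (offset & (size - 1)))
		fprintf(stderr, "fence region not pow2-sized/aligned\n");
	printf("pitch field for 2048-byte stride, 512-byte tiles: %d\n",
	       fence_pitch_field(2048, 512));	/* 2048/512 = 4 rows -> 2 */
	return 0;
}
#endif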
2454 intel_ring_emit(pipelined, val); 2455 intel_ring_advance(pipelined); 2456 } else 2457 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); 2458 2459 return 0; 2460 } 2461 2462 static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) 2463 { 2464 return i915_seqno_passed(ring->get_seqno(ring), seqno); 2465 } 2466 2467 static int 2468 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, 2469 struct intel_ring_buffer *pipelined) 2470 { 2471 int ret; 2472 2473 if (obj->fenced_gpu_access) { 2474 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2475 ret = i915_gem_flush_ring(obj->last_fenced_ring, 2476 0, obj->base.write_domain); 2477 if (ret) 2478 return ret; 2479 } 2480 2481 obj->fenced_gpu_access = false; 2482 } 2483 2484 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { 2485 if (!ring_passed_seqno(obj->last_fenced_ring, 2486 obj->last_fenced_seqno)) { 2487 ret = i915_wait_request(obj->last_fenced_ring, 2488 obj->last_fenced_seqno); 2489 if (ret) 2490 return ret; 2491 } 2492 2493 obj->last_fenced_seqno = 0; 2494 obj->last_fenced_ring = NULL; 2495 } 2496 2497 /* Ensure that all CPU reads are completed before installing a fence 2498 * and all writes before removing the fence. 2499 */ 2500 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2501 mb(); 2502 2503 return 0; 2504 } 2505 2506 int 2507 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2508 { 2509 int ret; 2510 2511 if (obj->tiling_mode) 2512 i915_gem_release_mmap(obj); 2513 2514 ret = i915_gem_object_flush_fence(obj, NULL); 2515 if (ret) 2516 return ret; 2517 2518 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2519 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2520 i915_gem_clear_fence_reg(obj->base.dev, 2521 &dev_priv->fence_regs[obj->fence_reg]); 2522 2523 obj->fence_reg = I915_FENCE_REG_NONE; 2524 } 2525 2526 return 0; 2527 } 2528 2529 static struct drm_i915_fence_reg * 2530 i915_find_fence_reg(struct drm_device *dev, 2531 struct intel_ring_buffer *pipelined) 2532 { 2533 struct drm_i915_private *dev_priv = dev->dev_private; 2534 struct drm_i915_fence_reg *reg, *first, *avail; 2535 int i; 2536 2537 /* First try to find a free reg */ 2538 avail = NULL; 2539 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2540 reg = &dev_priv->fence_regs[i]; 2541 if (!reg->obj) 2542 return reg; 2543 2544 if (!reg->obj->pin_count) 2545 avail = reg; 2546 } 2547 2548 if (avail == NULL) 2549 return NULL; 2550 2551 /* None available, try to steal one or wait for a user to finish */ 2552 avail = first = NULL; 2553 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2554 if (reg->obj->pin_count) 2555 continue; 2556 2557 if (first == NULL) 2558 first = reg; 2559 2560 if (!pipelined || 2561 !reg->obj->last_fenced_ring || 2562 reg->obj->last_fenced_ring == pipelined) { 2563 avail = reg; 2564 break; 2565 } 2566 } 2567 2568 if (avail == NULL) 2569 avail = first; 2570 2571 return avail; 2572 } 2573 2574 /** 2575 * i915_gem_object_get_fence - set up a fence reg for an object 2576 * @obj: object to map through a fence reg 2577 * @pipelined: ring on which to queue the change, or NULL for CPU access 2578 * @interruptible: must we wait uninterruptibly for the register to retire? 2579 * 2580 * When mapping objects through the GTT, userspace wants to be able to write 2581 * to them without having to worry about swizzling if the object is tiled. 2582 * 2583 * This function walks the fence regs looking for a free one for @obj, 2584 * stealing one if it can't find any. 
2585 * 2586 * It then sets up the reg based on the object's properties: address, pitch 2587 * and tiling format. 2588 */ 2589 int 2590 i915_gem_object_get_fence(struct drm_i915_gem_object *obj, 2591 struct intel_ring_buffer *pipelined) 2592 { 2593 struct drm_device *dev = obj->base.dev; 2594 struct drm_i915_private *dev_priv = dev->dev_private; 2595 struct drm_i915_fence_reg *reg; 2596 int ret; 2597 2598 /* XXX disable pipelining. There are bugs. Shocking. */ 2599 pipelined = NULL; 2600 2601 /* Just update our place in the LRU if our fence is getting reused. */ 2602 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2603 reg = &dev_priv->fence_regs[obj->fence_reg]; 2604 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2605 2606 if (obj->tiling_changed) { 2607 ret = i915_gem_object_flush_fence(obj, pipelined); 2608 if (ret) 2609 return ret; 2610 2611 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) 2612 pipelined = NULL; 2613 2614 if (pipelined) { 2615 reg->setup_seqno = 2616 i915_gem_next_request_seqno(pipelined); 2617 obj->last_fenced_seqno = reg->setup_seqno; 2618 obj->last_fenced_ring = pipelined; 2619 } 2620 2621 goto update; 2622 } 2623 2624 if (!pipelined) { 2625 if (reg->setup_seqno) { 2626 if (!ring_passed_seqno(obj->last_fenced_ring, 2627 reg->setup_seqno)) { 2628 ret = i915_wait_request(obj->last_fenced_ring, 2629 reg->setup_seqno); 2630 if (ret) 2631 return ret; 2632 } 2633 2634 reg->setup_seqno = 0; 2635 } 2636 } else if (obj->last_fenced_ring && 2637 obj->last_fenced_ring != pipelined) { 2638 ret = i915_gem_object_flush_fence(obj, pipelined); 2639 if (ret) 2640 return ret; 2641 } 2642 2643 return 0; 2644 } 2645 2646 reg = i915_find_fence_reg(dev, pipelined); 2647 if (reg == NULL) 2648 return -ENOSPC; 2649 2650 ret = i915_gem_object_flush_fence(obj, pipelined); 2651 if (ret) 2652 return ret; 2653 2654 if (reg->obj) { 2655 struct drm_i915_gem_object *old = reg->obj; 2656 2657 drm_gem_object_reference(&old->base); 2658 2659 if (old->tiling_mode) 2660 i915_gem_release_mmap(old); 2661 2662 ret = i915_gem_object_flush_fence(old, pipelined); 2663 if (ret) { 2664 drm_gem_object_unreference(&old->base); 2665 return ret; 2666 } 2667 2668 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0) 2669 pipelined = NULL; 2670 2671 old->fence_reg = I915_FENCE_REG_NONE; 2672 old->last_fenced_ring = pipelined; 2673 old->last_fenced_seqno = 2674 pipelined ? i915_gem_next_request_seqno(pipelined) : 0; 2675 2676 drm_gem_object_unreference(&old->base); 2677 } else if (obj->last_fenced_seqno == 0) 2678 pipelined = NULL; 2679 2680 reg->obj = obj; 2681 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2682 obj->fence_reg = reg - dev_priv->fence_regs; 2683 obj->last_fenced_ring = pipelined; 2684 2685 reg->setup_seqno = 2686 pipelined ? 
i915_gem_next_request_seqno(pipelined) : 0; 2687 obj->last_fenced_seqno = reg->setup_seqno; 2688 2689 update: 2690 obj->tiling_changed = false; 2691 switch (INTEL_INFO(dev)->gen) { 2692 case 7: 2693 case 6: 2694 ret = sandybridge_write_fence_reg(obj, pipelined); 2695 break; 2696 case 5: 2697 case 4: 2698 ret = i965_write_fence_reg(obj, pipelined); 2699 break; 2700 case 3: 2701 ret = i915_write_fence_reg(obj, pipelined); 2702 break; 2703 case 2: 2704 ret = i830_write_fence_reg(obj, pipelined); 2705 break; 2706 } 2707 2708 return ret; 2709 } 2710 2711 /** 2712 * i915_gem_clear_fence_reg - clear out fence register info 2713 * @obj: object to clear 2714 * 2715 * Zeroes out the fence register itself and clears out the associated 2716 * data structures in dev_priv and obj. 2717 */ 2718 static void 2719 i915_gem_clear_fence_reg(struct drm_device *dev, 2720 struct drm_i915_fence_reg *reg) 2721 { 2722 drm_i915_private_t *dev_priv = dev->dev_private; 2723 uint32_t fence_reg = reg - dev_priv->fence_regs; 2724 2725 switch (INTEL_INFO(dev)->gen) { 2726 case 7: 2727 case 6: 2728 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0); 2729 break; 2730 case 5: 2731 case 4: 2732 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0); 2733 break; 2734 case 3: 2735 if (fence_reg >= 8) 2736 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2737 else 2738 case 2: 2739 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2740 2741 I915_WRITE(fence_reg, 0); 2742 break; 2743 } 2744 2745 list_del_init(®->lru_list); 2746 reg->obj = NULL; 2747 reg->setup_seqno = 0; 2748 } 2749 2750 /** 2751 * Finds free space in the GTT aperture and binds the object there. 2752 */ 2753 static int 2754 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2755 unsigned alignment, 2756 bool map_and_fenceable) 2757 { 2758 struct drm_device *dev = obj->base.dev; 2759 drm_i915_private_t *dev_priv = dev->dev_private; 2760 struct drm_mm_node *free_space; 2761 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2762 u32 size, fence_size, fence_alignment, unfenced_alignment; 2763 bool mappable, fenceable; 2764 int ret; 2765 2766 if (obj->madv != I915_MADV_WILLNEED) { 2767 DRM_ERROR("Attempting to bind a purgeable object\n"); 2768 return -EINVAL; 2769 } 2770 2771 fence_size = i915_gem_get_gtt_size(dev, 2772 obj->base.size, 2773 obj->tiling_mode); 2774 fence_alignment = i915_gem_get_gtt_alignment(dev, 2775 obj->base.size, 2776 obj->tiling_mode); 2777 unfenced_alignment = 2778 i915_gem_get_unfenced_gtt_alignment(dev, 2779 obj->base.size, 2780 obj->tiling_mode); 2781 2782 if (alignment == 0) 2783 alignment = map_and_fenceable ? fence_alignment : 2784 unfenced_alignment; 2785 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 2786 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2787 return -EINVAL; 2788 } 2789 2790 size = map_and_fenceable ? fence_size : obj->base.size; 2791 2792 /* If the object is bigger than the entire aperture, reject it early 2793 * before evicting everything in a vain attempt to find space. 2794 */ 2795 if (obj->base.size > 2796 (map_and_fenceable ? 
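/*
 * [Editor's illustrative sketch, not part of the driver] The binding path
 * that continues below follows a "search, evict, retry" shape: look for free
 * GTT space and, when none is found, evict something and try again.  This is
 * a stripped-down standalone model of that control flow only; try_alloc()
 * and evict_one() are hypothetical stand-ins, not driver functions.
 */
#if 0	/* standalone illustration; deliberately kept out of the build */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static size_t free_space;		/* pretend aperture state */

static bool try_alloc(size_t size)
{
	if (size > free_space)
		return false;
	free_space -= size;
	return true;
}

static bool evict_one(void)
{
	if (free_space >= 4096)
		return false;		/* nothing left worth evicting */
	free_space += 1024;		/* pretend one buffer was unbound */
	return true;
}

static int bind_sketch(size_t size)
{
	while (!try_alloc(size)) {
		if (!evict_one())
			return -1;	/* -ENOSPC in spirit */
	}
	return 0;
}

int main(void)
{
	free_space = 1024;
	printf("bind 3072 bytes: %s\n",
	       bind_sketch(3072) == 0 ? "ok after evicting" : "failed");
	return 0;
}
#endif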
dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 2797 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2798 return -E2BIG; 2799 } 2800 2801 search_free: 2802 if (map_and_fenceable) 2803 free_space = 2804 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, 2805 size, alignment, 0, 2806 dev_priv->mm.gtt_mappable_end, 2807 0); 2808 else 2809 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2810 size, alignment, 0); 2811 2812 if (free_space != NULL) { 2813 if (map_and_fenceable) 2814 obj->gtt_space = 2815 drm_mm_get_block_range_generic(free_space, 2816 size, alignment, 0, 2817 dev_priv->mm.gtt_mappable_end, 2818 0); 2819 else 2820 obj->gtt_space = 2821 drm_mm_get_block(free_space, size, alignment); 2822 } 2823 if (obj->gtt_space == NULL) { 2824 /* If the gtt is empty and we're still having trouble 2825 * fitting our object in, we're out of memory. 2826 */ 2827 ret = i915_gem_evict_something(dev, size, alignment, 2828 map_and_fenceable); 2829 if (ret) 2830 return ret; 2831 2832 goto search_free; 2833 } 2834 2835 ret = i915_gem_object_get_pages_gtt(obj, gfpmask); 2836 if (ret) { 2837 drm_mm_put_block(obj->gtt_space); 2838 obj->gtt_space = NULL; 2839 2840 if (ret == -ENOMEM) { 2841 /* first try to reclaim some memory by clearing the GTT */ 2842 ret = i915_gem_evict_everything(dev, false); 2843 if (ret) { 2844 /* now try to shrink everyone else */ 2845 if (gfpmask) { 2846 gfpmask = 0; 2847 goto search_free; 2848 } 2849 2850 return -ENOMEM; 2851 } 2852 2853 goto search_free; 2854 } 2855 2856 return ret; 2857 } 2858 2859 ret = i915_gem_gtt_bind_object(obj); 2860 if (ret) { 2861 i915_gem_object_put_pages_gtt(obj); 2862 drm_mm_put_block(obj->gtt_space); 2863 obj->gtt_space = NULL; 2864 2865 if (i915_gem_evict_everything(dev, false)) 2866 return ret; 2867 2868 goto search_free; 2869 } 2870 2871 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); 2872 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2873 2874 /* Assert that the object is not currently in any GPU domain. As it 2875 * wasn't in the GTT, there shouldn't be any way it could have been in 2876 * a GPU cache 2877 */ 2878 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2879 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2880 2881 obj->gtt_offset = obj->gtt_space->start; 2882 2883 fenceable = 2884 obj->gtt_space->size == fence_size && 2885 (obj->gtt_space->start & (fence_alignment -1)) == 0; 2886 2887 mappable = 2888 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 2889 2890 obj->map_and_fenceable = mappable && fenceable; 2891 2892 trace_i915_gem_object_bind(obj, map_and_fenceable); 2893 return 0; 2894 } 2895 2896 void 2897 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2898 { 2899 /* If we don't have a page list set up, then we're not pinned 2900 * to GPU, and we can ignore the cache flush because it'll happen 2901 * again at bind time. 2902 */ 2903 if (obj->pages == NULL) 2904 return; 2905 2906 /* If the GPU is snooping the contents of the CPU cache, 2907 * we do not need to manually clear the CPU cache lines. However, 2908 * the caches are only snooped when the render cache is 2909 * flushed/invalidated. As we always have to emit invalidations 2910 * and flushes when moving into and out of the RENDER domain, correct 2911 * snooping behaviour occurs naturally as the result of our domain 2912 * tracking. 
2913 */ 2914 if (obj->cache_level != I915_CACHE_NONE) 2915 return; 2916 2917 trace_i915_gem_object_clflush(obj); 2918 2919 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2920 } 2921 2922 /** Flushes any GPU write domain for the object if it's dirty. */ 2923 static int 2924 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) 2925 { 2926 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) 2927 return 0; 2928 2929 /* Queue the GPU write cache flushing we need. */ 2930 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2931 } 2932 2933 /** Flushes the GTT write domain for the object if it's dirty. */ 2934 static void 2935 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2936 { 2937 uint32_t old_write_domain; 2938 2939 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2940 return; 2941 2942 /* No actual flushing is required for the GTT write domain. Writes 2943 * to it immediately go to main memory as far as we know, so there's 2944 * no chipset flush. It also doesn't land in render cache. 2945 * 2946 * However, we do have to enforce the order so that all writes through 2947 * the GTT land before any writes to the device, such as updates to 2948 * the GATT itself. 2949 */ 2950 wmb(); 2951 2952 old_write_domain = obj->base.write_domain; 2953 obj->base.write_domain = 0; 2954 2955 trace_i915_gem_object_change_domain(obj, 2956 obj->base.read_domains, 2957 old_write_domain); 2958 } 2959 2960 /** Flushes the CPU write domain for the object if it's dirty. */ 2961 static void 2962 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2963 { 2964 uint32_t old_write_domain; 2965 2966 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2967 return; 2968 2969 i915_gem_clflush_object(obj); 2970 intel_gtt_chipset_flush(); 2971 old_write_domain = obj->base.write_domain; 2972 obj->base.write_domain = 0; 2973 2974 trace_i915_gem_object_change_domain(obj, 2975 obj->base.read_domains, 2976 old_write_domain); 2977 } 2978 2979 /** 2980 * Moves a single object to the GTT read, and possibly write domain. 2981 * 2982 * This function returns when the move is complete, including waiting on 2983 * flushes to occur. 2984 */ 2985 int 2986 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2987 { 2988 uint32_t old_write_domain, old_read_domains; 2989 int ret; 2990 2991 /* Not valid to be called on unbound objects. */ 2992 if (obj->gtt_space == NULL) 2993 return -EINVAL; 2994 2995 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2996 return 0; 2997 2998 ret = i915_gem_object_flush_gpu_write_domain(obj); 2999 if (ret) 3000 return ret; 3001 3002 if (obj->pending_gpu_write || write) { 3003 ret = i915_gem_object_wait_rendering(obj); 3004 if (ret) 3005 return ret; 3006 } 3007 3008 i915_gem_object_flush_cpu_write_domain(obj); 3009 3010 old_write_domain = obj->base.write_domain; 3011 old_read_domains = obj->base.read_domains; 3012 3013 /* It should now be out of any other write domains, and we can update 3014 * the domain values for our changes. 
3015 */ 3016 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3017 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3018 if (write) { 3019 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3020 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3021 obj->dirty = 1; 3022 } 3023 3024 trace_i915_gem_object_change_domain(obj, 3025 old_read_domains, 3026 old_write_domain); 3027 3028 return 0; 3029 } 3030 3031 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3032 enum i915_cache_level cache_level) 3033 { 3034 int ret; 3035 3036 if (obj->cache_level == cache_level) 3037 return 0; 3038 3039 if (obj->pin_count) { 3040 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3041 return -EBUSY; 3042 } 3043 3044 if (obj->gtt_space) { 3045 ret = i915_gem_object_finish_gpu(obj); 3046 if (ret) 3047 return ret; 3048 3049 i915_gem_object_finish_gtt(obj); 3050 3051 /* Before SandyBridge, you could not use tiling or fence 3052 * registers with snooped memory, so relinquish any fences 3053 * currently pointing to our region in the aperture. 3054 */ 3055 if (INTEL_INFO(obj->base.dev)->gen < 6) { 3056 ret = i915_gem_object_put_fence(obj); 3057 if (ret) 3058 return ret; 3059 } 3060 3061 i915_gem_gtt_rebind_object(obj, cache_level); 3062 } 3063 3064 if (cache_level == I915_CACHE_NONE) { 3065 u32 old_read_domains, old_write_domain; 3066 3067 /* If we're coming from LLC cached, then we haven't 3068 * actually been tracking whether the data is in the 3069 * CPU cache or not, since we only allow one bit set 3070 * in obj->write_domain and have been skipping the clflushes. 3071 * Just set it to the CPU cache for now. 3072 */ 3073 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3074 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 3075 3076 old_read_domains = obj->base.read_domains; 3077 old_write_domain = obj->base.write_domain; 3078 3079 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3080 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3081 3082 trace_i915_gem_object_change_domain(obj, 3083 old_read_domains, 3084 old_write_domain); 3085 } 3086 3087 obj->cache_level = cache_level; 3088 return 0; 3089 } 3090 3091 /* 3092 * Prepare buffer for display plane (scanout, cursors, etc). 3093 * Can be called from an uninterruptible phase (modesetting) and allows 3094 * any flushes to be pipelined (for pageflips). 3095 * 3096 * For the display plane, we want to be in the GTT but out of any write 3097 * domains. So in many ways this looks like set_to_gtt_domain() apart from the 3098 * ability to pipeline the waits, pinning and any additional subtleties 3099 * that may differentiate the display plane from ordinary buffers. 3100 */ 3101 int 3102 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3103 u32 alignment, 3104 struct intel_ring_buffer *pipelined) 3105 { 3106 u32 old_read_domains, old_write_domain; 3107 int ret; 3108 3109 ret = i915_gem_object_flush_gpu_write_domain(obj); 3110 if (ret) 3111 return ret; 3112 3113 if (pipelined != obj->ring) { 3114 ret = i915_gem_object_wait_rendering(obj); 3115 if (ret == -ERESTARTSYS) 3116 return ret; 3117 } 3118 3119 /* The display engine is not coherent with the LLC cache on gen6. As 3120 * a result, we make sure that the pinning that is about to occur is 3121 * done with uncached PTEs. This is lowest common denominator for all 3122 * chipsets. 
3123 * 3124 * However for gen6+, we could do better by using the GFDT bit instead 3125 * of uncaching, which would allow us to flush all the LLC-cached data 3126 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3127 */ 3128 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3129 if (ret) 3130 return ret; 3131 3132 /* As the user may map the buffer once pinned in the display plane 3133 * (e.g. libkms for the bootup splash), we have to ensure that we 3134 * always use map_and_fenceable for all scanout buffers. 3135 */ 3136 ret = i915_gem_object_pin(obj, alignment, true); 3137 if (ret) 3138 return ret; 3139 3140 i915_gem_object_flush_cpu_write_domain(obj); 3141 3142 old_write_domain = obj->base.write_domain; 3143 old_read_domains = obj->base.read_domains; 3144 3145 /* It should now be out of any other write domains, and we can update 3146 * the domain values for our changes. 3147 */ 3148 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3149 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3150 3151 trace_i915_gem_object_change_domain(obj, 3152 old_read_domains, 3153 old_write_domain); 3154 3155 return 0; 3156 } 3157 3158 int 3159 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3160 { 3161 int ret; 3162 3163 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3164 return 0; 3165 3166 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3167 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 3168 if (ret) 3169 return ret; 3170 } 3171 3172 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3173 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3174 3175 return i915_gem_object_wait_rendering(obj); 3176 } 3177 3178 /** 3179 * Moves a single object to the CPU read, and possibly write domain. 3180 * 3181 * This function returns when the move is complete, including waiting on 3182 * flushes to occur. 3183 */ 3184 static int 3185 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3186 { 3187 uint32_t old_write_domain, old_read_domains; 3188 int ret; 3189 3190 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3191 return 0; 3192 3193 ret = i915_gem_object_flush_gpu_write_domain(obj); 3194 if (ret) 3195 return ret; 3196 3197 ret = i915_gem_object_wait_rendering(obj); 3198 if (ret) 3199 return ret; 3200 3201 i915_gem_object_flush_gtt_write_domain(obj); 3202 3203 /* If we have a partially-valid cache of the object in the CPU, 3204 * finish invalidating it and free the per-page flags. 3205 */ 3206 i915_gem_object_set_to_full_cpu_read_domain(obj); 3207 3208 old_write_domain = obj->base.write_domain; 3209 old_read_domains = obj->base.read_domains; 3210 3211 /* Flush the CPU cache if it's still invalid. */ 3212 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3213 i915_gem_clflush_object(obj); 3214 3215 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3216 } 3217 3218 /* It should now be out of any other write domains, and we can update 3219 * the domain values for our changes. 3220 */ 3221 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3222 3223 /* If we're writing through the CPU, then the GPU read domains will 3224 * need to be invalidated at next use. 
3225 */ 3226 if (write) { 3227 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3228 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3229 } 3230 3231 trace_i915_gem_object_change_domain(obj, 3232 old_read_domains, 3233 old_write_domain); 3234 3235 return 0; 3236 } 3237 3238 /** 3239 * Moves the object from a partially CPU read to a full one. 3240 * 3241 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3242 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3243 */ 3244 static void 3245 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) 3246 { 3247 if (!obj->page_cpu_valid) 3248 return; 3249 3250 /* If we're partially in the CPU read domain, finish moving it in. 3251 */ 3252 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) { 3253 int i; 3254 3255 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) { 3256 if (obj->page_cpu_valid[i]) 3257 continue; 3258 drm_clflush_pages(obj->pages + i, 1); 3259 } 3260 } 3261 3262 /* Free the page_cpu_valid mappings which are now stale, whether 3263 * or not we've got I915_GEM_DOMAIN_CPU. 3264 */ 3265 kfree(obj->page_cpu_valid); 3266 obj->page_cpu_valid = NULL; 3267 } 3268 3269 /** 3270 * Set the CPU read domain on a range of the object. 3271 * 3272 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3273 * not entirely valid. The page_cpu_valid member of the object flags which 3274 * pages have been flushed, and will be respected by 3275 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3276 * of the whole object. 3277 * 3278 * This function returns when the move is complete, including waiting on 3279 * flushes to occur. 3280 */ 3281 static int 3282 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, 3283 uint64_t offset, uint64_t size) 3284 { 3285 uint32_t old_read_domains; 3286 int i, ret; 3287 3288 if (offset == 0 && size == obj->base.size) 3289 return i915_gem_object_set_to_cpu_domain(obj, 0); 3290 3291 ret = i915_gem_object_flush_gpu_write_domain(obj); 3292 if (ret) 3293 return ret; 3294 3295 ret = i915_gem_object_wait_rendering(obj); 3296 if (ret) 3297 return ret; 3298 3299 i915_gem_object_flush_gtt_write_domain(obj); 3300 3301 /* If we're already fully in the CPU read domain, we're done. */ 3302 if (obj->page_cpu_valid == NULL && 3303 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0) 3304 return 0; 3305 3306 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3307 * newly adding I915_GEM_DOMAIN_CPU 3308 */ 3309 if (obj->page_cpu_valid == NULL) { 3310 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE, 3311 GFP_KERNEL); 3312 if (obj->page_cpu_valid == NULL) 3313 return -ENOMEM; 3314 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 3315 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE); 3316 3317 /* Flush the cache on any pages that are still invalid from the CPU's 3318 * perspective. 3319 */ 3320 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3321 i++) { 3322 if (obj->page_cpu_valid[i]) 3323 continue; 3324 3325 drm_clflush_pages(obj->pages + i, 1); 3326 3327 obj->page_cpu_valid[i] = 1; 3328 } 3329 3330 /* It should now be out of any other write domains, and we can update 3331 * the domain values for our changes. 
3332 */ 3333 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3334 3335 old_read_domains = obj->base.read_domains; 3336 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3337 3338 trace_i915_gem_object_change_domain(obj, 3339 old_read_domains, 3340 obj->base.write_domain); 3341 3342 return 0; 3343 } 3344 3345 /* Throttle our rendering by waiting until the ring has completed our requests 3346 * emitted over 20 msec ago. 3347 * 3348 * Note that if we were to use the current jiffies each time around the loop, 3349 * we wouldn't escape the function with any frames outstanding if the time to 3350 * render a frame was over 20ms. 3351 * 3352 * This should get us reasonable parallelism between CPU and GPU but also 3353 * relatively low latency when blocking on a particular request to finish. 3354 */ 3355 static int 3356 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3357 { 3358 struct drm_i915_private *dev_priv = dev->dev_private; 3359 struct drm_i915_file_private *file_priv = file->driver_priv; 3360 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3361 struct drm_i915_gem_request *request; 3362 struct intel_ring_buffer *ring = NULL; 3363 u32 seqno = 0; 3364 int ret; 3365 3366 if (atomic_read(&dev_priv->mm.wedged)) 3367 return -EIO; 3368 3369 spin_lock(&file_priv->mm.lock); 3370 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3371 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3372 break; 3373 3374 ring = request->ring; 3375 seqno = request->seqno; 3376 } 3377 spin_unlock(&file_priv->mm.lock); 3378 3379 if (seqno == 0) 3380 return 0; 3381 3382 ret = 0; 3383 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 3384 /* And wait for the seqno passing without holding any locks and 3385 * causing extra latency for others. This is safe as the irq 3386 * generation is designed to be run atomically and so is 3387 * lockless. 
3388 */ 3389 if (ring->irq_get(ring)) { 3390 ret = wait_event_interruptible(ring->irq_queue, 3391 i915_seqno_passed(ring->get_seqno(ring), seqno) 3392 || atomic_read(&dev_priv->mm.wedged)); 3393 ring->irq_put(ring); 3394 3395 if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) 3396 ret = -EIO; 3397 } 3398 } 3399 3400 if (ret == 0) 3401 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3402 3403 return ret; 3404 } 3405 3406 int 3407 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3408 uint32_t alignment, 3409 bool map_and_fenceable) 3410 { 3411 struct drm_device *dev = obj->base.dev; 3412 struct drm_i915_private *dev_priv = dev->dev_private; 3413 int ret; 3414 3415 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 3416 WARN_ON(i915_verify_lists(dev)); 3417 3418 if (obj->gtt_space != NULL) { 3419 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3420 (map_and_fenceable && !obj->map_and_fenceable)) { 3421 WARN(obj->pin_count, 3422 "bo is already pinned with incorrect alignment:" 3423 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3424 " obj->map_and_fenceable=%d\n", 3425 obj->gtt_offset, alignment, 3426 map_and_fenceable, 3427 obj->map_and_fenceable); 3428 ret = i915_gem_object_unbind(obj); 3429 if (ret) 3430 return ret; 3431 } 3432 } 3433 3434 if (obj->gtt_space == NULL) { 3435 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3436 map_and_fenceable); 3437 if (ret) 3438 return ret; 3439 } 3440 3441 if (obj->pin_count++ == 0) { 3442 if (!obj->active) 3443 list_move_tail(&obj->mm_list, 3444 &dev_priv->mm.pinned_list); 3445 } 3446 obj->pin_mappable |= map_and_fenceable; 3447 3448 WARN_ON(i915_verify_lists(dev)); 3449 return 0; 3450 } 3451 3452 void 3453 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3454 { 3455 struct drm_device *dev = obj->base.dev; 3456 drm_i915_private_t *dev_priv = dev->dev_private; 3457 3458 WARN_ON(i915_verify_lists(dev)); 3459 BUG_ON(obj->pin_count == 0); 3460 BUG_ON(obj->gtt_space == NULL); 3461 3462 if (--obj->pin_count == 0) { 3463 if (!obj->active) 3464 list_move_tail(&obj->mm_list, 3465 &dev_priv->mm.inactive_list); 3466 obj->pin_mappable = false; 3467 } 3468 WARN_ON(i915_verify_lists(dev)); 3469 } 3470 3471 int 3472 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3473 struct drm_file *file) 3474 { 3475 struct drm_i915_gem_pin *args = data; 3476 struct drm_i915_gem_object *obj; 3477 int ret; 3478 3479 ret = i915_mutex_lock_interruptible(dev); 3480 if (ret) 3481 return ret; 3482 3483 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3484 if (&obj->base == NULL) { 3485 ret = -ENOENT; 3486 goto unlock; 3487 } 3488 3489 if (obj->madv != I915_MADV_WILLNEED) { 3490 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3491 ret = -EINVAL; 3492 goto out; 3493 } 3494 3495 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3496 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3497 args->handle); 3498 ret = -EINVAL; 3499 goto out; 3500 } 3501 3502 obj->user_pin_count++; 3503 obj->pin_filp = file; 3504 if (obj->user_pin_count == 1) { 3505 ret = i915_gem_object_pin(obj, args->alignment, true); 3506 if (ret) 3507 goto out; 3508 } 3509 3510 /* XXX - flush the CPU caches for pinned objects 3511 * as the X server doesn't manage domains yet 3512 */ 3513 i915_gem_object_flush_cpu_write_domain(obj); 3514 args->offset = obj->gtt_offset; 3515 out: 3516 drm_gem_object_unreference(&obj->base); 3517 unlock: 3518 mutex_unlock(&dev->struct_mutex); 3519 return ret; 3520 } 3521 3522 int 3523 
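/*
 * [Editor's illustrative sketch, not part of the driver] Pinning above is
 * reference-counted: only the 0 -> 1 transition moves the object onto the
 * pinned list and only the 1 -> 0 transition moves it back, while the pin
 * ioctl layers a second, per-file count on top of that.  The toy model below
 * shows why nested pins are harmless; every name in it is hypothetical.
 */
#if 0	/* standalone illustration; deliberately kept out of the build */
#include <assert.h>
#include <stdbool.h>

struct toy_obj {
	int pin_count;		/* kernel-side pins                */
	int user_pin_count;	/* pins requested via the ioctl    */
	bool on_pinned_list;
};

static void toy_pin(struct toy_obj *obj)
{
	if (obj->pin_count++ == 0)
		obj->on_pinned_list = true;
}

static void toy_unpin(struct toy_obj *obj)
{
	assert(obj->pin_count > 0);
	if (--obj->pin_count == 0)
		obj->on_pinned_list = false;
}

static void toy_pin_ioctl(struct toy_obj *obj)
{
	if (++obj->user_pin_count == 1)
		toy_pin(obj);	/* first userspace pin takes the real pin */
}

static void toy_unpin_ioctl(struct toy_obj *obj)
{
	if (--obj->user_pin_count == 0)
		toy_unpin(obj);	/* last userspace unpin drops it */
}

int main(void)
{
	struct toy_obj obj = { 0 };

	toy_pin_ioctl(&obj);
	toy_pin_ioctl(&obj);
	toy_unpin_ioctl(&obj);
	assert(obj.on_pinned_list);	/* still pinned once */
	toy_unpin_ioctl(&obj);
	assert(!obj.on_pinned_list);
	return 0;
}
#endif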
i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3524 struct drm_file *file) 3525 { 3526 struct drm_i915_gem_pin *args = data; 3527 struct drm_i915_gem_object *obj; 3528 int ret; 3529 3530 ret = i915_mutex_lock_interruptible(dev); 3531 if (ret) 3532 return ret; 3533 3534 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3535 if (&obj->base == NULL) { 3536 ret = -ENOENT; 3537 goto unlock; 3538 } 3539 3540 if (obj->pin_filp != file) { 3541 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3542 args->handle); 3543 ret = -EINVAL; 3544 goto out; 3545 } 3546 obj->user_pin_count--; 3547 if (obj->user_pin_count == 0) { 3548 obj->pin_filp = NULL; 3549 i915_gem_object_unpin(obj); 3550 } 3551 3552 out: 3553 drm_gem_object_unreference(&obj->base); 3554 unlock: 3555 mutex_unlock(&dev->struct_mutex); 3556 return ret; 3557 } 3558 3559 int 3560 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3561 struct drm_file *file) 3562 { 3563 struct drm_i915_gem_busy *args = data; 3564 struct drm_i915_gem_object *obj; 3565 int ret; 3566 3567 ret = i915_mutex_lock_interruptible(dev); 3568 if (ret) 3569 return ret; 3570 3571 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3572 if (&obj->base == NULL) { 3573 ret = -ENOENT; 3574 goto unlock; 3575 } 3576 3577 /* Count all active objects as busy, even if they are currently not used 3578 * by the gpu. Users of this interface expect objects to eventually 3579 * become non-busy without any further actions, therefore emit any 3580 * necessary flushes here. 3581 */ 3582 args->busy = obj->active; 3583 if (args->busy) { 3584 /* Unconditionally flush objects, even when the gpu still uses this 3585 * object. Userspace calling this function indicates that it wants to 3586 * use this buffer rather sooner than later, so issuing the required 3587 * flush earlier is beneficial. 3588 */ 3589 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3590 ret = i915_gem_flush_ring(obj->ring, 3591 0, obj->base.write_domain); 3592 } else if (obj->ring->outstanding_lazy_request == 3593 obj->last_rendering_seqno) { 3594 struct drm_i915_gem_request *request; 3595 3596 /* This ring is not being cleared by active usage, 3597 * so emit a request to do so. 3598 */ 3599 request = kzalloc(sizeof(*request), GFP_KERNEL); 3600 if (request) 3601 ret = i915_add_request(obj->ring, NULL,request); 3602 else 3603 ret = -ENOMEM; 3604 } 3605 3606 /* Update the active list for the hardware's current position. 3607 * Otherwise this only updates on a delayed timer or when irqs 3608 * are actually unmasked, and our working set ends up being 3609 * larger than required. 
3610 */ 3611 i915_gem_retire_requests_ring(obj->ring); 3612 3613 args->busy = obj->active; 3614 } 3615 3616 drm_gem_object_unreference(&obj->base); 3617 unlock: 3618 mutex_unlock(&dev->struct_mutex); 3619 return ret; 3620 } 3621 3622 int 3623 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3624 struct drm_file *file_priv) 3625 { 3626 return i915_gem_ring_throttle(dev, file_priv); 3627 } 3628 3629 int 3630 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3631 struct drm_file *file_priv) 3632 { 3633 struct drm_i915_gem_madvise *args = data; 3634 struct drm_i915_gem_object *obj; 3635 int ret; 3636 3637 switch (args->madv) { 3638 case I915_MADV_DONTNEED: 3639 case I915_MADV_WILLNEED: 3640 break; 3641 default: 3642 return -EINVAL; 3643 } 3644 3645 ret = i915_mutex_lock_interruptible(dev); 3646 if (ret) 3647 return ret; 3648 3649 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3650 if (&obj->base == NULL) { 3651 ret = -ENOENT; 3652 goto unlock; 3653 } 3654 3655 if (obj->pin_count) { 3656 ret = -EINVAL; 3657 goto out; 3658 } 3659 3660 if (obj->madv != __I915_MADV_PURGED) 3661 obj->madv = args->madv; 3662 3663 /* if the object is no longer bound, discard its backing storage */ 3664 if (i915_gem_object_is_purgeable(obj) && 3665 obj->gtt_space == NULL) 3666 i915_gem_object_truncate(obj); 3667 3668 args->retained = obj->madv != __I915_MADV_PURGED; 3669 3670 out: 3671 drm_gem_object_unreference(&obj->base); 3672 unlock: 3673 mutex_unlock(&dev->struct_mutex); 3674 return ret; 3675 } 3676 3677 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3678 size_t size) 3679 { 3680 struct drm_i915_private *dev_priv = dev->dev_private; 3681 struct drm_i915_gem_object *obj; 3682 struct address_space *mapping; 3683 3684 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 3685 if (obj == NULL) 3686 return NULL; 3687 3688 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3689 kfree(obj); 3690 return NULL; 3691 } 3692 3693 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3694 mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); 3695 3696 i915_gem_info_add_obj(dev_priv, size); 3697 3698 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3699 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3700 3701 if (IS_GEN6(dev)) { 3702 /* On Gen6, we can have the GPU use the LLC (the CPU 3703 * cache) for about a 10% performance improvement 3704 * compared to uncached. Graphics requests other than 3705 * display scanout are coherent with the CPU in 3706 * accessing this cache. This means in this mode we 3707 * don't need to clflush on the CPU side, and on the 3708 * GPU side we only need to flush internal caches to 3709 * get data visible to the CPU. 3710 * 3711 * However, we maintain the display planes as UC, and so 3712 * need to rebind when first used as such. 3713 */ 3714 obj->cache_level = I915_CACHE_LLC; 3715 } else 3716 obj->cache_level = I915_CACHE_NONE; 3717 3718 obj->base.driver_private = NULL; 3719 obj->fence_reg = I915_FENCE_REG_NONE; 3720 INIT_LIST_HEAD(&obj->mm_list); 3721 INIT_LIST_HEAD(&obj->gtt_list); 3722 INIT_LIST_HEAD(&obj->ring_list); 3723 INIT_LIST_HEAD(&obj->exec_list); 3724 INIT_LIST_HEAD(&obj->gpu_write_list); 3725 obj->madv = I915_MADV_WILLNEED; 3726 /* Avoid an unnecessary call to unbind on the first bind. 
*/ 3727 obj->map_and_fenceable = true; 3728 3729 return obj; 3730 } 3731 3732 int i915_gem_init_object(struct drm_gem_object *obj) 3733 { 3734 BUG(); 3735 3736 return 0; 3737 } 3738 3739 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) 3740 { 3741 struct drm_device *dev = obj->base.dev; 3742 drm_i915_private_t *dev_priv = dev->dev_private; 3743 int ret; 3744 3745 ret = i915_gem_object_unbind(obj); 3746 if (ret == -ERESTARTSYS) { 3747 list_move(&obj->mm_list, 3748 &dev_priv->mm.deferred_free_list); 3749 return; 3750 } 3751 3752 trace_i915_gem_object_destroy(obj); 3753 3754 if (obj->base.map_list.map) 3755 i915_gem_free_mmap_offset(obj); 3756 3757 drm_gem_object_release(&obj->base); 3758 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3759 3760 kfree(obj->page_cpu_valid); 3761 kfree(obj->bit_17); 3762 kfree(obj); 3763 } 3764 3765 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3766 { 3767 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3768 struct drm_device *dev = obj->base.dev; 3769 3770 while (obj->pin_count > 0) 3771 i915_gem_object_unpin(obj); 3772 3773 if (obj->phys_obj) 3774 i915_gem_detach_phys_object(dev, obj); 3775 3776 i915_gem_free_object_tail(obj); 3777 } 3778 3779 int 3780 i915_gem_idle(struct drm_device *dev) 3781 { 3782 drm_i915_private_t *dev_priv = dev->dev_private; 3783 int ret; 3784 3785 mutex_lock(&dev->struct_mutex); 3786 3787 if (dev_priv->mm.suspended) { 3788 mutex_unlock(&dev->struct_mutex); 3789 return 0; 3790 } 3791 3792 ret = i915_gpu_idle(dev); 3793 if (ret) { 3794 mutex_unlock(&dev->struct_mutex); 3795 return ret; 3796 } 3797 3798 /* Under UMS, be paranoid and evict. */ 3799 if (!drm_core_check_feature(dev, DRIVER_MODESET)) { 3800 ret = i915_gem_evict_inactive(dev, false); 3801 if (ret) { 3802 mutex_unlock(&dev->struct_mutex); 3803 return ret; 3804 } 3805 } 3806 3807 i915_gem_reset_fences(dev); 3808 3809 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3810 * We need to replace this with a semaphore, or something. 3811 * And not confound mm.suspended! 3812 */ 3813 dev_priv->mm.suspended = 1; 3814 del_timer_sync(&dev_priv->hangcheck_timer); 3815 3816 i915_kernel_lost_context(dev); 3817 i915_gem_cleanup_ringbuffer(dev); 3818 3819 mutex_unlock(&dev->struct_mutex); 3820 3821 /* Cancel the retire work handler, which should be idle now. 
*/ 3822 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3823 3824 return 0; 3825 } 3826 3827 int 3828 i915_gem_init_ringbuffer(struct drm_device *dev) 3829 { 3830 drm_i915_private_t *dev_priv = dev->dev_private; 3831 int ret; 3832 3833 ret = intel_init_render_ring_buffer(dev); 3834 if (ret) 3835 return ret; 3836 3837 if (HAS_BSD(dev)) { 3838 ret = intel_init_bsd_ring_buffer(dev); 3839 if (ret) 3840 goto cleanup_render_ring; 3841 } 3842 3843 if (HAS_BLT(dev)) { 3844 ret = intel_init_blt_ring_buffer(dev); 3845 if (ret) 3846 goto cleanup_bsd_ring; 3847 } 3848 3849 dev_priv->next_seqno = 1; 3850 3851 return 0; 3852 3853 cleanup_bsd_ring: 3854 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3855 cleanup_render_ring: 3856 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3857 return ret; 3858 } 3859 3860 void 3861 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3862 { 3863 drm_i915_private_t *dev_priv = dev->dev_private; 3864 int i; 3865 3866 for (i = 0; i < I915_NUM_RINGS; i++) 3867 intel_cleanup_ring_buffer(&dev_priv->ring[i]); 3868 } 3869 3870 int 3871 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3872 struct drm_file *file_priv) 3873 { 3874 drm_i915_private_t *dev_priv = dev->dev_private; 3875 int ret, i; 3876 3877 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3878 return 0; 3879 3880 if (atomic_read(&dev_priv->mm.wedged)) { 3881 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3882 atomic_set(&dev_priv->mm.wedged, 0); 3883 } 3884 3885 mutex_lock(&dev->struct_mutex); 3886 dev_priv->mm.suspended = 0; 3887 3888 ret = i915_gem_init_ringbuffer(dev); 3889 if (ret != 0) { 3890 mutex_unlock(&dev->struct_mutex); 3891 return ret; 3892 } 3893 3894 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 3895 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 3896 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 3897 for (i = 0; i < I915_NUM_RINGS; i++) { 3898 BUG_ON(!list_empty(&dev_priv->ring[i].active_list)); 3899 BUG_ON(!list_empty(&dev_priv->ring[i].request_list)); 3900 } 3901 mutex_unlock(&dev->struct_mutex); 3902 3903 ret = drm_irq_install(dev); 3904 if (ret) 3905 goto cleanup_ringbuffer; 3906 3907 return 0; 3908 3909 cleanup_ringbuffer: 3910 mutex_lock(&dev->struct_mutex); 3911 i915_gem_cleanup_ringbuffer(dev); 3912 dev_priv->mm.suspended = 1; 3913 mutex_unlock(&dev->struct_mutex); 3914 3915 return ret; 3916 } 3917 3918 int 3919 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3920 struct drm_file *file_priv) 3921 { 3922 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3923 return 0; 3924 3925 drm_irq_uninstall(dev); 3926 return i915_gem_idle(dev); 3927 } 3928 3929 void 3930 i915_gem_lastclose(struct drm_device *dev) 3931 { 3932 int ret; 3933 3934 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3935 return; 3936 3937 ret = i915_gem_idle(dev); 3938 if (ret) 3939 DRM_ERROR("failed to idle hardware: %d\n", ret); 3940 } 3941 3942 static void 3943 init_ring_lists(struct intel_ring_buffer *ring) 3944 { 3945 INIT_LIST_HEAD(&ring->active_list); 3946 INIT_LIST_HEAD(&ring->request_list); 3947 INIT_LIST_HEAD(&ring->gpu_write_list); 3948 } 3949 3950 void 3951 i915_gem_load(struct drm_device *dev) 3952 { 3953 int i; 3954 drm_i915_private_t *dev_priv = dev->dev_private; 3955 3956 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3957 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 3958 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3959 INIT_LIST_HEAD(&dev_priv->mm.pinned_list); 3960 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3961 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); 3962 
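/*
 * [Editor's illustrative sketch, not part of the driver] The ring
 * initialisation a little further up unwinds partial setup with goto labels:
 * each ring that came up successfully gets a cleanup label, and a later
 * failure jumps to the label that tears down everything initialised so far,
 * in reverse order.  Below is a self-contained model of that pattern;
 * init_a/b/c and their cleanups are made-up names.
 */
#if 0	/* standalone illustration; deliberately kept out of the build */
#include <stdio.h>

static int init_a(void)  { puts("init a");  return 0; }
static int init_b(void)  { puts("init b");  return 0; }
static int init_c(void)  { puts("init c");  return -1; }	/* pretend failure */
static void fini_a(void) { puts("fini a"); }
static void fini_b(void) { puts("fini b"); }

static int init_all(void)
{
	int ret;

	ret = init_a();
	if (ret)
		return ret;
	ret = init_b();
	if (ret)
		goto cleanup_a;
	ret = init_c();
	if (ret)
		goto cleanup_b;
	return 0;

cleanup_b:
	fini_b();
cleanup_a:
	fini_a();
	return ret;
}

int main(void)
{
	return init_all() ? 1 : 0;	/* prints the unwind order on failure */
}
#endif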
INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3963 for (i = 0; i < I915_NUM_RINGS; i++) 3964 init_ring_lists(&dev_priv->ring[i]); 3965 for (i = 0; i < 16; i++) 3966 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3967 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3968 i915_gem_retire_work_handler); 3969 init_completion(&dev_priv->error_completion); 3970 3971 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3972 if (IS_GEN3(dev)) { 3973 u32 tmp = I915_READ(MI_ARB_STATE); 3974 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { 3975 /* arb state is a masked write, so set bit + bit in mask */ 3976 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT); 3977 I915_WRITE(MI_ARB_STATE, tmp); 3978 } 3979 } 3980 3981 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3982 3983 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3984 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3985 dev_priv->fence_reg_start = 3; 3986 3987 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3988 dev_priv->num_fence_regs = 16; 3989 else 3990 dev_priv->num_fence_regs = 8; 3991 3992 /* Initialize fence registers to zero */ 3993 for (i = 0; i < dev_priv->num_fence_regs; i++) { 3994 i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]); 3995 } 3996 3997 i915_gem_detect_bit_6_swizzle(dev); 3998 init_waitqueue_head(&dev_priv->pending_flip_queue); 3999 4000 dev_priv->mm.interruptible = true; 4001 4002 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 4003 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 4004 register_shrinker(&dev_priv->mm.inactive_shrinker); 4005 } 4006 4007 /* 4008 * Create a physically contiguous memory object for this object 4009 * e.g. for cursor + overlay regs 4010 */ 4011 static int i915_gem_init_phys_object(struct drm_device *dev, 4012 int id, int size, int align) 4013 { 4014 drm_i915_private_t *dev_priv = dev->dev_private; 4015 struct drm_i915_gem_phys_object *phys_obj; 4016 int ret; 4017 4018 if (dev_priv->mm.phys_objs[id - 1] || !size) 4019 return 0; 4020 4021 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4022 if (!phys_obj) 4023 return -ENOMEM; 4024 4025 phys_obj->id = id; 4026 4027 phys_obj->handle = drm_pci_alloc(dev, size, align); 4028 if (!phys_obj->handle) { 4029 ret = -ENOMEM; 4030 goto kfree_obj; 4031 } 4032 #ifdef CONFIG_X86 4033 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4034 #endif 4035 4036 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4037 4038 return 0; 4039 kfree_obj: 4040 kfree(phys_obj); 4041 return ret; 4042 } 4043 4044 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 4045 { 4046 drm_i915_private_t *dev_priv = dev->dev_private; 4047 struct drm_i915_gem_phys_object *phys_obj; 4048 4049 if (!dev_priv->mm.phys_objs[id - 1]) 4050 return; 4051 4052 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4053 if (phys_obj->cur_obj) { 4054 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4055 } 4056 4057 #ifdef CONFIG_X86 4058 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4059 #endif 4060 drm_pci_free(dev, phys_obj->handle); 4061 kfree(phys_obj); 4062 dev_priv->mm.phys_objs[id - 1] = NULL; 4063 } 4064 4065 void i915_gem_free_all_phys_object(struct drm_device *dev) 4066 { 4067 int i; 4068 4069 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4070 i915_gem_free_phys_object(dev, i); 4071 } 4072 4073 void 
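/*
 * [Editor's illustrative sketch, not part of the driver] The MI_ARB_STATE
 * update above uses the "masked write" register convention: a bit in the low
 * half only changes if the matching bit in the high half (the write-enable
 * mask) is also set, so unrelated bits are left untouched without a
 * read-modify-write.  The sketch below assumes a 16-bit mask shift purely
 * for illustration; masked_set()/masked_clear() are hypothetical helpers,
 * not driver or hardware API.
 */
#if 0	/* standalone illustration; deliberately kept out of the build */
#include <stdint.h>
#include <stdio.h>

#define TOY_MASK_SHIFT	16

static uint32_t masked_set(uint32_t bit)
{
	return (bit << TOY_MASK_SHIFT) | bit;	/* enable bit + value 1 */
}

static uint32_t masked_clear(uint32_t bit)
{
	return bit << TOY_MASK_SHIFT;		/* enable bit + value 0 */
}

int main(void)
{
	uint32_t lp_write_enable = 1u << 2;	/* made-up bit position */

	printf("set:   0x%08x\n", masked_set(lp_write_enable));
	printf("clear: 0x%08x\n", masked_clear(lp_write_enable));
	return 0;
}
#endif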
void
i915_gem_detach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char *vaddr;
	int i;
	int page_count;

	if (!obj->phys_obj)
		return;
	vaddr = obj->phys_obj->handle->vaddr;

	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
		struct page *page = shmem_read_mapping_page(mapping, i);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			kunmap_atomic(dst);

			drm_clflush_pages(&page, 1);

			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
		}
	}
	intel_gtt_chipset_flush();

	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
			    int id,
			    int align)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
			return ret;
		}
	}

	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;

	page_count = obj->base.size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		struct page *page;
		char *dst, *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
		page_cache_release(page);
	}

	return 0;
}

static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	intel_gtt_chipset_flush();
	return 0;
}
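
/*
 * i915_gem_release is the per-file GEM teardown. It is expected to run
 * from the DRM file-close path (the driver's postclose hook is assumed
 * here), once no further ioctls can arrive for this file.
 */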
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int lists_empty;

	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
		      list_empty(&dev_priv->mm.active_list);

	return !lists_empty;
}

static int
i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj, *next;
	int nr_to_scan = sc->nr_to_scan;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex))
		return 0;

	/* "fast-path" to count number of available objects */
	if (nr_to_scan == 0) {
		cnt = 0;
		list_for_each_entry(obj,
				    &dev_priv->mm.inactive_list,
				    mm_list)
			cnt++;
		mutex_unlock(&dev->struct_mutex);
		return cnt / 100 * sysctl_vfs_cache_pressure;
	}

rescan:
	/* first scan for clean buffers */
	i915_gem_retire_requests(dev);

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (i915_gem_object_is_purgeable(obj)) {
			if (i915_gem_object_unbind(obj) == 0 &&
			    --nr_to_scan == 0)
				break;
		}
	}

	/* second pass, evict/count anything still on the inactive list */
	cnt = 0;
	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (nr_to_scan &&
		    i915_gem_object_unbind(obj) == 0)
			nr_to_scan--;
		else
			cnt++;
	}

	if (nr_to_scan && i915_gpu_is_active(dev)) {
		/*
		 * We are desperate for pages, so as a last resort, wait
		 * for the GPU to finish and discard whatever we can.
		 * This dramatically reduces the number of OOM-killer
		 * events whilst running the GPU aggressively.
		 */
		if (i915_gpu_idle(dev) == 0)
			goto rescan;
	}
	mutex_unlock(&dev->struct_mutex);
	return cnt / 100 * sysctl_vfs_cache_pressure;
}
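
/*
 * Note on the shrinker contract: when sc->nr_to_scan is zero the core VM
 * only wants an estimate, so the callback reports the inactive-object
 * count scaled by sysctl_vfs_cache_pressure without evicting anything;
 * otherwise it unbinds purgeable objects first and then anything else on
 * the inactive list. The matching teardown is expected to live in the
 * driver unload path (assumed here, outside this section) and would look
 * roughly like:
 *
 *	unregister_shrinker(&dev_priv->mm.inactive_shrinker);
 *
 * so the callback can no longer run once dev_priv is freed.
 */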