1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include "drmP.h" 29 #include "drm.h" 30 #include "i915_drm.h" 31 #include "i915_drv.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include <linux/shmem_fs.h> 35 #include <linux/slab.h> 36 #include <linux/swap.h> 37 #include <linux/pci.h> 38 39 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); 40 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 41 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 42 static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, 43 bool write); 44 static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, 45 uint64_t offset, 46 uint64_t size); 47 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj); 48 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 49 unsigned alignment, 50 bool map_and_fenceable); 51 static void i915_gem_clear_fence_reg(struct drm_device *dev, 52 struct drm_i915_fence_reg *reg); 53 static int i915_gem_phys_pwrite(struct drm_device *dev, 54 struct drm_i915_gem_object *obj, 55 struct drm_i915_gem_pwrite *args, 56 struct drm_file *file); 57 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); 58 59 static int i915_gem_inactive_shrink(struct shrinker *shrinker, 60 struct shrink_control *sc); 61 62 /* some bookkeeping */ 63 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 64 size_t size) 65 { 66 dev_priv->mm.object_count++; 67 dev_priv->mm.object_memory += size; 68 } 69 70 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 71 size_t size) 72 { 73 dev_priv->mm.object_count--; 74 dev_priv->mm.object_memory -= size; 75 } 76 77 static int 78 i915_gem_wait_for_error(struct drm_device *dev) 79 { 80 struct drm_i915_private *dev_priv = dev->dev_private; 81 struct completion *x = &dev_priv->error_completion; 82 unsigned long flags; 83 int ret; 84 85 if (!atomic_read(&dev_priv->mm.wedged)) 86 return 0; 87 88 ret = wait_for_completion_interruptible(x); 89 if (ret) 90 return ret; 91 92 if (atomic_read(&dev_priv->mm.wedged)) { 93 /* GPU is hung, bump the completion count to account for 94 * the token 
we just consumed so that we never hit zero and 95 * end up waiting upon a subsequent completion event that 96 * will never happen. 97 */ 98 spin_lock_irqsave(&x->wait.lock, flags); 99 x->done++; 100 spin_unlock_irqrestore(&x->wait.lock, flags); 101 } 102 return 0; 103 } 104 105 int i915_mutex_lock_interruptible(struct drm_device *dev) 106 { 107 int ret; 108 109 ret = i915_gem_wait_for_error(dev); 110 if (ret) 111 return ret; 112 113 ret = mutex_lock_interruptible(&dev->struct_mutex); 114 if (ret) 115 return ret; 116 117 WARN_ON(i915_verify_lists(dev)); 118 return 0; 119 } 120 121 static inline bool 122 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 123 { 124 return obj->gtt_space && !obj->active && obj->pin_count == 0; 125 } 126 127 void i915_gem_do_init(struct drm_device *dev, 128 unsigned long start, 129 unsigned long mappable_end, 130 unsigned long end) 131 { 132 drm_i915_private_t *dev_priv = dev->dev_private; 133 134 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start); 135 136 dev_priv->mm.gtt_start = start; 137 dev_priv->mm.gtt_mappable_end = mappable_end; 138 dev_priv->mm.gtt_end = end; 139 dev_priv->mm.gtt_total = end - start; 140 dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start; 141 142 /* Take over this portion of the GTT */ 143 intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE); 144 } 145 146 int 147 i915_gem_init_ioctl(struct drm_device *dev, void *data, 148 struct drm_file *file) 149 { 150 struct drm_i915_gem_init *args = data; 151 152 if (args->gtt_start >= args->gtt_end || 153 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 154 return -EINVAL; 155 156 mutex_lock(&dev->struct_mutex); 157 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end); 158 mutex_unlock(&dev->struct_mutex); 159 160 return 0; 161 } 162 163 int 164 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 165 struct drm_file *file) 166 { 167 struct drm_i915_private *dev_priv = dev->dev_private; 168 struct drm_i915_gem_get_aperture *args = data; 169 struct drm_i915_gem_object *obj; 170 size_t pinned; 171 172 if (!(dev->driver->driver_features & DRIVER_GEM)) 173 return -ENODEV; 174 175 pinned = 0; 176 mutex_lock(&dev->struct_mutex); 177 list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list) 178 pinned += obj->gtt_space->size; 179 mutex_unlock(&dev->struct_mutex); 180 181 args->aper_size = dev_priv->mm.gtt_total; 182 args->aper_available_size = args->aper_size -pinned; 183 184 return 0; 185 } 186 187 static int 188 i915_gem_create(struct drm_file *file, 189 struct drm_device *dev, 190 uint64_t size, 191 uint32_t *handle_p) 192 { 193 struct drm_i915_gem_object *obj; 194 int ret; 195 u32 handle; 196 197 size = roundup(size, PAGE_SIZE); 198 199 /* Allocate the new object */ 200 obj = i915_gem_alloc_object(dev, size); 201 if (obj == NULL) 202 return -ENOMEM; 203 204 ret = drm_gem_handle_create(file, &obj->base, &handle); 205 if (ret) { 206 drm_gem_object_release(&obj->base); 207 i915_gem_info_remove_obj(dev->dev_private, obj->base.size); 208 kfree(obj); 209 return ret; 210 } 211 212 /* drop reference from allocate - handle holds it now */ 213 drm_gem_object_unreference(&obj->base); 214 trace_i915_gem_object_create(obj); 215 216 *handle_p = handle; 217 return 0; 218 } 219 220 int 221 i915_gem_dumb_create(struct drm_file *file, 222 struct drm_device *dev, 223 struct drm_mode_create_dumb *args) 224 { 225 /* have to work out size/pitch and return them */ 226 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); 227 args->size = 
args->pitch * args->height;
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
                          struct drm_device *dev,
                          uint32_t handle)
{
        return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
{
        struct drm_i915_gem_create *args = data;
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
        drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

        return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
                obj->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
                int dst_offset,
                struct page *src_page,
                int src_offset,
                int length)
{
        char *dst_vaddr, *src_vaddr;

        dst_vaddr = kmap(dst_page);
        src_vaddr = kmap(src_page);

        memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

        kunmap(src_page);
        kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
                      int gpu_offset,
                      struct page *cpu_page,
                      int cpu_offset,
                      int length,
                      int is_read)
{
        char *gpu_vaddr, *cpu_vaddr;

        /* Use the unswizzled path if this page isn't affected. */
        if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
                if (is_read)
                        return slow_shmem_copy(cpu_page, cpu_offset,
                                               gpu_page, gpu_offset, length);
                else
                        return slow_shmem_copy(gpu_page, gpu_offset,
                                               cpu_page, cpu_offset, length);
        }

        gpu_vaddr = kmap(gpu_page);
        cpu_vaddr = kmap(cpu_page);

        /* Copy the data, XORing A6 with A17 (1). The user already knows he's
         * XORing with the other bits (A9 for Y, A9 and A10 for X)
         */
        while (length > 0) {
                int cacheline_end = ALIGN(gpu_offset + 1, 64);
                int this_length = min(cacheline_end - gpu_offset, length);
                int swizzled_gpu_offset = gpu_offset ^ 64;

                if (is_read) {
                        memcpy(cpu_vaddr + cpu_offset,
                               gpu_vaddr + swizzled_gpu_offset,
                               this_length);
                } else {
                        memcpy(gpu_vaddr + swizzled_gpu_offset,
                               cpu_vaddr + cpu_offset,
                               this_length);
                }
                cpu_offset += this_length;
                gpu_offset += this_length;
                length -= this_length;
        }

        kunmap(cpu_page);
        kunmap(gpu_page);
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
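 *
 * Illustrative sketch of the dispatch (this mirrors i915_gem_pread_ioctl()
 * further below; it is not additional driver logic):
 *
 *      ret = -EFAULT;
 *      if (!i915_gem_object_needs_bit17_swizzle(obj))
 *              ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
 *      if (ret == -EFAULT)
 *              ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
 *
 * The fast path copies with kmap_atomic() + __copy_to_user_inatomic(), which
 * must not fault on the user buffer while struct_mutex is held; a short copy
 * therefore drops us into the slow path, which pins the user pages with
 * get_user_pages() before copying.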
330 */ 331 static int 332 i915_gem_shmem_pread_fast(struct drm_device *dev, 333 struct drm_i915_gem_object *obj, 334 struct drm_i915_gem_pread *args, 335 struct drm_file *file) 336 { 337 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 338 ssize_t remain; 339 loff_t offset; 340 char __user *user_data; 341 int page_offset, page_length; 342 343 user_data = (char __user *) (uintptr_t) args->data_ptr; 344 remain = args->size; 345 346 offset = args->offset; 347 348 while (remain > 0) { 349 struct page *page; 350 char *vaddr; 351 int ret; 352 353 /* Operation in this page 354 * 355 * page_offset = offset within page 356 * page_length = bytes to copy for this page 357 */ 358 page_offset = offset_in_page(offset); 359 page_length = remain; 360 if ((page_offset + remain) > PAGE_SIZE) 361 page_length = PAGE_SIZE - page_offset; 362 363 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 364 if (IS_ERR(page)) 365 return PTR_ERR(page); 366 367 vaddr = kmap_atomic(page); 368 ret = __copy_to_user_inatomic(user_data, 369 vaddr + page_offset, 370 page_length); 371 kunmap_atomic(vaddr); 372 373 mark_page_accessed(page); 374 page_cache_release(page); 375 if (ret) 376 return -EFAULT; 377 378 remain -= page_length; 379 user_data += page_length; 380 offset += page_length; 381 } 382 383 return 0; 384 } 385 386 /** 387 * This is the fallback shmem pread path, which allocates temporary storage 388 * in kernel space to copy_to_user into outside of the struct_mutex, so we 389 * can copy out of the object's backing pages while holding the struct mutex 390 * and not take page faults. 391 */ 392 static int 393 i915_gem_shmem_pread_slow(struct drm_device *dev, 394 struct drm_i915_gem_object *obj, 395 struct drm_i915_gem_pread *args, 396 struct drm_file *file) 397 { 398 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 399 struct mm_struct *mm = current->mm; 400 struct page **user_pages; 401 ssize_t remain; 402 loff_t offset, pinned_pages, i; 403 loff_t first_data_page, last_data_page, num_pages; 404 int shmem_page_offset; 405 int data_page_index, data_page_offset; 406 int page_length; 407 int ret; 408 uint64_t data_ptr = args->data_ptr; 409 int do_bit17_swizzling; 410 411 remain = args->size; 412 413 /* Pin the user pages containing the data. We can't fault while 414 * holding the struct mutex, yet we want to hold it while 415 * dereferencing the user data. 416 */ 417 first_data_page = data_ptr / PAGE_SIZE; 418 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 419 num_pages = last_data_page - first_data_page + 1; 420 421 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); 422 if (user_pages == NULL) 423 return -ENOMEM; 424 425 mutex_unlock(&dev->struct_mutex); 426 down_read(&mm->mmap_sem); 427 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 428 num_pages, 1, 0, user_pages, NULL); 429 up_read(&mm->mmap_sem); 430 mutex_lock(&dev->struct_mutex); 431 if (pinned_pages < num_pages) { 432 ret = -EFAULT; 433 goto out; 434 } 435 436 ret = i915_gem_object_set_cpu_read_domain_range(obj, 437 args->offset, 438 args->size); 439 if (ret) 440 goto out; 441 442 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 443 444 offset = args->offset; 445 446 while (remain > 0) { 447 struct page *page; 448 449 /* Operation in this page 450 * 451 * shmem_page_offset = offset within page in shmem file 452 * data_page_index = page number in get_user_pages return 453 * data_page_offset = offset with data_page_index page. 
454 * page_length = bytes to copy for this page 455 */ 456 shmem_page_offset = offset_in_page(offset); 457 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 458 data_page_offset = offset_in_page(data_ptr); 459 460 page_length = remain; 461 if ((shmem_page_offset + page_length) > PAGE_SIZE) 462 page_length = PAGE_SIZE - shmem_page_offset; 463 if ((data_page_offset + page_length) > PAGE_SIZE) 464 page_length = PAGE_SIZE - data_page_offset; 465 466 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 467 if (IS_ERR(page)) { 468 ret = PTR_ERR(page); 469 goto out; 470 } 471 472 if (do_bit17_swizzling) { 473 slow_shmem_bit17_copy(page, 474 shmem_page_offset, 475 user_pages[data_page_index], 476 data_page_offset, 477 page_length, 478 1); 479 } else { 480 slow_shmem_copy(user_pages[data_page_index], 481 data_page_offset, 482 page, 483 shmem_page_offset, 484 page_length); 485 } 486 487 mark_page_accessed(page); 488 page_cache_release(page); 489 490 remain -= page_length; 491 data_ptr += page_length; 492 offset += page_length; 493 } 494 495 out: 496 for (i = 0; i < pinned_pages; i++) { 497 SetPageDirty(user_pages[i]); 498 mark_page_accessed(user_pages[i]); 499 page_cache_release(user_pages[i]); 500 } 501 drm_free_large(user_pages); 502 503 return ret; 504 } 505 506 /** 507 * Reads data from the object referenced by handle. 508 * 509 * On error, the contents of *data are undefined. 510 */ 511 int 512 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 513 struct drm_file *file) 514 { 515 struct drm_i915_gem_pread *args = data; 516 struct drm_i915_gem_object *obj; 517 int ret = 0; 518 519 if (args->size == 0) 520 return 0; 521 522 if (!access_ok(VERIFY_WRITE, 523 (char __user *)(uintptr_t)args->data_ptr, 524 args->size)) 525 return -EFAULT; 526 527 ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr, 528 args->size); 529 if (ret) 530 return -EFAULT; 531 532 ret = i915_mutex_lock_interruptible(dev); 533 if (ret) 534 return ret; 535 536 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 537 if (&obj->base == NULL) { 538 ret = -ENOENT; 539 goto unlock; 540 } 541 542 /* Bounds check source. 
*/ 543 if (args->offset > obj->base.size || 544 args->size > obj->base.size - args->offset) { 545 ret = -EINVAL; 546 goto out; 547 } 548 549 trace_i915_gem_object_pread(obj, args->offset, args->size); 550 551 ret = i915_gem_object_set_cpu_read_domain_range(obj, 552 args->offset, 553 args->size); 554 if (ret) 555 goto out; 556 557 ret = -EFAULT; 558 if (!i915_gem_object_needs_bit17_swizzle(obj)) 559 ret = i915_gem_shmem_pread_fast(dev, obj, args, file); 560 if (ret == -EFAULT) 561 ret = i915_gem_shmem_pread_slow(dev, obj, args, file); 562 563 out: 564 drm_gem_object_unreference(&obj->base); 565 unlock: 566 mutex_unlock(&dev->struct_mutex); 567 return ret; 568 } 569 570 /* This is the fast write path which cannot handle 571 * page faults in the source data 572 */ 573 574 static inline int 575 fast_user_write(struct io_mapping *mapping, 576 loff_t page_base, int page_offset, 577 char __user *user_data, 578 int length) 579 { 580 char *vaddr_atomic; 581 unsigned long unwritten; 582 583 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 584 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, 585 user_data, length); 586 io_mapping_unmap_atomic(vaddr_atomic); 587 return unwritten; 588 } 589 590 /* Here's the write path which can sleep for 591 * page faults 592 */ 593 594 static inline void 595 slow_kernel_write(struct io_mapping *mapping, 596 loff_t gtt_base, int gtt_offset, 597 struct page *user_page, int user_offset, 598 int length) 599 { 600 char __iomem *dst_vaddr; 601 char *src_vaddr; 602 603 dst_vaddr = io_mapping_map_wc(mapping, gtt_base); 604 src_vaddr = kmap(user_page); 605 606 memcpy_toio(dst_vaddr + gtt_offset, 607 src_vaddr + user_offset, 608 length); 609 610 kunmap(user_page); 611 io_mapping_unmap(dst_vaddr); 612 } 613 614 /** 615 * This is the fast pwrite path, where we copy the data directly from the 616 * user into the GTT, uncached. 617 */ 618 static int 619 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 620 struct drm_i915_gem_object *obj, 621 struct drm_i915_gem_pwrite *args, 622 struct drm_file *file) 623 { 624 drm_i915_private_t *dev_priv = dev->dev_private; 625 ssize_t remain; 626 loff_t offset, page_base; 627 char __user *user_data; 628 int page_offset, page_length; 629 630 user_data = (char __user *) (uintptr_t) args->data_ptr; 631 remain = args->size; 632 633 offset = obj->gtt_offset + args->offset; 634 635 while (remain > 0) { 636 /* Operation in this page 637 * 638 * page_base = page offset within aperture 639 * page_offset = offset within page 640 * page_length = bytes to copy for this page 641 */ 642 page_base = offset & PAGE_MASK; 643 page_offset = offset_in_page(offset); 644 page_length = remain; 645 if ((page_offset + remain) > PAGE_SIZE) 646 page_length = PAGE_SIZE - page_offset; 647 648 /* If we get a fault while copying data, then (presumably) our 649 * source page isn't available. Return the error and we'll 650 * retry in the slow path. 651 */ 652 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, 653 page_offset, user_data, page_length)) 654 return -EFAULT; 655 656 remain -= page_length; 657 user_data += page_length; 658 offset += page_length; 659 } 660 661 return 0; 662 } 663 664 /** 665 * This is the fallback GTT pwrite path, which uses get_user_pages to pin 666 * the memory and maps it using kmap_atomic for copying. 667 * 668 * This code resulted in x11perf -rgb10text consuming about 10% more CPU 669 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit). 
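 *
 * Rough shape of the per-page step (a sketch of the loop implemented below,
 * not extra logic):
 *
 *      slow_kernel_write(dev_priv->mm.gtt_mapping,
 *                        gtt_page_base, gtt_page_offset,
 *                        user_pages[data_page_index],
 *                        data_page_offset, page_length);
 *
 * The user pages are pinned with get_user_pages() while struct_mutex is
 * dropped, so the copy through the WC io_mapping cannot fault under the
 * mutex.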
670 */ 671 static int 672 i915_gem_gtt_pwrite_slow(struct drm_device *dev, 673 struct drm_i915_gem_object *obj, 674 struct drm_i915_gem_pwrite *args, 675 struct drm_file *file) 676 { 677 drm_i915_private_t *dev_priv = dev->dev_private; 678 ssize_t remain; 679 loff_t gtt_page_base, offset; 680 loff_t first_data_page, last_data_page, num_pages; 681 loff_t pinned_pages, i; 682 struct page **user_pages; 683 struct mm_struct *mm = current->mm; 684 int gtt_page_offset, data_page_offset, data_page_index, page_length; 685 int ret; 686 uint64_t data_ptr = args->data_ptr; 687 688 remain = args->size; 689 690 /* Pin the user pages containing the data. We can't fault while 691 * holding the struct mutex, and all of the pwrite implementations 692 * want to hold it while dereferencing the user data. 693 */ 694 first_data_page = data_ptr / PAGE_SIZE; 695 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 696 num_pages = last_data_page - first_data_page + 1; 697 698 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); 699 if (user_pages == NULL) 700 return -ENOMEM; 701 702 mutex_unlock(&dev->struct_mutex); 703 down_read(&mm->mmap_sem); 704 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 705 num_pages, 0, 0, user_pages, NULL); 706 up_read(&mm->mmap_sem); 707 mutex_lock(&dev->struct_mutex); 708 if (pinned_pages < num_pages) { 709 ret = -EFAULT; 710 goto out_unpin_pages; 711 } 712 713 ret = i915_gem_object_set_to_gtt_domain(obj, true); 714 if (ret) 715 goto out_unpin_pages; 716 717 ret = i915_gem_object_put_fence(obj); 718 if (ret) 719 goto out_unpin_pages; 720 721 offset = obj->gtt_offset + args->offset; 722 723 while (remain > 0) { 724 /* Operation in this page 725 * 726 * gtt_page_base = page offset within aperture 727 * gtt_page_offset = offset within page in aperture 728 * data_page_index = page number in get_user_pages return 729 * data_page_offset = offset with data_page_index page. 730 * page_length = bytes to copy for this page 731 */ 732 gtt_page_base = offset & PAGE_MASK; 733 gtt_page_offset = offset_in_page(offset); 734 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 735 data_page_offset = offset_in_page(data_ptr); 736 737 page_length = remain; 738 if ((gtt_page_offset + page_length) > PAGE_SIZE) 739 page_length = PAGE_SIZE - gtt_page_offset; 740 if ((data_page_offset + page_length) > PAGE_SIZE) 741 page_length = PAGE_SIZE - data_page_offset; 742 743 slow_kernel_write(dev_priv->mm.gtt_mapping, 744 gtt_page_base, gtt_page_offset, 745 user_pages[data_page_index], 746 data_page_offset, 747 page_length); 748 749 remain -= page_length; 750 offset += page_length; 751 data_ptr += page_length; 752 } 753 754 out_unpin_pages: 755 for (i = 0; i < pinned_pages; i++) 756 page_cache_release(user_pages[i]); 757 drm_free_large(user_pages); 758 759 return ret; 760 } 761 762 /** 763 * This is the fast shmem pwrite path, which attempts to directly 764 * copy_from_user into the kmapped pages backing the object. 
765 */ 766 static int 767 i915_gem_shmem_pwrite_fast(struct drm_device *dev, 768 struct drm_i915_gem_object *obj, 769 struct drm_i915_gem_pwrite *args, 770 struct drm_file *file) 771 { 772 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 773 ssize_t remain; 774 loff_t offset; 775 char __user *user_data; 776 int page_offset, page_length; 777 778 user_data = (char __user *) (uintptr_t) args->data_ptr; 779 remain = args->size; 780 781 offset = args->offset; 782 obj->dirty = 1; 783 784 while (remain > 0) { 785 struct page *page; 786 char *vaddr; 787 int ret; 788 789 /* Operation in this page 790 * 791 * page_offset = offset within page 792 * page_length = bytes to copy for this page 793 */ 794 page_offset = offset_in_page(offset); 795 page_length = remain; 796 if ((page_offset + remain) > PAGE_SIZE) 797 page_length = PAGE_SIZE - page_offset; 798 799 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 800 if (IS_ERR(page)) 801 return PTR_ERR(page); 802 803 vaddr = kmap_atomic(page, KM_USER0); 804 ret = __copy_from_user_inatomic(vaddr + page_offset, 805 user_data, 806 page_length); 807 kunmap_atomic(vaddr, KM_USER0); 808 809 set_page_dirty(page); 810 mark_page_accessed(page); 811 page_cache_release(page); 812 813 /* If we get a fault while copying data, then (presumably) our 814 * source page isn't available. Return the error and we'll 815 * retry in the slow path. 816 */ 817 if (ret) 818 return -EFAULT; 819 820 remain -= page_length; 821 user_data += page_length; 822 offset += page_length; 823 } 824 825 return 0; 826 } 827 828 /** 829 * This is the fallback shmem pwrite path, which uses get_user_pages to pin 830 * the memory and maps it using kmap_atomic for copying. 831 * 832 * This avoids taking mmap_sem for faulting on the user's address while the 833 * struct_mutex is held. 834 */ 835 static int 836 i915_gem_shmem_pwrite_slow(struct drm_device *dev, 837 struct drm_i915_gem_object *obj, 838 struct drm_i915_gem_pwrite *args, 839 struct drm_file *file) 840 { 841 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 842 struct mm_struct *mm = current->mm; 843 struct page **user_pages; 844 ssize_t remain; 845 loff_t offset, pinned_pages, i; 846 loff_t first_data_page, last_data_page, num_pages; 847 int shmem_page_offset; 848 int data_page_index, data_page_offset; 849 int page_length; 850 int ret; 851 uint64_t data_ptr = args->data_ptr; 852 int do_bit17_swizzling; 853 854 remain = args->size; 855 856 /* Pin the user pages containing the data. We can't fault while 857 * holding the struct mutex, and all of the pwrite implementations 858 * want to hold it while dereferencing the user data. 
859 */ 860 first_data_page = data_ptr / PAGE_SIZE; 861 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 862 num_pages = last_data_page - first_data_page + 1; 863 864 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); 865 if (user_pages == NULL) 866 return -ENOMEM; 867 868 mutex_unlock(&dev->struct_mutex); 869 down_read(&mm->mmap_sem); 870 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 871 num_pages, 0, 0, user_pages, NULL); 872 up_read(&mm->mmap_sem); 873 mutex_lock(&dev->struct_mutex); 874 if (pinned_pages < num_pages) { 875 ret = -EFAULT; 876 goto out; 877 } 878 879 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 880 if (ret) 881 goto out; 882 883 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 884 885 offset = args->offset; 886 obj->dirty = 1; 887 888 while (remain > 0) { 889 struct page *page; 890 891 /* Operation in this page 892 * 893 * shmem_page_offset = offset within page in shmem file 894 * data_page_index = page number in get_user_pages return 895 * data_page_offset = offset with data_page_index page. 896 * page_length = bytes to copy for this page 897 */ 898 shmem_page_offset = offset_in_page(offset); 899 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 900 data_page_offset = offset_in_page(data_ptr); 901 902 page_length = remain; 903 if ((shmem_page_offset + page_length) > PAGE_SIZE) 904 page_length = PAGE_SIZE - shmem_page_offset; 905 if ((data_page_offset + page_length) > PAGE_SIZE) 906 page_length = PAGE_SIZE - data_page_offset; 907 908 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 909 if (IS_ERR(page)) { 910 ret = PTR_ERR(page); 911 goto out; 912 } 913 914 if (do_bit17_swizzling) { 915 slow_shmem_bit17_copy(page, 916 shmem_page_offset, 917 user_pages[data_page_index], 918 data_page_offset, 919 page_length, 920 0); 921 } else { 922 slow_shmem_copy(page, 923 shmem_page_offset, 924 user_pages[data_page_index], 925 data_page_offset, 926 page_length); 927 } 928 929 set_page_dirty(page); 930 mark_page_accessed(page); 931 page_cache_release(page); 932 933 remain -= page_length; 934 data_ptr += page_length; 935 offset += page_length; 936 } 937 938 out: 939 for (i = 0; i < pinned_pages; i++) 940 page_cache_release(user_pages[i]); 941 drm_free_large(user_pages); 942 943 return ret; 944 } 945 946 /** 947 * Writes data to the object referenced by handle. 948 * 949 * On error, the contents of the buffer that were to be modified are undefined. 950 */ 951 int 952 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 953 struct drm_file *file) 954 { 955 struct drm_i915_gem_pwrite *args = data; 956 struct drm_i915_gem_object *obj; 957 int ret; 958 959 if (args->size == 0) 960 return 0; 961 962 if (!access_ok(VERIFY_READ, 963 (char __user *)(uintptr_t)args->data_ptr, 964 args->size)) 965 return -EFAULT; 966 967 ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr, 968 args->size); 969 if (ret) 970 return -EFAULT; 971 972 ret = i915_mutex_lock_interruptible(dev); 973 if (ret) 974 return ret; 975 976 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 977 if (&obj->base == NULL) { 978 ret = -ENOENT; 979 goto unlock; 980 } 981 982 /* Bounds check destination. 
*/ 983 if (args->offset > obj->base.size || 984 args->size > obj->base.size - args->offset) { 985 ret = -EINVAL; 986 goto out; 987 } 988 989 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 990 991 /* We can only do the GTT pwrite on untiled buffers, as otherwise 992 * it would end up going through the fenced access, and we'll get 993 * different detiling behavior between reading and writing. 994 * pread/pwrite currently are reading and writing from the CPU 995 * perspective, requiring manual detiling by the client. 996 */ 997 if (obj->phys_obj) 998 ret = i915_gem_phys_pwrite(dev, obj, args, file); 999 else if (obj->gtt_space && 1000 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1001 ret = i915_gem_object_pin(obj, 0, true); 1002 if (ret) 1003 goto out; 1004 1005 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1006 if (ret) 1007 goto out_unpin; 1008 1009 ret = i915_gem_object_put_fence(obj); 1010 if (ret) 1011 goto out_unpin; 1012 1013 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1014 if (ret == -EFAULT) 1015 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file); 1016 1017 out_unpin: 1018 i915_gem_object_unpin(obj); 1019 } else { 1020 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1021 if (ret) 1022 goto out; 1023 1024 ret = -EFAULT; 1025 if (!i915_gem_object_needs_bit17_swizzle(obj)) 1026 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file); 1027 if (ret == -EFAULT) 1028 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); 1029 } 1030 1031 out: 1032 drm_gem_object_unreference(&obj->base); 1033 unlock: 1034 mutex_unlock(&dev->struct_mutex); 1035 return ret; 1036 } 1037 1038 /** 1039 * Called when user space prepares to use an object with the CPU, either 1040 * through the mmap ioctl's mapping or a GTT mapping. 1041 */ 1042 int 1043 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1044 struct drm_file *file) 1045 { 1046 struct drm_i915_gem_set_domain *args = data; 1047 struct drm_i915_gem_object *obj; 1048 uint32_t read_domains = args->read_domains; 1049 uint32_t write_domain = args->write_domain; 1050 int ret; 1051 1052 if (!(dev->driver->driver_features & DRIVER_GEM)) 1053 return -ENODEV; 1054 1055 /* Only handle setting domains to types used by the CPU. */ 1056 if (write_domain & I915_GEM_GPU_DOMAINS) 1057 return -EINVAL; 1058 1059 if (read_domains & I915_GEM_GPU_DOMAINS) 1060 return -EINVAL; 1061 1062 /* Having something in the write domain implies it's in the read 1063 * domain, and only that read domain. Enforce that in the request. 1064 */ 1065 if (write_domain != 0 && read_domains != write_domain) 1066 return -EINVAL; 1067 1068 ret = i915_mutex_lock_interruptible(dev); 1069 if (ret) 1070 return ret; 1071 1072 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1073 if (&obj->base == NULL) { 1074 ret = -ENOENT; 1075 goto unlock; 1076 } 1077 1078 if (read_domains & I915_GEM_DOMAIN_GTT) { 1079 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1080 1081 /* Silently promote "you're not bound, there was nothing to do" 1082 * to success, since the client was just asking us to 1083 * make sure everything was done. 
                 */
                if (ret == -EINVAL)
                        ret = 0;
        } else {
                ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
        }

        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file)
{
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;
        int ret = 0;

        if (!(dev->driver->driver_features & DRIVER_GEM))
                return -ENODEV;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Pinned buffers may be scanout, so flush the cache */
        if (obj->pin_count)
                i915_gem_object_flush_cpu_write_domain(obj);

        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it
 * doesn't imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_mmap *args = data;
        struct drm_gem_object *obj;
        unsigned long addr;

        if (!(dev->driver->driver_features & DRIVER_GEM))
                return -ENODEV;

        obj = drm_gem_object_lookup(dev, file, args->handle);
        if (obj == NULL)
                return -ENOENT;

        if (obj->size > dev_priv->mm.gtt_mappable_end) {
                drm_gem_object_unreference_unlocked(obj);
                return -E2BIG;
        }

        down_write(&current->mm->mmap_sem);
        addr = do_mmap(obj->filp, 0, args->size,
                       PROT_READ | PROT_WRITE, MAP_SHARED,
                       args->offset);
        up_write(&current->mm->mmap_sem);
        drm_gem_object_unreference_unlocked(obj);
        if (IS_ERR((void *)addr))
                return addr;

        args->addr_ptr = (uint64_t) addr;

        return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
        struct drm_device *dev = obj->base.dev;
        drm_i915_private_t *dev_priv = dev->dev_private;
        pgoff_t page_offset;
        unsigned long pfn;
        int ret = 0;
        bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

        /* We don't use vmf->pgoff since that has the fake offset */
        page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
                PAGE_SHIFT;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto out;

        trace_i915_gem_object_fault(obj, page_offset, true, write);

        /* Now bind it into the GTT if needed */
        if (!obj->map_and_fenceable) {
                ret = i915_gem_object_unbind(obj);
                if (ret)
                        goto unlock;
        }
        if (!obj->gtt_space) {
                ret = i915_gem_object_bind_to_gtt(obj, 0, true);
                if (ret)
                        goto unlock;

                ret = i915_gem_object_set_to_gtt_domain(obj, write);
                if (ret)
                        goto unlock;
        }

        if (obj->tiling_mode == I915_TILING_NONE)
                ret = i915_gem_object_put_fence(obj);
        else
                ret = i915_gem_object_get_fence(obj, NULL);
        if (ret)
                goto unlock;

        if (i915_gem_object_is_inactive(obj))
                list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

        obj->fault_mappable = true;

        pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
                page_offset;

        /* Finally, remap it using the new GTT offset */
        ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
        mutex_unlock(&dev->struct_mutex);
out:
        switch (ret) {
        case -EIO:
        case -EAGAIN:
                /* Give the error handler a chance to run and move the
                 * objects off the GPU active list. Next time we service the
                 * fault, we should be able to transition the page into the
                 * GTT without touching the GPU (and so avoid further
                 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
                 * with coherency, just lost writes.
                 */
                set_need_resched();
        case 0:
        case -ERESTARTSYS:
        case -EINTR:
                return VM_FAULT_NOPAGE;
        case -ENOMEM:
                return VM_FAULT_OOM;
        default:
                return VM_FAULT_SIGBUS;
        }
}

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call. The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_gem_mm *mm = dev->mm_private;
        struct drm_map_list *list;
        struct drm_local_map *map;
        int ret = 0;

        /* Set the object up for mmap'ing */
        list = &obj->base.map_list;
        list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
        if (!list->map)
                return -ENOMEM;

        map = list->map;
        map->type = _DRM_GEM;
        map->size = obj->base.size;
        map->handle = obj;

        /* Get a DRM GEM mmap offset allocated...
*/ 1299 list->file_offset_node = drm_mm_search_free(&mm->offset_manager, 1300 obj->base.size / PAGE_SIZE, 1301 0, 0); 1302 if (!list->file_offset_node) { 1303 DRM_ERROR("failed to allocate offset for bo %d\n", 1304 obj->base.name); 1305 ret = -ENOSPC; 1306 goto out_free_list; 1307 } 1308 1309 list->file_offset_node = drm_mm_get_block(list->file_offset_node, 1310 obj->base.size / PAGE_SIZE, 1311 0); 1312 if (!list->file_offset_node) { 1313 ret = -ENOMEM; 1314 goto out_free_list; 1315 } 1316 1317 list->hash.key = list->file_offset_node->start; 1318 ret = drm_ht_insert_item(&mm->offset_hash, &list->hash); 1319 if (ret) { 1320 DRM_ERROR("failed to add to map hash\n"); 1321 goto out_free_mm; 1322 } 1323 1324 return 0; 1325 1326 out_free_mm: 1327 drm_mm_put_block(list->file_offset_node); 1328 out_free_list: 1329 kfree(list->map); 1330 list->map = NULL; 1331 1332 return ret; 1333 } 1334 1335 /** 1336 * i915_gem_release_mmap - remove physical page mappings 1337 * @obj: obj in question 1338 * 1339 * Preserve the reservation of the mmapping with the DRM core code, but 1340 * relinquish ownership of the pages back to the system. 1341 * 1342 * It is vital that we remove the page mapping if we have mapped a tiled 1343 * object through the GTT and then lose the fence register due to 1344 * resource pressure. Similarly if the object has been moved out of the 1345 * aperture, than pages mapped into userspace must be revoked. Removing the 1346 * mapping will then trigger a page fault on the next user access, allowing 1347 * fixup by i915_gem_fault(). 1348 */ 1349 void 1350 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1351 { 1352 if (!obj->fault_mappable) 1353 return; 1354 1355 if (obj->base.dev->dev_mapping) 1356 unmap_mapping_range(obj->base.dev->dev_mapping, 1357 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, 1358 obj->base.size, 1); 1359 1360 obj->fault_mappable = false; 1361 } 1362 1363 static void 1364 i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj) 1365 { 1366 struct drm_device *dev = obj->base.dev; 1367 struct drm_gem_mm *mm = dev->mm_private; 1368 struct drm_map_list *list = &obj->base.map_list; 1369 1370 drm_ht_remove_item(&mm->offset_hash, &list->hash); 1371 drm_mm_put_block(list->file_offset_node); 1372 kfree(list->map); 1373 list->map = NULL; 1374 } 1375 1376 static uint32_t 1377 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1378 { 1379 uint32_t gtt_size; 1380 1381 if (INTEL_INFO(dev)->gen >= 4 || 1382 tiling_mode == I915_TILING_NONE) 1383 return size; 1384 1385 /* Previous chips need a power-of-two fence region when tiling */ 1386 if (INTEL_INFO(dev)->gen == 3) 1387 gtt_size = 1024*1024; 1388 else 1389 gtt_size = 512*1024; 1390 1391 while (gtt_size < size) 1392 gtt_size <<= 1; 1393 1394 return gtt_size; 1395 } 1396 1397 /** 1398 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1399 * @obj: object to check 1400 * 1401 * Return the required GTT alignment for an object, taking into account 1402 * potential fence register mapping. 1403 */ 1404 static uint32_t 1405 i915_gem_get_gtt_alignment(struct drm_device *dev, 1406 uint32_t size, 1407 int tiling_mode) 1408 { 1409 /* 1410 * Minimum alignment is 4k (GTT page size), but might be greater 1411 * if a fence register is needed for the object. 
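         *
         * Worked example (following i915_gem_get_gtt_size() above): a 700KB
         * tiled object on gen3 needs a power-of-two fence region of at least
         * 1MB, so it must be bound at a 1MB-aligned offset; on gen2 the
         * region starts at 512KB and is likewise doubled to 1MB. On gen4+ or
         * for untiled objects the 4k page alignment below is sufficient.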
1412 */ 1413 if (INTEL_INFO(dev)->gen >= 4 || 1414 tiling_mode == I915_TILING_NONE) 1415 return 4096; 1416 1417 /* 1418 * Previous chips need to be aligned to the size of the smallest 1419 * fence register that can contain the object. 1420 */ 1421 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1422 } 1423 1424 /** 1425 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an 1426 * unfenced object 1427 * @dev: the device 1428 * @size: size of the object 1429 * @tiling_mode: tiling mode of the object 1430 * 1431 * Return the required GTT alignment for an object, only taking into account 1432 * unfenced tiled surface requirements. 1433 */ 1434 uint32_t 1435 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, 1436 uint32_t size, 1437 int tiling_mode) 1438 { 1439 /* 1440 * Minimum alignment is 4k (GTT page size) for sane hw. 1441 */ 1442 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || 1443 tiling_mode == I915_TILING_NONE) 1444 return 4096; 1445 1446 /* Previous hardware however needs to be aligned to a power-of-two 1447 * tile height. The simplest method for determining this is to reuse 1448 * the power-of-tile object size. 1449 */ 1450 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1451 } 1452 1453 int 1454 i915_gem_mmap_gtt(struct drm_file *file, 1455 struct drm_device *dev, 1456 uint32_t handle, 1457 uint64_t *offset) 1458 { 1459 struct drm_i915_private *dev_priv = dev->dev_private; 1460 struct drm_i915_gem_object *obj; 1461 int ret; 1462 1463 if (!(dev->driver->driver_features & DRIVER_GEM)) 1464 return -ENODEV; 1465 1466 ret = i915_mutex_lock_interruptible(dev); 1467 if (ret) 1468 return ret; 1469 1470 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1471 if (&obj->base == NULL) { 1472 ret = -ENOENT; 1473 goto unlock; 1474 } 1475 1476 if (obj->base.size > dev_priv->mm.gtt_mappable_end) { 1477 ret = -E2BIG; 1478 goto unlock; 1479 } 1480 1481 if (obj->madv != I915_MADV_WILLNEED) { 1482 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1483 ret = -EINVAL; 1484 goto out; 1485 } 1486 1487 if (!obj->base.map_list.map) { 1488 ret = i915_gem_create_mmap_offset(obj); 1489 if (ret) 1490 goto out; 1491 } 1492 1493 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; 1494 1495 out: 1496 drm_gem_object_unreference(&obj->base); 1497 unlock: 1498 mutex_unlock(&dev->struct_mutex); 1499 return ret; 1500 } 1501 1502 /** 1503 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1504 * @dev: DRM device 1505 * @data: GTT mapping ioctl data 1506 * @file: GEM object info 1507 * 1508 * Simply returns the fake offset to userspace so it can mmap it. 1509 * The mmap call will end up in drm_gem_mmap(), which will set things 1510 * up so we can get faults in the handler above. 1511 * 1512 * The fault handler will take care of binding the object into the GTT 1513 * (since it may have been evicted to make room for something), allocating 1514 * a fence register, and mapping the appropriate aperture address into 1515 * userspace. 
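 *
 * Illustrative userspace usage (a sketch only; real code typically goes
 * through libdrm rather than raw ioctls):
 *
 *      struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *      ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *      ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                 fd, arg.offset);
 *
 * The returned offset is only meaningful as an mmap(2) token on the DRM fd;
 * it is not a GTT or physical address.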
1516 */ 1517 int 1518 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1519 struct drm_file *file) 1520 { 1521 struct drm_i915_gem_mmap_gtt *args = data; 1522 1523 if (!(dev->driver->driver_features & DRIVER_GEM)) 1524 return -ENODEV; 1525 1526 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1527 } 1528 1529 1530 static int 1531 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, 1532 gfp_t gfpmask) 1533 { 1534 int page_count, i; 1535 struct address_space *mapping; 1536 struct inode *inode; 1537 struct page *page; 1538 1539 /* Get the list of pages out of our struct file. They'll be pinned 1540 * at this point until we release them. 1541 */ 1542 page_count = obj->base.size / PAGE_SIZE; 1543 BUG_ON(obj->pages != NULL); 1544 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *)); 1545 if (obj->pages == NULL) 1546 return -ENOMEM; 1547 1548 inode = obj->base.filp->f_path.dentry->d_inode; 1549 mapping = inode->i_mapping; 1550 gfpmask |= mapping_gfp_mask(mapping); 1551 1552 for (i = 0; i < page_count; i++) { 1553 page = shmem_read_mapping_page_gfp(mapping, i, gfpmask); 1554 if (IS_ERR(page)) 1555 goto err_pages; 1556 1557 obj->pages[i] = page; 1558 } 1559 1560 if (obj->tiling_mode != I915_TILING_NONE) 1561 i915_gem_object_do_bit_17_swizzle(obj); 1562 1563 return 0; 1564 1565 err_pages: 1566 while (i--) 1567 page_cache_release(obj->pages[i]); 1568 1569 drm_free_large(obj->pages); 1570 obj->pages = NULL; 1571 return PTR_ERR(page); 1572 } 1573 1574 static void 1575 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1576 { 1577 int page_count = obj->base.size / PAGE_SIZE; 1578 int i; 1579 1580 BUG_ON(obj->madv == __I915_MADV_PURGED); 1581 1582 if (obj->tiling_mode != I915_TILING_NONE) 1583 i915_gem_object_save_bit_17_swizzle(obj); 1584 1585 if (obj->madv == I915_MADV_DONTNEED) 1586 obj->dirty = 0; 1587 1588 for (i = 0; i < page_count; i++) { 1589 if (obj->dirty) 1590 set_page_dirty(obj->pages[i]); 1591 1592 if (obj->madv == I915_MADV_WILLNEED) 1593 mark_page_accessed(obj->pages[i]); 1594 1595 page_cache_release(obj->pages[i]); 1596 } 1597 obj->dirty = 0; 1598 1599 drm_free_large(obj->pages); 1600 obj->pages = NULL; 1601 } 1602 1603 void 1604 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 1605 struct intel_ring_buffer *ring, 1606 u32 seqno) 1607 { 1608 struct drm_device *dev = obj->base.dev; 1609 struct drm_i915_private *dev_priv = dev->dev_private; 1610 1611 BUG_ON(ring == NULL); 1612 obj->ring = ring; 1613 1614 /* Add a reference if we're newly entering the active list. */ 1615 if (!obj->active) { 1616 drm_gem_object_reference(&obj->base); 1617 obj->active = 1; 1618 } 1619 1620 /* Move from whatever list we were on to the tail of execution. 
*/ 1621 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); 1622 list_move_tail(&obj->ring_list, &ring->active_list); 1623 1624 obj->last_rendering_seqno = seqno; 1625 if (obj->fenced_gpu_access) { 1626 struct drm_i915_fence_reg *reg; 1627 1628 BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE); 1629 1630 obj->last_fenced_seqno = seqno; 1631 obj->last_fenced_ring = ring; 1632 1633 reg = &dev_priv->fence_regs[obj->fence_reg]; 1634 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 1635 } 1636 } 1637 1638 static void 1639 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) 1640 { 1641 list_del_init(&obj->ring_list); 1642 obj->last_rendering_seqno = 0; 1643 } 1644 1645 static void 1646 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj) 1647 { 1648 struct drm_device *dev = obj->base.dev; 1649 drm_i915_private_t *dev_priv = dev->dev_private; 1650 1651 BUG_ON(!obj->active); 1652 list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list); 1653 1654 i915_gem_object_move_off_active(obj); 1655 } 1656 1657 static void 1658 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 1659 { 1660 struct drm_device *dev = obj->base.dev; 1661 struct drm_i915_private *dev_priv = dev->dev_private; 1662 1663 if (obj->pin_count != 0) 1664 list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list); 1665 else 1666 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 1667 1668 BUG_ON(!list_empty(&obj->gpu_write_list)); 1669 BUG_ON(!obj->active); 1670 obj->ring = NULL; 1671 1672 i915_gem_object_move_off_active(obj); 1673 obj->fenced_gpu_access = false; 1674 1675 obj->active = 0; 1676 obj->pending_gpu_write = false; 1677 drm_gem_object_unreference(&obj->base); 1678 1679 WARN_ON(i915_verify_lists(dev)); 1680 } 1681 1682 /* Immediately discard the backing storage */ 1683 static void 1684 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1685 { 1686 struct inode *inode; 1687 1688 /* Our goal here is to return as much of the memory as 1689 * is possible back to the system as we are called from OOM. 1690 * To do this we must instruct the shmfs to drop all of its 1691 * backing pages, *now*. 
1692 */ 1693 inode = obj->base.filp->f_path.dentry->d_inode; 1694 shmem_truncate_range(inode, 0, (loff_t)-1); 1695 1696 obj->madv = __I915_MADV_PURGED; 1697 } 1698 1699 static inline int 1700 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1701 { 1702 return obj->madv == I915_MADV_DONTNEED; 1703 } 1704 1705 static void 1706 i915_gem_process_flushing_list(struct intel_ring_buffer *ring, 1707 uint32_t flush_domains) 1708 { 1709 struct drm_i915_gem_object *obj, *next; 1710 1711 list_for_each_entry_safe(obj, next, 1712 &ring->gpu_write_list, 1713 gpu_write_list) { 1714 if (obj->base.write_domain & flush_domains) { 1715 uint32_t old_write_domain = obj->base.write_domain; 1716 1717 obj->base.write_domain = 0; 1718 list_del_init(&obj->gpu_write_list); 1719 i915_gem_object_move_to_active(obj, ring, 1720 i915_gem_next_request_seqno(ring)); 1721 1722 trace_i915_gem_object_change_domain(obj, 1723 obj->base.read_domains, 1724 old_write_domain); 1725 } 1726 } 1727 } 1728 1729 int 1730 i915_add_request(struct intel_ring_buffer *ring, 1731 struct drm_file *file, 1732 struct drm_i915_gem_request *request) 1733 { 1734 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1735 uint32_t seqno; 1736 int was_empty; 1737 int ret; 1738 1739 BUG_ON(request == NULL); 1740 1741 ret = ring->add_request(ring, &seqno); 1742 if (ret) 1743 return ret; 1744 1745 trace_i915_gem_request_add(ring, seqno); 1746 1747 request->seqno = seqno; 1748 request->ring = ring; 1749 request->emitted_jiffies = jiffies; 1750 was_empty = list_empty(&ring->request_list); 1751 list_add_tail(&request->list, &ring->request_list); 1752 1753 if (file) { 1754 struct drm_i915_file_private *file_priv = file->driver_priv; 1755 1756 spin_lock(&file_priv->mm.lock); 1757 request->file_priv = file_priv; 1758 list_add_tail(&request->client_list, 1759 &file_priv->mm.request_list); 1760 spin_unlock(&file_priv->mm.lock); 1761 } 1762 1763 ring->outstanding_lazy_request = false; 1764 1765 if (!dev_priv->mm.suspended) { 1766 mod_timer(&dev_priv->hangcheck_timer, 1767 jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); 1768 if (was_empty) 1769 queue_delayed_work(dev_priv->wq, 1770 &dev_priv->mm.retire_work, HZ); 1771 } 1772 return 0; 1773 } 1774 1775 static inline void 1776 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1777 { 1778 struct drm_i915_file_private *file_priv = request->file_priv; 1779 1780 if (!file_priv) 1781 return; 1782 1783 spin_lock(&file_priv->mm.lock); 1784 if (request->file_priv) { 1785 list_del(&request->client_list); 1786 request->file_priv = NULL; 1787 } 1788 spin_unlock(&file_priv->mm.lock); 1789 } 1790 1791 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 1792 struct intel_ring_buffer *ring) 1793 { 1794 while (!list_empty(&ring->request_list)) { 1795 struct drm_i915_gem_request *request; 1796 1797 request = list_first_entry(&ring->request_list, 1798 struct drm_i915_gem_request, 1799 list); 1800 1801 list_del(&request->list); 1802 i915_gem_request_remove_from_client(request); 1803 kfree(request); 1804 } 1805 1806 while (!list_empty(&ring->active_list)) { 1807 struct drm_i915_gem_object *obj; 1808 1809 obj = list_first_entry(&ring->active_list, 1810 struct drm_i915_gem_object, 1811 ring_list); 1812 1813 obj->base.write_domain = 0; 1814 list_del_init(&obj->gpu_write_list); 1815 i915_gem_object_move_to_inactive(obj); 1816 } 1817 } 1818 1819 static void i915_gem_reset_fences(struct drm_device *dev) 1820 { 1821 struct drm_i915_private *dev_priv = dev->dev_private; 1822 int 
i; 1823 1824 for (i = 0; i < 16; i++) { 1825 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 1826 struct drm_i915_gem_object *obj = reg->obj; 1827 1828 if (!obj) 1829 continue; 1830 1831 if (obj->tiling_mode) 1832 i915_gem_release_mmap(obj); 1833 1834 reg->obj->fence_reg = I915_FENCE_REG_NONE; 1835 reg->obj->fenced_gpu_access = false; 1836 reg->obj->last_fenced_seqno = 0; 1837 reg->obj->last_fenced_ring = NULL; 1838 i915_gem_clear_fence_reg(dev, reg); 1839 } 1840 } 1841 1842 void i915_gem_reset(struct drm_device *dev) 1843 { 1844 struct drm_i915_private *dev_priv = dev->dev_private; 1845 struct drm_i915_gem_object *obj; 1846 int i; 1847 1848 for (i = 0; i < I915_NUM_RINGS; i++) 1849 i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]); 1850 1851 /* Remove anything from the flushing lists. The GPU cache is likely 1852 * to be lost on reset along with the data, so simply move the 1853 * lost bo to the inactive list. 1854 */ 1855 while (!list_empty(&dev_priv->mm.flushing_list)) { 1856 obj= list_first_entry(&dev_priv->mm.flushing_list, 1857 struct drm_i915_gem_object, 1858 mm_list); 1859 1860 obj->base.write_domain = 0; 1861 list_del_init(&obj->gpu_write_list); 1862 i915_gem_object_move_to_inactive(obj); 1863 } 1864 1865 /* Move everything out of the GPU domains to ensure we do any 1866 * necessary invalidation upon reuse. 1867 */ 1868 list_for_each_entry(obj, 1869 &dev_priv->mm.inactive_list, 1870 mm_list) 1871 { 1872 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 1873 } 1874 1875 /* The fence registers are invalidated so clear them out */ 1876 i915_gem_reset_fences(dev); 1877 } 1878 1879 /** 1880 * This function clears the request list as sequence numbers are passed. 1881 */ 1882 static void 1883 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 1884 { 1885 uint32_t seqno; 1886 int i; 1887 1888 if (list_empty(&ring->request_list)) 1889 return; 1890 1891 WARN_ON(i915_verify_lists(ring->dev)); 1892 1893 seqno = ring->get_seqno(ring); 1894 1895 for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) 1896 if (seqno >= ring->sync_seqno[i]) 1897 ring->sync_seqno[i] = 0; 1898 1899 while (!list_empty(&ring->request_list)) { 1900 struct drm_i915_gem_request *request; 1901 1902 request = list_first_entry(&ring->request_list, 1903 struct drm_i915_gem_request, 1904 list); 1905 1906 if (!i915_seqno_passed(seqno, request->seqno)) 1907 break; 1908 1909 trace_i915_gem_request_retire(ring, request->seqno); 1910 1911 list_del(&request->list); 1912 i915_gem_request_remove_from_client(request); 1913 kfree(request); 1914 } 1915 1916 /* Move any buffers on the active list that are no longer referenced 1917 * by the ringbuffer to the flushing/inactive lists as appropriate. 
1918 */ 1919 while (!list_empty(&ring->active_list)) { 1920 struct drm_i915_gem_object *obj; 1921 1922 obj= list_first_entry(&ring->active_list, 1923 struct drm_i915_gem_object, 1924 ring_list); 1925 1926 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) 1927 break; 1928 1929 if (obj->base.write_domain != 0) 1930 i915_gem_object_move_to_flushing(obj); 1931 else 1932 i915_gem_object_move_to_inactive(obj); 1933 } 1934 1935 if (unlikely(ring->trace_irq_seqno && 1936 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 1937 ring->irq_put(ring); 1938 ring->trace_irq_seqno = 0; 1939 } 1940 1941 WARN_ON(i915_verify_lists(ring->dev)); 1942 } 1943 1944 void 1945 i915_gem_retire_requests(struct drm_device *dev) 1946 { 1947 drm_i915_private_t *dev_priv = dev->dev_private; 1948 int i; 1949 1950 if (!list_empty(&dev_priv->mm.deferred_free_list)) { 1951 struct drm_i915_gem_object *obj, *next; 1952 1953 /* We must be careful that during unbind() we do not 1954 * accidentally infinitely recurse into retire requests. 1955 * Currently: 1956 * retire -> free -> unbind -> wait -> retire_ring 1957 */ 1958 list_for_each_entry_safe(obj, next, 1959 &dev_priv->mm.deferred_free_list, 1960 mm_list) 1961 i915_gem_free_object_tail(obj); 1962 } 1963 1964 for (i = 0; i < I915_NUM_RINGS; i++) 1965 i915_gem_retire_requests_ring(&dev_priv->ring[i]); 1966 } 1967 1968 static void 1969 i915_gem_retire_work_handler(struct work_struct *work) 1970 { 1971 drm_i915_private_t *dev_priv; 1972 struct drm_device *dev; 1973 bool idle; 1974 int i; 1975 1976 dev_priv = container_of(work, drm_i915_private_t, 1977 mm.retire_work.work); 1978 dev = dev_priv->dev; 1979 1980 /* Come back later if the device is busy... */ 1981 if (!mutex_trylock(&dev->struct_mutex)) { 1982 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1983 return; 1984 } 1985 1986 i915_gem_retire_requests(dev); 1987 1988 /* Send a periodic flush down the ring so we don't hold onto GEM 1989 * objects indefinitely. 1990 */ 1991 idle = true; 1992 for (i = 0; i < I915_NUM_RINGS; i++) { 1993 struct intel_ring_buffer *ring = &dev_priv->ring[i]; 1994 1995 if (!list_empty(&ring->gpu_write_list)) { 1996 struct drm_i915_gem_request *request; 1997 int ret; 1998 1999 ret = i915_gem_flush_ring(ring, 2000 0, I915_GEM_GPU_DOMAINS); 2001 request = kzalloc(sizeof(*request), GFP_KERNEL); 2002 if (ret || request == NULL || 2003 i915_add_request(ring, NULL, request)) 2004 kfree(request); 2005 } 2006 2007 idle &= list_empty(&ring->request_list); 2008 } 2009 2010 if (!dev_priv->mm.suspended && !idle) 2011 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 2012 2013 mutex_unlock(&dev->struct_mutex); 2014 } 2015 2016 /** 2017 * Waits for a sequence number to be signaled, and cleans up the 2018 * request and object lists appropriately for that event. 2019 */ 2020 int 2021 i915_wait_request(struct intel_ring_buffer *ring, 2022 uint32_t seqno) 2023 { 2024 drm_i915_private_t *dev_priv = ring->dev->dev_private; 2025 u32 ier; 2026 int ret = 0; 2027 2028 BUG_ON(seqno == 0); 2029 2030 if (atomic_read(&dev_priv->mm.wedged)) { 2031 struct completion *x = &dev_priv->error_completion; 2032 bool recovery_complete; 2033 unsigned long flags; 2034 2035 /* Give the error handler a chance to run. */ 2036 spin_lock_irqsave(&x->wait.lock, flags); 2037 recovery_complete = x->done > 0; 2038 spin_unlock_irqrestore(&x->wait.lock, flags); 2039 2040 return recovery_complete ? 
-EIO : -EAGAIN; 2041 } 2042 2043 if (seqno == ring->outstanding_lazy_request) { 2044 struct drm_i915_gem_request *request; 2045 2046 request = kzalloc(sizeof(*request), GFP_KERNEL); 2047 if (request == NULL) 2048 return -ENOMEM; 2049 2050 ret = i915_add_request(ring, NULL, request); 2051 if (ret) { 2052 kfree(request); 2053 return ret; 2054 } 2055 2056 seqno = request->seqno; 2057 } 2058 2059 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 2060 if (HAS_PCH_SPLIT(ring->dev)) 2061 ier = I915_READ(DEIER) | I915_READ(GTIER); 2062 else 2063 ier = I915_READ(IER); 2064 if (!ier) { 2065 DRM_ERROR("something (likely vbetool) disabled " 2066 "interrupts, re-enabling\n"); 2067 ring->dev->driver->irq_preinstall(ring->dev); 2068 ring->dev->driver->irq_postinstall(ring->dev); 2069 } 2070 2071 trace_i915_gem_request_wait_begin(ring, seqno); 2072 2073 ring->waiting_seqno = seqno; 2074 if (ring->irq_get(ring)) { 2075 if (dev_priv->mm.interruptible) 2076 ret = wait_event_interruptible(ring->irq_queue, 2077 i915_seqno_passed(ring->get_seqno(ring), seqno) 2078 || atomic_read(&dev_priv->mm.wedged)); 2079 else 2080 wait_event(ring->irq_queue, 2081 i915_seqno_passed(ring->get_seqno(ring), seqno) 2082 || atomic_read(&dev_priv->mm.wedged)); 2083 2084 ring->irq_put(ring); 2085 } else if (wait_for(i915_seqno_passed(ring->get_seqno(ring), 2086 seqno) || 2087 atomic_read(&dev_priv->mm.wedged), 3000)) 2088 ret = -EBUSY; 2089 ring->waiting_seqno = 0; 2090 2091 trace_i915_gem_request_wait_end(ring, seqno); 2092 } 2093 if (atomic_read(&dev_priv->mm.wedged)) 2094 ret = -EAGAIN; 2095 2096 if (ret && ret != -ERESTARTSYS) 2097 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n", 2098 __func__, ret, seqno, ring->get_seqno(ring), 2099 dev_priv->next_seqno); 2100 2101 /* Directly dispatch request retiring. While we have the work queue 2102 * to handle this, the waiter on a request often wants an associated 2103 * buffer to have made it to the inactive list, and we would need 2104 * a separate wait queue to handle that. 2105 */ 2106 if (ret == 0) 2107 i915_gem_retire_requests_ring(ring); 2108 2109 return ret; 2110 } 2111 2112 /** 2113 * Ensures that all rendering to the object has completed and the object is 2114 * safe to unbind from the GTT or access from the CPU. 2115 */ 2116 int 2117 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) 2118 { 2119 int ret; 2120 2121 /* This function only exists to support waiting for existing rendering, 2122 * not for emitting required flushes. 2123 */ 2124 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0); 2125 2126 /* If there is rendering queued on the buffer being evicted, wait for 2127 * it. 2128 */ 2129 if (obj->active) { 2130 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); 2131 if (ret) 2132 return ret; 2133 } 2134 2135 return 0; 2136 } 2137 2138 /** 2139 * Unbinds an object from the GTT aperture. 2140 */ 2141 int 2142 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2143 { 2144 int ret = 0; 2145 2146 if (obj->gtt_space == NULL) 2147 return 0; 2148 2149 if (obj->pin_count != 0) { 2150 DRM_ERROR("Attempting to unbind pinned buffer\n"); 2151 return -EINVAL; 2152 } 2153 2154 /* blow away mappings if mapped through GTT */ 2155 i915_gem_release_mmap(obj); 2156 2157 /* Move the object to the CPU domain to ensure that 2158 * any possible CPU writes while it's not in the GTT 2159 * are flushed when we go to remap it. This will 2160 * also ensure that all pending GPU writes are finished 2161 * before we unbind. 
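 * Note that i915_gem_object_set_to_cpu_domain() also waits for any
 * outstanding rendering on the object before flushing.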
2162 */ 2163 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2164 if (ret == -ERESTARTSYS) 2165 return ret; 2166 /* Continue on if we fail due to EIO, the GPU is hung so we 2167 * should be safe and we need to cleanup or else we might 2168 * cause memory corruption through use-after-free. 2169 */ 2170 if (ret) { 2171 i915_gem_clflush_object(obj); 2172 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2173 } 2174 2175 /* release the fence reg _after_ flushing */ 2176 ret = i915_gem_object_put_fence(obj); 2177 if (ret == -ERESTARTSYS) 2178 return ret; 2179 2180 trace_i915_gem_object_unbind(obj); 2181 2182 i915_gem_gtt_unbind_object(obj); 2183 i915_gem_object_put_pages_gtt(obj); 2184 2185 list_del_init(&obj->gtt_list); 2186 list_del_init(&obj->mm_list); 2187 /* Avoid an unnecessary call to unbind on rebind. */ 2188 obj->map_and_fenceable = true; 2189 2190 drm_mm_put_block(obj->gtt_space); 2191 obj->gtt_space = NULL; 2192 obj->gtt_offset = 0; 2193 2194 if (i915_gem_object_is_purgeable(obj)) 2195 i915_gem_object_truncate(obj); 2196 2197 return ret; 2198 } 2199 2200 int 2201 i915_gem_flush_ring(struct intel_ring_buffer *ring, 2202 uint32_t invalidate_domains, 2203 uint32_t flush_domains) 2204 { 2205 int ret; 2206 2207 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) 2208 return 0; 2209 2210 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); 2211 2212 ret = ring->flush(ring, invalidate_domains, flush_domains); 2213 if (ret) 2214 return ret; 2215 2216 if (flush_domains & I915_GEM_GPU_DOMAINS) 2217 i915_gem_process_flushing_list(ring, flush_domains); 2218 2219 return 0; 2220 } 2221 2222 static int i915_ring_idle(struct intel_ring_buffer *ring) 2223 { 2224 int ret; 2225 2226 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) 2227 return 0; 2228 2229 if (!list_empty(&ring->gpu_write_list)) { 2230 ret = i915_gem_flush_ring(ring, 2231 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2232 if (ret) 2233 return ret; 2234 } 2235 2236 return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); 2237 } 2238 2239 int 2240 i915_gpu_idle(struct drm_device *dev) 2241 { 2242 drm_i915_private_t *dev_priv = dev->dev_private; 2243 bool lists_empty; 2244 int ret, i; 2245 2246 lists_empty = (list_empty(&dev_priv->mm.flushing_list) && 2247 list_empty(&dev_priv->mm.active_list)); 2248 if (lists_empty) 2249 return 0; 2250 2251 /* Flush everything onto the inactive list. 
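 * i915_ring_idle() flushes any outstanding GPU writes on each ring and
 * then waits for that ring's most recent request to complete.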
*/ 2252 for (i = 0; i < I915_NUM_RINGS; i++) { 2253 ret = i915_ring_idle(&dev_priv->ring[i]); 2254 if (ret) 2255 return ret; 2256 } 2257 2258 return 0; 2259 } 2260 2261 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, 2262 struct intel_ring_buffer *pipelined) 2263 { 2264 struct drm_device *dev = obj->base.dev; 2265 drm_i915_private_t *dev_priv = dev->dev_private; 2266 u32 size = obj->gtt_space->size; 2267 int regnum = obj->fence_reg; 2268 uint64_t val; 2269 2270 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2271 0xfffff000) << 32; 2272 val |= obj->gtt_offset & 0xfffff000; 2273 val |= (uint64_t)((obj->stride / 128) - 1) << 2274 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2275 2276 if (obj->tiling_mode == I915_TILING_Y) 2277 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2278 val |= I965_FENCE_REG_VALID; 2279 2280 if (pipelined) { 2281 int ret = intel_ring_begin(pipelined, 6); 2282 if (ret) 2283 return ret; 2284 2285 intel_ring_emit(pipelined, MI_NOOP); 2286 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2287 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8); 2288 intel_ring_emit(pipelined, (u32)val); 2289 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4); 2290 intel_ring_emit(pipelined, (u32)(val >> 32)); 2291 intel_ring_advance(pipelined); 2292 } else 2293 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); 2294 2295 return 0; 2296 } 2297 2298 static int i965_write_fence_reg(struct drm_i915_gem_object *obj, 2299 struct intel_ring_buffer *pipelined) 2300 { 2301 struct drm_device *dev = obj->base.dev; 2302 drm_i915_private_t *dev_priv = dev->dev_private; 2303 u32 size = obj->gtt_space->size; 2304 int regnum = obj->fence_reg; 2305 uint64_t val; 2306 2307 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2308 0xfffff000) << 32; 2309 val |= obj->gtt_offset & 0xfffff000; 2310 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2311 if (obj->tiling_mode == I915_TILING_Y) 2312 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2313 val |= I965_FENCE_REG_VALID; 2314 2315 if (pipelined) { 2316 int ret = intel_ring_begin(pipelined, 6); 2317 if (ret) 2318 return ret; 2319 2320 intel_ring_emit(pipelined, MI_NOOP); 2321 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2322 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8); 2323 intel_ring_emit(pipelined, (u32)val); 2324 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4); 2325 intel_ring_emit(pipelined, (u32)(val >> 32)); 2326 intel_ring_advance(pipelined); 2327 } else 2328 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); 2329 2330 return 0; 2331 } 2332 2333 static int i915_write_fence_reg(struct drm_i915_gem_object *obj, 2334 struct intel_ring_buffer *pipelined) 2335 { 2336 struct drm_device *dev = obj->base.dev; 2337 drm_i915_private_t *dev_priv = dev->dev_private; 2338 u32 size = obj->gtt_space->size; 2339 u32 fence_reg, val, pitch_val; 2340 int tile_width; 2341 2342 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2343 (size & -size) != size || 2344 (obj->gtt_offset & (size - 1)), 2345 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2346 obj->gtt_offset, obj->map_and_fenceable, size)) 2347 return -EINVAL; 2348 2349 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2350 tile_width = 128; 2351 else 2352 tile_width = 512; 2353 2354 /* Note: pitch better be a power of two tile widths */ 2355 pitch_val = obj->stride / tile_width; 2356 pitch_val = ffs(pitch_val) - 1; 2357 2358 val = obj->gtt_offset; 2359 if (obj->tiling_mode == I915_TILING_Y) 2360 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2361 val |= I915_FENCE_SIZE_BITS(size); 2362 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2363 val |= I830_FENCE_REG_VALID; 2364 2365 fence_reg = obj->fence_reg; 2366 if (fence_reg < 8) 2367 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2368 else 2369 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2370 2371 if (pipelined) { 2372 int ret = intel_ring_begin(pipelined, 4); 2373 if (ret) 2374 return ret; 2375 2376 intel_ring_emit(pipelined, MI_NOOP); 2377 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2378 intel_ring_emit(pipelined, fence_reg); 2379 intel_ring_emit(pipelined, val); 2380 intel_ring_advance(pipelined); 2381 } else 2382 I915_WRITE(fence_reg, val); 2383 2384 return 0; 2385 } 2386 2387 static int i830_write_fence_reg(struct drm_i915_gem_object *obj, 2388 struct intel_ring_buffer *pipelined) 2389 { 2390 struct drm_device *dev = obj->base.dev; 2391 drm_i915_private_t *dev_priv = dev->dev_private; 2392 u32 size = obj->gtt_space->size; 2393 int regnum = obj->fence_reg; 2394 uint32_t val; 2395 uint32_t pitch_val; 2396 2397 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2398 (size & -size) != size || 2399 (obj->gtt_offset & (size - 1)), 2400 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2401 obj->gtt_offset, size)) 2402 return -EINVAL; 2403 2404 pitch_val = obj->stride / 128; 2405 pitch_val = ffs(pitch_val) - 1; 2406 2407 val = obj->gtt_offset; 2408 if (obj->tiling_mode == I915_TILING_Y) 2409 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2410 val |= I830_FENCE_SIZE_BITS(size); 2411 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2412 val |= I830_FENCE_REG_VALID; 2413 2414 if (pipelined) { 2415 int ret = intel_ring_begin(pipelined, 4); 2416 if (ret) 2417 return ret; 2418 2419 intel_ring_emit(pipelined, MI_NOOP); 2420 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2421 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); 2422 intel_ring_emit(pipelined, val); 2423 intel_ring_advance(pipelined); 2424 } else 2425 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); 2426 2427 return 0; 2428 } 2429 2430 static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) 2431 { 2432 return i915_seqno_passed(ring->get_seqno(ring), seqno); 2433 } 2434 2435 static int 2436 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, 2437 struct intel_ring_buffer *pipelined) 2438 { 2439 int ret; 2440 2441 if (obj->fenced_gpu_access) { 2442 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2443 ret = i915_gem_flush_ring(obj->last_fenced_ring, 2444 0, obj->base.write_domain); 2445 if (ret) 2446 return ret; 2447 } 2448 2449 obj->fenced_gpu_access = false; 2450 } 2451 2452 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { 2453 if (!ring_passed_seqno(obj->last_fenced_ring, 2454 obj->last_fenced_seqno)) { 2455 ret = i915_wait_request(obj->last_fenced_ring, 2456 obj->last_fenced_seqno); 2457 if (ret) 2458 return ret; 2459 } 2460 2461 obj->last_fenced_seqno = 0; 2462 obj->last_fenced_ring = NULL; 2463 } 2464 2465 /* Ensure that all CPU reads are completed 
before installing a fence 2466 * and all writes before removing the fence. 2467 */ 2468 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2469 mb(); 2470 2471 return 0; 2472 } 2473 2474 int 2475 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2476 { 2477 int ret; 2478 2479 if (obj->tiling_mode) 2480 i915_gem_release_mmap(obj); 2481 2482 ret = i915_gem_object_flush_fence(obj, NULL); 2483 if (ret) 2484 return ret; 2485 2486 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2487 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2488 i915_gem_clear_fence_reg(obj->base.dev, 2489 &dev_priv->fence_regs[obj->fence_reg]); 2490 2491 obj->fence_reg = I915_FENCE_REG_NONE; 2492 } 2493 2494 return 0; 2495 } 2496 2497 static struct drm_i915_fence_reg * 2498 i915_find_fence_reg(struct drm_device *dev, 2499 struct intel_ring_buffer *pipelined) 2500 { 2501 struct drm_i915_private *dev_priv = dev->dev_private; 2502 struct drm_i915_fence_reg *reg, *first, *avail; 2503 int i; 2504 2505 /* First try to find a free reg */ 2506 avail = NULL; 2507 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2508 reg = &dev_priv->fence_regs[i]; 2509 if (!reg->obj) 2510 return reg; 2511 2512 if (!reg->obj->pin_count) 2513 avail = reg; 2514 } 2515 2516 if (avail == NULL) 2517 return NULL; 2518 2519 /* None available, try to steal one or wait for a user to finish */ 2520 avail = first = NULL; 2521 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2522 if (reg->obj->pin_count) 2523 continue; 2524 2525 if (first == NULL) 2526 first = reg; 2527 2528 if (!pipelined || 2529 !reg->obj->last_fenced_ring || 2530 reg->obj->last_fenced_ring == pipelined) { 2531 avail = reg; 2532 break; 2533 } 2534 } 2535 2536 if (avail == NULL) 2537 avail = first; 2538 2539 return avail; 2540 } 2541 2542 /** 2543 * i915_gem_object_get_fence - set up a fence reg for an object 2544 * @obj: object to map through a fence reg 2545 * @pipelined: ring on which to queue the change, or NULL for CPU access 2546 * @interruptible: must we wait uninterruptibly for the register to retire? 2547 * 2548 * When mapping objects through the GTT, userspace wants to be able to write 2549 * to them without having to worry about swizzling if the object is tiled. 2550 * 2551 * This function walks the fence regs looking for a free one for @obj, 2552 * stealing one if it can't find any. 2553 * 2554 * It then sets up the reg based on the object's properties: address, pitch 2555 * and tiling format. 2556 */ 2557 int 2558 i915_gem_object_get_fence(struct drm_i915_gem_object *obj, 2559 struct intel_ring_buffer *pipelined) 2560 { 2561 struct drm_device *dev = obj->base.dev; 2562 struct drm_i915_private *dev_priv = dev->dev_private; 2563 struct drm_i915_fence_reg *reg; 2564 int ret; 2565 2566 /* XXX disable pipelining. There are bugs. Shocking. */ 2567 pipelined = NULL; 2568 2569 /* Just update our place in the LRU if our fence is getting reused. 
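 * If the tiling parameters have changed, the register contents are also
 * rewritten via the update path below.
 */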
2570 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2571 reg = &dev_priv->fence_regs[obj->fence_reg]; 2572 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list); 2573 2574 if (obj->tiling_changed) { 2575 ret = i915_gem_object_flush_fence(obj, pipelined); 2576 if (ret) 2577 return ret; 2578 2579 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) 2580 pipelined = NULL; 2581 2582 if (pipelined) { 2583 reg->setup_seqno = 2584 i915_gem_next_request_seqno(pipelined); 2585 obj->last_fenced_seqno = reg->setup_seqno; 2586 obj->last_fenced_ring = pipelined; 2587 } 2588 2589 goto update; 2590 } 2591 2592 if (!pipelined) { 2593 if (reg->setup_seqno) { 2594 if (!ring_passed_seqno(obj->last_fenced_ring, 2595 reg->setup_seqno)) { 2596 ret = i915_wait_request(obj->last_fenced_ring, 2597 reg->setup_seqno); 2598 if (ret) 2599 return ret; 2600 } 2601 2602 reg->setup_seqno = 0; 2603 } 2604 } else if (obj->last_fenced_ring && 2605 obj->last_fenced_ring != pipelined) { 2606 ret = i915_gem_object_flush_fence(obj, pipelined); 2607 if (ret) 2608 return ret; 2609 } 2610 2611 return 0; 2612 } 2613 2614 reg = i915_find_fence_reg(dev, pipelined); 2615 if (reg == NULL) 2616 return -ENOSPC; 2617 2618 ret = i915_gem_object_flush_fence(obj, pipelined); 2619 if (ret) 2620 return ret; 2621 2622 if (reg->obj) { 2623 struct drm_i915_gem_object *old = reg->obj; 2624 2625 drm_gem_object_reference(&old->base); 2626 2627 if (old->tiling_mode) 2628 i915_gem_release_mmap(old); 2629 2630 ret = i915_gem_object_flush_fence(old, pipelined); 2631 if (ret) { 2632 drm_gem_object_unreference(&old->base); 2633 return ret; 2634 } 2635 2636 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0) 2637 pipelined = NULL; 2638 2639 old->fence_reg = I915_FENCE_REG_NONE; 2640 old->last_fenced_ring = pipelined; 2641 old->last_fenced_seqno = 2642 pipelined ? i915_gem_next_request_seqno(pipelined) : 0; 2643 2644 drm_gem_object_unreference(&old->base); 2645 } else if (obj->last_fenced_seqno == 0) 2646 pipelined = NULL; 2647 2648 reg->obj = obj; 2649 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list); 2650 obj->fence_reg = reg - dev_priv->fence_regs; 2651 obj->last_fenced_ring = pipelined; 2652 2653 reg->setup_seqno = 2654 pipelined ? i915_gem_next_request_seqno(pipelined) : 0; 2655 obj->last_fenced_seqno = reg->setup_seqno; 2656 2657 update: 2658 obj->tiling_changed = false; 2659 switch (INTEL_INFO(dev)->gen) { 2660 case 7: 2661 case 6: 2662 ret = sandybridge_write_fence_reg(obj, pipelined); 2663 break; 2664 case 5: 2665 case 4: 2666 ret = i965_write_fence_reg(obj, pipelined); 2667 break; 2668 case 3: 2669 ret = i915_write_fence_reg(obj, pipelined); 2670 break; 2671 case 2: 2672 ret = i830_write_fence_reg(obj, pipelined); 2673 break; 2674 } 2675 2676 return ret; 2677 } 2678 2679 /** 2680 * i915_gem_clear_fence_reg - clear out fence register info 2681 * @obj: object to clear 2682 * 2683 * Zeroes out the fence register itself and clears out the associated 2684 * data structures in dev_priv and obj.
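 * Gen2/3 fence registers are 32-bit (gen3 keeps registers 8-15 in a
 * second bank), while gen4 and later use 64-bit registers.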
2685 */ 2686 static void 2687 i915_gem_clear_fence_reg(struct drm_device *dev, 2688 struct drm_i915_fence_reg *reg) 2689 { 2690 drm_i915_private_t *dev_priv = dev->dev_private; 2691 uint32_t fence_reg = reg - dev_priv->fence_regs; 2692 2693 switch (INTEL_INFO(dev)->gen) { 2694 case 7: 2695 case 6: 2696 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0); 2697 break; 2698 case 5: 2699 case 4: 2700 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0); 2701 break; 2702 case 3: 2703 if (fence_reg >= 8) 2704 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2705 else 2706 case 2: 2707 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2708 2709 I915_WRITE(fence_reg, 0); 2710 break; 2711 } 2712 2713 list_del_init(&reg->lru_list); 2714 reg->obj = NULL; 2715 reg->setup_seqno = 0; 2716 } 2717 2718 /** 2719 * Finds free space in the GTT aperture and binds the object there. 2720 */ 2721 static int 2722 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2723 unsigned alignment, 2724 bool map_and_fenceable) 2725 { 2726 struct drm_device *dev = obj->base.dev; 2727 drm_i915_private_t *dev_priv = dev->dev_private; 2728 struct drm_mm_node *free_space; 2729 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2730 u32 size, fence_size, fence_alignment, unfenced_alignment; 2731 bool mappable, fenceable; 2732 int ret; 2733 2734 if (obj->madv != I915_MADV_WILLNEED) { 2735 DRM_ERROR("Attempting to bind a purgeable object\n"); 2736 return -EINVAL; 2737 } 2738 2739 fence_size = i915_gem_get_gtt_size(dev, 2740 obj->base.size, 2741 obj->tiling_mode); 2742 fence_alignment = i915_gem_get_gtt_alignment(dev, 2743 obj->base.size, 2744 obj->tiling_mode); 2745 unfenced_alignment = 2746 i915_gem_get_unfenced_gtt_alignment(dev, 2747 obj->base.size, 2748 obj->tiling_mode); 2749 2750 if (alignment == 0) 2751 alignment = map_and_fenceable ? fence_alignment : 2752 unfenced_alignment; 2753 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 2754 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2755 return -EINVAL; 2756 } 2757 2758 size = map_and_fenceable ? fence_size : obj->base.size; 2759 2760 /* If the object is bigger than the entire aperture, reject it early 2761 * before evicting everything in a vain attempt to find space. 2762 */ 2763 if (obj->base.size > 2764 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 2765 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2766 return -E2BIG; 2767 } 2768 2769 search_free: 2770 if (map_and_fenceable) 2771 free_space = 2772 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, 2773 size, alignment, 0, 2774 dev_priv->mm.gtt_mappable_end, 2775 0); 2776 else 2777 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2778 size, alignment, 0); 2779 2780 if (free_space != NULL) { 2781 if (map_and_fenceable) 2782 obj->gtt_space = 2783 drm_mm_get_block_range_generic(free_space, 2784 size, alignment, 0, 2785 dev_priv->mm.gtt_mappable_end, 2786 0); 2787 else 2788 obj->gtt_space = 2789 drm_mm_get_block(free_space, size, alignment); 2790 } 2791 if (obj->gtt_space == NULL) { 2792 /* If the gtt is empty and we're still having trouble 2793 * fitting our object in, we're out of memory.
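 * Otherwise evict something and retry the search.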
2794 */ 2795 ret = i915_gem_evict_something(dev, size, alignment, 2796 map_and_fenceable); 2797 if (ret) 2798 return ret; 2799 2800 goto search_free; 2801 } 2802 2803 ret = i915_gem_object_get_pages_gtt(obj, gfpmask); 2804 if (ret) { 2805 drm_mm_put_block(obj->gtt_space); 2806 obj->gtt_space = NULL; 2807 2808 if (ret == -ENOMEM) { 2809 /* first try to reclaim some memory by clearing the GTT */ 2810 ret = i915_gem_evict_everything(dev, false); 2811 if (ret) { 2812 /* now try to shrink everyone else */ 2813 if (gfpmask) { 2814 gfpmask = 0; 2815 goto search_free; 2816 } 2817 2818 return -ENOMEM; 2819 } 2820 2821 goto search_free; 2822 } 2823 2824 return ret; 2825 } 2826 2827 ret = i915_gem_gtt_bind_object(obj); 2828 if (ret) { 2829 i915_gem_object_put_pages_gtt(obj); 2830 drm_mm_put_block(obj->gtt_space); 2831 obj->gtt_space = NULL; 2832 2833 if (i915_gem_evict_everything(dev, false)) 2834 return ret; 2835 2836 goto search_free; 2837 } 2838 2839 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); 2840 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2841 2842 /* Assert that the object is not currently in any GPU domain. As it 2843 * wasn't in the GTT, there shouldn't be any way it could have been in 2844 * a GPU cache 2845 */ 2846 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2847 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2848 2849 obj->gtt_offset = obj->gtt_space->start; 2850 2851 fenceable = 2852 obj->gtt_space->size == fence_size && 2853 (obj->gtt_space->start & (fence_alignment -1)) == 0; 2854 2855 mappable = 2856 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 2857 2858 obj->map_and_fenceable = mappable && fenceable; 2859 2860 trace_i915_gem_object_bind(obj, map_and_fenceable); 2861 return 0; 2862 } 2863 2864 void 2865 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2866 { 2867 /* If we don't have a page list set up, then we're not pinned 2868 * to GPU, and we can ignore the cache flush because it'll happen 2869 * again at bind time. 2870 */ 2871 if (obj->pages == NULL) 2872 return; 2873 2874 /* If the GPU is snooping the contents of the CPU cache, 2875 * we do not need to manually clear the CPU cache lines. However, 2876 * the caches are only snooped when the render cache is 2877 * flushed/invalidated. As we always have to emit invalidations 2878 * and flushes when moving into and out of the RENDER domain, correct 2879 * snooping behaviour occurs naturally as the result of our domain 2880 * tracking. 2881 */ 2882 if (obj->cache_level != I915_CACHE_NONE) 2883 return; 2884 2885 trace_i915_gem_object_clflush(obj); 2886 2887 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2888 } 2889 2890 /** Flushes any GPU write domain for the object if it's dirty. */ 2891 static int 2892 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) 2893 { 2894 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) 2895 return 0; 2896 2897 /* Queue the GPU write cache flushing we need. */ 2898 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2899 } 2900 2901 /** Flushes the GTT write domain for the object if it's dirty. */ 2902 static void 2903 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2904 { 2905 uint32_t old_write_domain; 2906 2907 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2908 return; 2909 2910 /* No actual flushing is required for the GTT write domain. Writes 2911 * to it immediately go to main memory as far as we know, so there's 2912 * no chipset flush. 
It also doesn't land in render cache. 2913 * 2914 * However, we do have to enforce the order so that all writes through 2915 * the GTT land before any writes to the device, such as updates to 2916 * the GATT itself. 2917 */ 2918 wmb(); 2919 2920 old_write_domain = obj->base.write_domain; 2921 obj->base.write_domain = 0; 2922 2923 trace_i915_gem_object_change_domain(obj, 2924 obj->base.read_domains, 2925 old_write_domain); 2926 } 2927 2928 /** Flushes the CPU write domain for the object if it's dirty. */ 2929 static void 2930 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2931 { 2932 uint32_t old_write_domain; 2933 2934 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2935 return; 2936 2937 i915_gem_clflush_object(obj); 2938 intel_gtt_chipset_flush(); 2939 old_write_domain = obj->base.write_domain; 2940 obj->base.write_domain = 0; 2941 2942 trace_i915_gem_object_change_domain(obj, 2943 obj->base.read_domains, 2944 old_write_domain); 2945 } 2946 2947 /** 2948 * Moves a single object to the GTT read, and possibly write domain. 2949 * 2950 * This function returns when the move is complete, including waiting on 2951 * flushes to occur. 2952 */ 2953 int 2954 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2955 { 2956 uint32_t old_write_domain, old_read_domains; 2957 int ret; 2958 2959 /* Not valid to be called on unbound objects. */ 2960 if (obj->gtt_space == NULL) 2961 return -EINVAL; 2962 2963 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2964 return 0; 2965 2966 ret = i915_gem_object_flush_gpu_write_domain(obj); 2967 if (ret) 2968 return ret; 2969 2970 if (obj->pending_gpu_write || write) { 2971 ret = i915_gem_object_wait_rendering(obj); 2972 if (ret) 2973 return ret; 2974 } 2975 2976 i915_gem_object_flush_cpu_write_domain(obj); 2977 2978 old_write_domain = obj->base.write_domain; 2979 old_read_domains = obj->base.read_domains; 2980 2981 /* It should now be out of any other write domains, and we can update 2982 * the domain values for our changes. 2983 */ 2984 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2985 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2986 if (write) { 2987 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2988 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2989 obj->dirty = 1; 2990 } 2991 2992 trace_i915_gem_object_change_domain(obj, 2993 old_read_domains, 2994 old_write_domain); 2995 2996 return 0; 2997 } 2998 2999 /* 3000 * Prepare buffer for display plane. Use uninterruptible for possible flush 3001 * wait, as in modesetting process we're not supposed to be interrupted. 3002 */ 3003 int 3004 i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj, 3005 struct intel_ring_buffer *pipelined) 3006 { 3007 uint32_t old_read_domains; 3008 int ret; 3009 3010 /* Not valid to be called on unbound objects. */ 3011 if (obj->gtt_space == NULL) 3012 return -EINVAL; 3013 3014 ret = i915_gem_object_flush_gpu_write_domain(obj); 3015 if (ret) 3016 return ret; 3017 3018 3019 /* Currently, we are always called from an non-interruptible context. 
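 * The wait below goes through i915_wait_request(), which only sleeps
 * interruptibly when dev_priv->mm.interruptible is set.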
*/ 3020 if (pipelined != obj->ring) { 3021 ret = i915_gem_object_wait_rendering(obj); 3022 if (ret) 3023 return ret; 3024 } 3025 3026 i915_gem_object_flush_cpu_write_domain(obj); 3027 3028 old_read_domains = obj->base.read_domains; 3029 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3030 3031 trace_i915_gem_object_change_domain(obj, 3032 old_read_domains, 3033 obj->base.write_domain); 3034 3035 return 0; 3036 } 3037 3038 int 3039 i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj) 3040 { 3041 int ret; 3042 3043 if (!obj->active) 3044 return 0; 3045 3046 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3047 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 3048 if (ret) 3049 return ret; 3050 } 3051 3052 return i915_gem_object_wait_rendering(obj); 3053 } 3054 3055 /** 3056 * Moves a single object to the CPU read, and possibly write domain. 3057 * 3058 * This function returns when the move is complete, including waiting on 3059 * flushes to occur. 3060 */ 3061 static int 3062 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3063 { 3064 uint32_t old_write_domain, old_read_domains; 3065 int ret; 3066 3067 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3068 return 0; 3069 3070 ret = i915_gem_object_flush_gpu_write_domain(obj); 3071 if (ret) 3072 return ret; 3073 3074 ret = i915_gem_object_wait_rendering(obj); 3075 if (ret) 3076 return ret; 3077 3078 i915_gem_object_flush_gtt_write_domain(obj); 3079 3080 /* If we have a partially-valid cache of the object in the CPU, 3081 * finish invalidating it and free the per-page flags. 3082 */ 3083 i915_gem_object_set_to_full_cpu_read_domain(obj); 3084 3085 old_write_domain = obj->base.write_domain; 3086 old_read_domains = obj->base.read_domains; 3087 3088 /* Flush the CPU cache if it's still invalid. */ 3089 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3090 i915_gem_clflush_object(obj); 3091 3092 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3093 } 3094 3095 /* It should now be out of any other write domains, and we can update 3096 * the domain values for our changes. 3097 */ 3098 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3099 3100 /* If we're writing through the CPU, then the GPU read domains will 3101 * need to be invalidated at next use. 3102 */ 3103 if (write) { 3104 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3105 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3106 } 3107 3108 trace_i915_gem_object_change_domain(obj, 3109 old_read_domains, 3110 old_write_domain); 3111 3112 return 0; 3113 } 3114 3115 /** 3116 * Moves the object from a partially CPU read to a full one. 3117 * 3118 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3119 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3120 */ 3121 static void 3122 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) 3123 { 3124 if (!obj->page_cpu_valid) 3125 return; 3126 3127 /* If we're partially in the CPU read domain, finish moving it in. 3128 */ 3129 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) { 3130 int i; 3131 3132 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) { 3133 if (obj->page_cpu_valid[i]) 3134 continue; 3135 drm_clflush_pages(obj->pages + i, 1); 3136 } 3137 } 3138 3139 /* Free the page_cpu_valid mappings which are now stale, whether 3140 * or not we've got I915_GEM_DOMAIN_CPU. 
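 * It will be reallocated by i915_gem_object_set_cpu_read_domain_range()
 * if a partial mapping is needed again.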
3141 */ 3142 kfree(obj->page_cpu_valid); 3143 obj->page_cpu_valid = NULL; 3144 } 3145 3146 /** 3147 * Set the CPU read domain on a range of the object. 3148 * 3149 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3150 * not entirely valid. The page_cpu_valid member of the object flags which 3151 * pages have been flushed, and will be respected by 3152 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3153 * of the whole object. 3154 * 3155 * This function returns when the move is complete, including waiting on 3156 * flushes to occur. 3157 */ 3158 static int 3159 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, 3160 uint64_t offset, uint64_t size) 3161 { 3162 uint32_t old_read_domains; 3163 int i, ret; 3164 3165 if (offset == 0 && size == obj->base.size) 3166 return i915_gem_object_set_to_cpu_domain(obj, 0); 3167 3168 ret = i915_gem_object_flush_gpu_write_domain(obj); 3169 if (ret) 3170 return ret; 3171 3172 ret = i915_gem_object_wait_rendering(obj); 3173 if (ret) 3174 return ret; 3175 3176 i915_gem_object_flush_gtt_write_domain(obj); 3177 3178 /* If we're already fully in the CPU read domain, we're done. */ 3179 if (obj->page_cpu_valid == NULL && 3180 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0) 3181 return 0; 3182 3183 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3184 * newly adding I915_GEM_DOMAIN_CPU 3185 */ 3186 if (obj->page_cpu_valid == NULL) { 3187 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE, 3188 GFP_KERNEL); 3189 if (obj->page_cpu_valid == NULL) 3190 return -ENOMEM; 3191 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 3192 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE); 3193 3194 /* Flush the cache on any pages that are still invalid from the CPU's 3195 * perspective. 3196 */ 3197 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3198 i++) { 3199 if (obj->page_cpu_valid[i]) 3200 continue; 3201 3202 drm_clflush_pages(obj->pages + i, 1); 3203 3204 obj->page_cpu_valid[i] = 1; 3205 } 3206 3207 /* It should now be out of any other write domains, and we can update 3208 * the domain values for our changes. 3209 */ 3210 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3211 3212 old_read_domains = obj->base.read_domains; 3213 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3214 3215 trace_i915_gem_object_change_domain(obj, 3216 old_read_domains, 3217 obj->base.write_domain); 3218 3219 return 0; 3220 } 3221 3222 /* Throttle our rendering by waiting until the ring has completed our requests 3223 * emitted over 20 msec ago. 3224 * 3225 * Note that if we were to use the current jiffies each time around the loop, 3226 * we wouldn't escape the function with any frames outstanding if the time to 3227 * render a frame was over 20ms. 3228 * 3229 * This should get us reasonable parallelism between CPU and GPU but also 3230 * relatively low latency when blocking on a particular request to finish. 
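 * Note that the 20ms window is measured from request->emitted_jiffies,
 * i.e. from when the request was submitted, not when it completed.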
3231 */ 3232 static int 3233 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3234 { 3235 struct drm_i915_private *dev_priv = dev->dev_private; 3236 struct drm_i915_file_private *file_priv = file->driver_priv; 3237 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3238 struct drm_i915_gem_request *request; 3239 struct intel_ring_buffer *ring = NULL; 3240 u32 seqno = 0; 3241 int ret; 3242 3243 if (atomic_read(&dev_priv->mm.wedged)) 3244 return -EIO; 3245 3246 spin_lock(&file_priv->mm.lock); 3247 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3248 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3249 break; 3250 3251 ring = request->ring; 3252 seqno = request->seqno; 3253 } 3254 spin_unlock(&file_priv->mm.lock); 3255 3256 if (seqno == 0) 3257 return 0; 3258 3259 ret = 0; 3260 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 3261 /* And wait for the seqno passing without holding any locks and 3262 * causing extra latency for others. This is safe as the irq 3263 * generation is designed to be run atomically and so is 3264 * lockless. 3265 */ 3266 if (ring->irq_get(ring)) { 3267 ret = wait_event_interruptible(ring->irq_queue, 3268 i915_seqno_passed(ring->get_seqno(ring), seqno) 3269 || atomic_read(&dev_priv->mm.wedged)); 3270 ring->irq_put(ring); 3271 3272 if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) 3273 ret = -EIO; 3274 } 3275 } 3276 3277 if (ret == 0) 3278 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3279 3280 return ret; 3281 } 3282 3283 int 3284 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3285 uint32_t alignment, 3286 bool map_and_fenceable) 3287 { 3288 struct drm_device *dev = obj->base.dev; 3289 struct drm_i915_private *dev_priv = dev->dev_private; 3290 int ret; 3291 3292 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 3293 WARN_ON(i915_verify_lists(dev)); 3294 3295 if (obj->gtt_space != NULL) { 3296 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3297 (map_and_fenceable && !obj->map_and_fenceable)) { 3298 WARN(obj->pin_count, 3299 "bo is already pinned with incorrect alignment:" 3300 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3301 " obj->map_and_fenceable=%d\n", 3302 obj->gtt_offset, alignment, 3303 map_and_fenceable, 3304 obj->map_and_fenceable); 3305 ret = i915_gem_object_unbind(obj); 3306 if (ret) 3307 return ret; 3308 } 3309 } 3310 3311 if (obj->gtt_space == NULL) { 3312 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3313 map_and_fenceable); 3314 if (ret) 3315 return ret; 3316 } 3317 3318 if (obj->pin_count++ == 0) { 3319 if (!obj->active) 3320 list_move_tail(&obj->mm_list, 3321 &dev_priv->mm.pinned_list); 3322 } 3323 obj->pin_mappable |= map_and_fenceable; 3324 3325 WARN_ON(i915_verify_lists(dev)); 3326 return 0; 3327 } 3328 3329 void 3330 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3331 { 3332 struct drm_device *dev = obj->base.dev; 3333 drm_i915_private_t *dev_priv = dev->dev_private; 3334 3335 WARN_ON(i915_verify_lists(dev)); 3336 BUG_ON(obj->pin_count == 0); 3337 BUG_ON(obj->gtt_space == NULL); 3338 3339 if (--obj->pin_count == 0) { 3340 if (!obj->active) 3341 list_move_tail(&obj->mm_list, 3342 &dev_priv->mm.inactive_list); 3343 obj->pin_mappable = false; 3344 } 3345 WARN_ON(i915_verify_lists(dev)); 3346 } 3347 3348 int 3349 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3350 struct drm_file *file) 3351 { 3352 struct drm_i915_gem_pin *args = data; 3353 struct drm_i915_gem_object *obj; 3354 int ret; 3355 3356 ret 
= i915_mutex_lock_interruptible(dev); 3357 if (ret) 3358 return ret; 3359 3360 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3361 if (&obj->base == NULL) { 3362 ret = -ENOENT; 3363 goto unlock; 3364 } 3365 3366 if (obj->madv != I915_MADV_WILLNEED) { 3367 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3368 ret = -EINVAL; 3369 goto out; 3370 } 3371 3372 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3373 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3374 args->handle); 3375 ret = -EINVAL; 3376 goto out; 3377 } 3378 3379 obj->user_pin_count++; 3380 obj->pin_filp = file; 3381 if (obj->user_pin_count == 1) { 3382 ret = i915_gem_object_pin(obj, args->alignment, true); 3383 if (ret) 3384 goto out; 3385 } 3386 3387 /* XXX - flush the CPU caches for pinned objects 3388 * as the X server doesn't manage domains yet 3389 */ 3390 i915_gem_object_flush_cpu_write_domain(obj); 3391 args->offset = obj->gtt_offset; 3392 out: 3393 drm_gem_object_unreference(&obj->base); 3394 unlock: 3395 mutex_unlock(&dev->struct_mutex); 3396 return ret; 3397 } 3398 3399 int 3400 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3401 struct drm_file *file) 3402 { 3403 struct drm_i915_gem_pin *args = data; 3404 struct drm_i915_gem_object *obj; 3405 int ret; 3406 3407 ret = i915_mutex_lock_interruptible(dev); 3408 if (ret) 3409 return ret; 3410 3411 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3412 if (&obj->base == NULL) { 3413 ret = -ENOENT; 3414 goto unlock; 3415 } 3416 3417 if (obj->pin_filp != file) { 3418 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3419 args->handle); 3420 ret = -EINVAL; 3421 goto out; 3422 } 3423 obj->user_pin_count--; 3424 if (obj->user_pin_count == 0) { 3425 obj->pin_filp = NULL; 3426 i915_gem_object_unpin(obj); 3427 } 3428 3429 out: 3430 drm_gem_object_unreference(&obj->base); 3431 unlock: 3432 mutex_unlock(&dev->struct_mutex); 3433 return ret; 3434 } 3435 3436 int 3437 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3438 struct drm_file *file) 3439 { 3440 struct drm_i915_gem_busy *args = data; 3441 struct drm_i915_gem_object *obj; 3442 int ret; 3443 3444 ret = i915_mutex_lock_interruptible(dev); 3445 if (ret) 3446 return ret; 3447 3448 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3449 if (&obj->base == NULL) { 3450 ret = -ENOENT; 3451 goto unlock; 3452 } 3453 3454 /* Count all active objects as busy, even if they are currently not used 3455 * by the gpu. Users of this interface expect objects to eventually 3456 * become non-busy without any further actions, therefore emit any 3457 * necessary flushes here. 3458 */ 3459 args->busy = obj->active; 3460 if (args->busy) { 3461 /* Unconditionally flush objects, even when the gpu still uses this 3462 * object. Userspace calling this function indicates that it wants to 3463 * use this buffer rather sooner than later, so issuing the required 3464 * flush earlier is beneficial. 3465 */ 3466 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3467 ret = i915_gem_flush_ring(obj->ring, 3468 0, obj->base.write_domain); 3469 } else if (obj->ring->outstanding_lazy_request == 3470 obj->last_rendering_seqno) { 3471 struct drm_i915_gem_request *request; 3472 3473 /* This ring is not being cleared by active usage, 3474 * so emit a request to do so. 
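 * Retiring that request later moves the object off the active list.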
3475 */ 3476 request = kzalloc(sizeof(*request), GFP_KERNEL); 3477 if (request) 3478 ret = i915_add_request(obj->ring, NULL,request); 3479 else 3480 ret = -ENOMEM; 3481 } 3482 3483 /* Update the active list for the hardware's current position. 3484 * Otherwise this only updates on a delayed timer or when irqs 3485 * are actually unmasked, and our working set ends up being 3486 * larger than required. 3487 */ 3488 i915_gem_retire_requests_ring(obj->ring); 3489 3490 args->busy = obj->active; 3491 } 3492 3493 drm_gem_object_unreference(&obj->base); 3494 unlock: 3495 mutex_unlock(&dev->struct_mutex); 3496 return ret; 3497 } 3498 3499 int 3500 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3501 struct drm_file *file_priv) 3502 { 3503 return i915_gem_ring_throttle(dev, file_priv); 3504 } 3505 3506 int 3507 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3508 struct drm_file *file_priv) 3509 { 3510 struct drm_i915_gem_madvise *args = data; 3511 struct drm_i915_gem_object *obj; 3512 int ret; 3513 3514 switch (args->madv) { 3515 case I915_MADV_DONTNEED: 3516 case I915_MADV_WILLNEED: 3517 break; 3518 default: 3519 return -EINVAL; 3520 } 3521 3522 ret = i915_mutex_lock_interruptible(dev); 3523 if (ret) 3524 return ret; 3525 3526 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3527 if (&obj->base == NULL) { 3528 ret = -ENOENT; 3529 goto unlock; 3530 } 3531 3532 if (obj->pin_count) { 3533 ret = -EINVAL; 3534 goto out; 3535 } 3536 3537 if (obj->madv != __I915_MADV_PURGED) 3538 obj->madv = args->madv; 3539 3540 /* if the object is no longer bound, discard its backing storage */ 3541 if (i915_gem_object_is_purgeable(obj) && 3542 obj->gtt_space == NULL) 3543 i915_gem_object_truncate(obj); 3544 3545 args->retained = obj->madv != __I915_MADV_PURGED; 3546 3547 out: 3548 drm_gem_object_unreference(&obj->base); 3549 unlock: 3550 mutex_unlock(&dev->struct_mutex); 3551 return ret; 3552 } 3553 3554 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3555 size_t size) 3556 { 3557 struct drm_i915_private *dev_priv = dev->dev_private; 3558 struct drm_i915_gem_object *obj; 3559 struct address_space *mapping; 3560 3561 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 3562 if (obj == NULL) 3563 return NULL; 3564 3565 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3566 kfree(obj); 3567 return NULL; 3568 } 3569 3570 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3571 mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); 3572 3573 i915_gem_info_add_obj(dev_priv, size); 3574 3575 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3576 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3577 3578 obj->cache_level = I915_CACHE_NONE; 3579 obj->base.driver_private = NULL; 3580 obj->fence_reg = I915_FENCE_REG_NONE; 3581 INIT_LIST_HEAD(&obj->mm_list); 3582 INIT_LIST_HEAD(&obj->gtt_list); 3583 INIT_LIST_HEAD(&obj->ring_list); 3584 INIT_LIST_HEAD(&obj->exec_list); 3585 INIT_LIST_HEAD(&obj->gpu_write_list); 3586 obj->madv = I915_MADV_WILLNEED; 3587 /* Avoid an unnecessary call to unbind on the first bind. 
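 * The flag is recomputed from the actual placement in
 * i915_gem_object_bind_to_gtt() once the object is bound.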
*/ 3588 obj->map_and_fenceable = true; 3589 3590 return obj; 3591 } 3592 3593 int i915_gem_init_object(struct drm_gem_object *obj) 3594 { 3595 BUG(); 3596 3597 return 0; 3598 } 3599 3600 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) 3601 { 3602 struct drm_device *dev = obj->base.dev; 3603 drm_i915_private_t *dev_priv = dev->dev_private; 3604 int ret; 3605 3606 ret = i915_gem_object_unbind(obj); 3607 if (ret == -ERESTARTSYS) { 3608 list_move(&obj->mm_list, 3609 &dev_priv->mm.deferred_free_list); 3610 return; 3611 } 3612 3613 trace_i915_gem_object_destroy(obj); 3614 3615 if (obj->base.map_list.map) 3616 i915_gem_free_mmap_offset(obj); 3617 3618 drm_gem_object_release(&obj->base); 3619 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3620 3621 kfree(obj->page_cpu_valid); 3622 kfree(obj->bit_17); 3623 kfree(obj); 3624 } 3625 3626 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3627 { 3628 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3629 struct drm_device *dev = obj->base.dev; 3630 3631 while (obj->pin_count > 0) 3632 i915_gem_object_unpin(obj); 3633 3634 if (obj->phys_obj) 3635 i915_gem_detach_phys_object(dev, obj); 3636 3637 i915_gem_free_object_tail(obj); 3638 } 3639 3640 int 3641 i915_gem_idle(struct drm_device *dev) 3642 { 3643 drm_i915_private_t *dev_priv = dev->dev_private; 3644 int ret; 3645 3646 mutex_lock(&dev->struct_mutex); 3647 3648 if (dev_priv->mm.suspended) { 3649 mutex_unlock(&dev->struct_mutex); 3650 return 0; 3651 } 3652 3653 ret = i915_gpu_idle(dev); 3654 if (ret) { 3655 mutex_unlock(&dev->struct_mutex); 3656 return ret; 3657 } 3658 3659 /* Under UMS, be paranoid and evict. */ 3660 if (!drm_core_check_feature(dev, DRIVER_MODESET)) { 3661 ret = i915_gem_evict_inactive(dev, false); 3662 if (ret) { 3663 mutex_unlock(&dev->struct_mutex); 3664 return ret; 3665 } 3666 } 3667 3668 i915_gem_reset_fences(dev); 3669 3670 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3671 * We need to replace this with a semaphore, or something. 3672 * And not confound mm.suspended! 3673 */ 3674 dev_priv->mm.suspended = 1; 3675 del_timer_sync(&dev_priv->hangcheck_timer); 3676 3677 i915_kernel_lost_context(dev); 3678 i915_gem_cleanup_ringbuffer(dev); 3679 3680 mutex_unlock(&dev->struct_mutex); 3681 3682 /* Cancel the retire work handler, which should be idle now. 
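 * (mm.suspended was set above, so the handler will not requeue itself.)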
*/ 3683 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3684 3685 return 0; 3686 } 3687 3688 int 3689 i915_gem_init_ringbuffer(struct drm_device *dev) 3690 { 3691 drm_i915_private_t *dev_priv = dev->dev_private; 3692 int ret; 3693 3694 ret = intel_init_render_ring_buffer(dev); 3695 if (ret) 3696 return ret; 3697 3698 if (HAS_BSD(dev)) { 3699 ret = intel_init_bsd_ring_buffer(dev); 3700 if (ret) 3701 goto cleanup_render_ring; 3702 } 3703 3704 if (HAS_BLT(dev)) { 3705 ret = intel_init_blt_ring_buffer(dev); 3706 if (ret) 3707 goto cleanup_bsd_ring; 3708 } 3709 3710 dev_priv->next_seqno = 1; 3711 3712 return 0; 3713 3714 cleanup_bsd_ring: 3715 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3716 cleanup_render_ring: 3717 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3718 return ret; 3719 } 3720 3721 void 3722 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3723 { 3724 drm_i915_private_t *dev_priv = dev->dev_private; 3725 int i; 3726 3727 for (i = 0; i < I915_NUM_RINGS; i++) 3728 intel_cleanup_ring_buffer(&dev_priv->ring[i]); 3729 } 3730 3731 int 3732 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3733 struct drm_file *file_priv) 3734 { 3735 drm_i915_private_t *dev_priv = dev->dev_private; 3736 int ret, i; 3737 3738 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3739 return 0; 3740 3741 if (atomic_read(&dev_priv->mm.wedged)) { 3742 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3743 atomic_set(&dev_priv->mm.wedged, 0); 3744 } 3745 3746 mutex_lock(&dev->struct_mutex); 3747 dev_priv->mm.suspended = 0; 3748 3749 ret = i915_gem_init_ringbuffer(dev); 3750 if (ret != 0) { 3751 mutex_unlock(&dev->struct_mutex); 3752 return ret; 3753 } 3754 3755 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 3756 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 3757 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 3758 for (i = 0; i < I915_NUM_RINGS; i++) { 3759 BUG_ON(!list_empty(&dev_priv->ring[i].active_list)); 3760 BUG_ON(!list_empty(&dev_priv->ring[i].request_list)); 3761 } 3762 mutex_unlock(&dev->struct_mutex); 3763 3764 ret = drm_irq_install(dev); 3765 if (ret) 3766 goto cleanup_ringbuffer; 3767 3768 return 0; 3769 3770 cleanup_ringbuffer: 3771 mutex_lock(&dev->struct_mutex); 3772 i915_gem_cleanup_ringbuffer(dev); 3773 dev_priv->mm.suspended = 1; 3774 mutex_unlock(&dev->struct_mutex); 3775 3776 return ret; 3777 } 3778 3779 int 3780 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3781 struct drm_file *file_priv) 3782 { 3783 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3784 return 0; 3785 3786 drm_irq_uninstall(dev); 3787 return i915_gem_idle(dev); 3788 } 3789 3790 void 3791 i915_gem_lastclose(struct drm_device *dev) 3792 { 3793 int ret; 3794 3795 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3796 return; 3797 3798 ret = i915_gem_idle(dev); 3799 if (ret) 3800 DRM_ERROR("failed to idle hardware: %d\n", ret); 3801 } 3802 3803 static void 3804 init_ring_lists(struct intel_ring_buffer *ring) 3805 { 3806 INIT_LIST_HEAD(&ring->active_list); 3807 INIT_LIST_HEAD(&ring->request_list); 3808 INIT_LIST_HEAD(&ring->gpu_write_list); 3809 } 3810 3811 void 3812 i915_gem_load(struct drm_device *dev) 3813 { 3814 int i; 3815 drm_i915_private_t *dev_priv = dev->dev_private; 3816 3817 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3818 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 3819 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3820 INIT_LIST_HEAD(&dev_priv->mm.pinned_list); 3821 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3822 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); 3823 
INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3824 for (i = 0; i < I915_NUM_RINGS; i++) 3825 init_ring_lists(&dev_priv->ring[i]); 3826 for (i = 0; i < 16; i++) 3827 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3828 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3829 i915_gem_retire_work_handler); 3830 init_completion(&dev_priv->error_completion); 3831 3832 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3833 if (IS_GEN3(dev)) { 3834 u32 tmp = I915_READ(MI_ARB_STATE); 3835 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { 3836 /* arb state is a masked write, so set bit + bit in mask */ 3837 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT); 3838 I915_WRITE(MI_ARB_STATE, tmp); 3839 } 3840 } 3841 3842 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3843 3844 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3845 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3846 dev_priv->fence_reg_start = 3; 3847 3848 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3849 dev_priv->num_fence_regs = 16; 3850 else 3851 dev_priv->num_fence_regs = 8; 3852 3853 /* Initialize fence registers to zero */ 3854 for (i = 0; i < dev_priv->num_fence_regs; i++) { 3855 i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]); 3856 } 3857 3858 i915_gem_detect_bit_6_swizzle(dev); 3859 init_waitqueue_head(&dev_priv->pending_flip_queue); 3860 3861 dev_priv->mm.interruptible = true; 3862 3863 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3864 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3865 register_shrinker(&dev_priv->mm.inactive_shrinker); 3866 } 3867 3868 /* 3869 * Create a physically contiguous memory object for this object 3870 * e.g. for cursor + overlay regs 3871 */ 3872 static int i915_gem_init_phys_object(struct drm_device *dev, 3873 int id, int size, int align) 3874 { 3875 drm_i915_private_t *dev_priv = dev->dev_private; 3876 struct drm_i915_gem_phys_object *phys_obj; 3877 int ret; 3878 3879 if (dev_priv->mm.phys_objs[id - 1] || !size) 3880 return 0; 3881 3882 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 3883 if (!phys_obj) 3884 return -ENOMEM; 3885 3886 phys_obj->id = id; 3887 3888 phys_obj->handle = drm_pci_alloc(dev, size, align); 3889 if (!phys_obj->handle) { 3890 ret = -ENOMEM; 3891 goto kfree_obj; 3892 } 3893 #ifdef CONFIG_X86 3894 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3895 #endif 3896 3897 dev_priv->mm.phys_objs[id - 1] = phys_obj; 3898 3899 return 0; 3900 kfree_obj: 3901 kfree(phys_obj); 3902 return ret; 3903 } 3904 3905 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 3906 { 3907 drm_i915_private_t *dev_priv = dev->dev_private; 3908 struct drm_i915_gem_phys_object *phys_obj; 3909 3910 if (!dev_priv->mm.phys_objs[id - 1]) 3911 return; 3912 3913 phys_obj = dev_priv->mm.phys_objs[id - 1]; 3914 if (phys_obj->cur_obj) { 3915 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 3916 } 3917 3918 #ifdef CONFIG_X86 3919 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3920 #endif 3921 drm_pci_free(dev, phys_obj->handle); 3922 kfree(phys_obj); 3923 dev_priv->mm.phys_objs[id - 1] = NULL; 3924 } 3925 3926 void i915_gem_free_all_phys_object(struct drm_device *dev) 3927 { 3928 int i; 3929 3930 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 3931 i915_gem_free_phys_object(dev, i); 3932 } 3933 3934 void 
i915_gem_detach_phys_object(struct drm_device *dev, 3935 struct drm_i915_gem_object *obj) 3936 { 3937 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3938 char *vaddr; 3939 int i; 3940 int page_count; 3941 3942 if (!obj->phys_obj) 3943 return; 3944 vaddr = obj->phys_obj->handle->vaddr; 3945 3946 page_count = obj->base.size / PAGE_SIZE; 3947 for (i = 0; i < page_count; i++) { 3948 struct page *page = shmem_read_mapping_page(mapping, i); 3949 if (!IS_ERR(page)) { 3950 char *dst = kmap_atomic(page); 3951 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 3952 kunmap_atomic(dst); 3953 3954 drm_clflush_pages(&page, 1); 3955 3956 set_page_dirty(page); 3957 mark_page_accessed(page); 3958 page_cache_release(page); 3959 } 3960 } 3961 intel_gtt_chipset_flush(); 3962 3963 obj->phys_obj->cur_obj = NULL; 3964 obj->phys_obj = NULL; 3965 } 3966 3967 int 3968 i915_gem_attach_phys_object(struct drm_device *dev, 3969 struct drm_i915_gem_object *obj, 3970 int id, 3971 int align) 3972 { 3973 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3974 drm_i915_private_t *dev_priv = dev->dev_private; 3975 int ret = 0; 3976 int page_count; 3977 int i; 3978 3979 if (id > I915_MAX_PHYS_OBJECT) 3980 return -EINVAL; 3981 3982 if (obj->phys_obj) { 3983 if (obj->phys_obj->id == id) 3984 return 0; 3985 i915_gem_detach_phys_object(dev, obj); 3986 } 3987 3988 /* create a new object */ 3989 if (!dev_priv->mm.phys_objs[id - 1]) { 3990 ret = i915_gem_init_phys_object(dev, id, 3991 obj->base.size, align); 3992 if (ret) { 3993 DRM_ERROR("failed to init phys object %d size: %zu\n", 3994 id, obj->base.size); 3995 return ret; 3996 } 3997 } 3998 3999 /* bind to the object */ 4000 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4001 obj->phys_obj->cur_obj = obj; 4002 4003 page_count = obj->base.size / PAGE_SIZE; 4004 4005 for (i = 0; i < page_count; i++) { 4006 struct page *page; 4007 char *dst, *src; 4008 4009 page = shmem_read_mapping_page(mapping, i); 4010 if (IS_ERR(page)) 4011 return PTR_ERR(page); 4012 4013 src = kmap_atomic(page); 4014 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4015 memcpy(dst, src, PAGE_SIZE); 4016 kunmap_atomic(src); 4017 4018 mark_page_accessed(page); 4019 page_cache_release(page); 4020 } 4021 4022 return 0; 4023 } 4024 4025 static int 4026 i915_gem_phys_pwrite(struct drm_device *dev, 4027 struct drm_i915_gem_object *obj, 4028 struct drm_i915_gem_pwrite *args, 4029 struct drm_file *file_priv) 4030 { 4031 void *vaddr = obj->phys_obj->handle->vaddr + args->offset; 4032 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4033 4034 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 4035 unsigned long unwritten; 4036 4037 /* The physical object once assigned is fixed for the lifetime 4038 * of the obj, so we can safely drop the lock and continue 4039 * to access vaddr. 4040 */ 4041 mutex_unlock(&dev->struct_mutex); 4042 unwritten = copy_from_user(vaddr, user_data, args->size); 4043 mutex_lock(&dev->struct_mutex); 4044 if (unwritten) 4045 return -EFAULT; 4046 } 4047 4048 intel_gtt_chipset_flush(); 4049 return 0; 4050 } 4051 4052 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4053 { 4054 struct drm_i915_file_private *file_priv = file->driver_priv; 4055 4056 /* Clean up our request list when the client is going away, so that 4057 * later retire_requests won't dereference our soon-to-be-gone 4058 * file_priv. 
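 * The requests themselves remain on the ring lists and are freed by the
 * normal retire path; only the client linkage is severed here.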
4059 */ 4060 spin_lock(&file_priv->mm.lock); 4061 while (!list_empty(&file_priv->mm.request_list)) { 4062 struct drm_i915_gem_request *request; 4063 4064 request = list_first_entry(&file_priv->mm.request_list, 4065 struct drm_i915_gem_request, 4066 client_list); 4067 list_del(&request->client_list); 4068 request->file_priv = NULL; 4069 } 4070 spin_unlock(&file_priv->mm.lock); 4071 } 4072 4073 static int 4074 i915_gpu_is_active(struct drm_device *dev) 4075 { 4076 drm_i915_private_t *dev_priv = dev->dev_private; 4077 int lists_empty; 4078 4079 lists_empty = list_empty(&dev_priv->mm.flushing_list) && 4080 list_empty(&dev_priv->mm.active_list); 4081 4082 return !lists_empty; 4083 } 4084 4085 static int 4086 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4087 { 4088 struct drm_i915_private *dev_priv = 4089 container_of(shrinker, 4090 struct drm_i915_private, 4091 mm.inactive_shrinker); 4092 struct drm_device *dev = dev_priv->dev; 4093 struct drm_i915_gem_object *obj, *next; 4094 int nr_to_scan = sc->nr_to_scan; 4095 int cnt; 4096 4097 if (!mutex_trylock(&dev->struct_mutex)) 4098 return 0; 4099 4100 /* "fast-path" to count number of available objects */ 4101 if (nr_to_scan == 0) { 4102 cnt = 0; 4103 list_for_each_entry(obj, 4104 &dev_priv->mm.inactive_list, 4105 mm_list) 4106 cnt++; 4107 mutex_unlock(&dev->struct_mutex); 4108 return cnt / 100 * sysctl_vfs_cache_pressure; 4109 } 4110 4111 rescan: 4112 /* first scan for clean buffers */ 4113 i915_gem_retire_requests(dev); 4114 4115 list_for_each_entry_safe(obj, next, 4116 &dev_priv->mm.inactive_list, 4117 mm_list) { 4118 if (i915_gem_object_is_purgeable(obj)) { 4119 if (i915_gem_object_unbind(obj) == 0 && 4120 --nr_to_scan == 0) 4121 break; 4122 } 4123 } 4124 4125 /* second pass, evict/count anything still on the inactive list */ 4126 cnt = 0; 4127 list_for_each_entry_safe(obj, next, 4128 &dev_priv->mm.inactive_list, 4129 mm_list) { 4130 if (nr_to_scan && 4131 i915_gem_object_unbind(obj) == 0) 4132 nr_to_scan--; 4133 else 4134 cnt++; 4135 } 4136 4137 if (nr_to_scan && i915_gpu_is_active(dev)) { 4138 /* 4139 * We are desperate for pages, so as a last resort, wait 4140 * for the GPU to finish and discard whatever we can. 4141 * This has a dramatic impact to reduce the number of 4142 * OOM-killer events whilst running the GPU aggressively. 4143 */ 4144 if (i915_gpu_idle(dev) == 0) 4145 goto rescan; 4146 } 4147 mutex_unlock(&dev->struct_mutex); 4148 return cnt / 100 * sysctl_vfs_cache_pressure; 4149 } 4150