1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include <linux/shmem_fs.h> 36 #include <linux/slab.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 #include <linux/dma-buf.h> 40 41 #define RQ_BUG_ON(expr) 42 43 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 44 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 45 static void 46 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 47 static void 48 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 49 static void i915_gem_write_fence(struct drm_device *dev, int reg, 50 struct drm_i915_gem_object *obj); 51 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 52 struct drm_i915_fence_reg *fence, 53 bool enable); 54 55 static bool cpu_cache_is_coherent(struct drm_device *dev, 56 enum i915_cache_level level) 57 { 58 return HAS_LLC(dev) || level != I915_CACHE_NONE; 59 } 60 61 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 62 { 63 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 64 return true; 65 66 return obj->pin_display; 67 } 68 69 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 70 { 71 if (obj->tiling_mode) 72 i915_gem_release_mmap(obj); 73 74 /* As we do not have an associated fence register, we will force 75 * a tiling change if we ever need to acquire one. 
76 */ 77 obj->fence_dirty = false; 78 obj->fence_reg = I915_FENCE_REG_NONE; 79 } 80 81 /* some bookkeeping */ 82 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 83 size_t size) 84 { 85 spin_lock(&dev_priv->mm.object_stat_lock); 86 dev_priv->mm.object_count++; 87 dev_priv->mm.object_memory += size; 88 spin_unlock(&dev_priv->mm.object_stat_lock); 89 } 90 91 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 92 size_t size) 93 { 94 spin_lock(&dev_priv->mm.object_stat_lock); 95 dev_priv->mm.object_count--; 96 dev_priv->mm.object_memory -= size; 97 spin_unlock(&dev_priv->mm.object_stat_lock); 98 } 99 100 static int 101 i915_gem_wait_for_error(struct i915_gpu_error *error) 102 { 103 int ret; 104 105 #define EXIT_COND (!i915_reset_in_progress(error) || \ 106 i915_terminally_wedged(error)) 107 if (EXIT_COND) 108 return 0; 109 110 /* 111 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 112 * userspace. If it takes that long something really bad is going on and 113 * we should simply try to bail out and fail as gracefully as possible. 114 */ 115 ret = wait_event_interruptible_timeout(error->reset_queue, 116 EXIT_COND, 117 10*HZ); 118 if (ret == 0) { 119 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 120 return -EIO; 121 } else if (ret < 0) { 122 return ret; 123 } 124 #undef EXIT_COND 125 126 return 0; 127 } 128 129 int i915_mutex_lock_interruptible(struct drm_device *dev) 130 { 131 struct drm_i915_private *dev_priv = dev->dev_private; 132 int ret; 133 134 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 135 if (ret) 136 return ret; 137 138 ret = mutex_lock_interruptible(&dev->struct_mutex); 139 if (ret) 140 return ret; 141 142 WARN_ON(i915_verify_lists(dev)); 143 return 0; 144 } 145 146 int 147 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 148 struct drm_file *file) 149 { 150 struct drm_i915_private *dev_priv = dev->dev_private; 151 struct drm_i915_gem_get_aperture *args = data; 152 struct drm_i915_gem_object *obj; 153 size_t pinned; 154 155 pinned = 0; 156 mutex_lock(&dev->struct_mutex); 157 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 158 if (i915_gem_obj_is_pinned(obj)) 159 pinned += i915_gem_obj_ggtt_size(obj); 160 mutex_unlock(&dev->struct_mutex); 161 162 args->aper_size = dev_priv->gtt.base.total; 163 args->aper_available_size = args->aper_size - pinned; 164 165 return 0; 166 } 167 168 static int 169 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 170 { 171 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 172 char *vaddr = obj->phys_handle->vaddr; 173 struct sg_table *st; 174 struct scatterlist *sg; 175 int i; 176 177 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 178 return -EINVAL; 179 180 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 181 struct page *page; 182 char *src; 183 184 page = shmem_read_mapping_page(mapping, i); 185 if (IS_ERR(page)) 186 return PTR_ERR(page); 187 188 src = kmap_atomic(page); 189 memcpy(vaddr, src, PAGE_SIZE); 190 drm_clflush_virt_range(vaddr, PAGE_SIZE); 191 kunmap_atomic(src); 192 193 page_cache_release(page); 194 vaddr += PAGE_SIZE; 195 } 196 197 i915_gem_chipset_flush(obj->base.dev); 198 199 st = kmalloc(sizeof(*st), GFP_KERNEL); 200 if (st == NULL) 201 return -ENOMEM; 202 203 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 204 kfree(st); 205 return -ENOMEM; 206 } 207 208 sg = st->sgl; 209 sg->offset = 0; 210 sg->length = obj->base.size; 211 212 sg_dma_address(sg) = obj->phys_handle->busaddr; 213 
sg_dma_len(sg) = obj->base.size; 214 215 obj->pages = st; 216 return 0; 217 } 218 219 static void 220 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 221 { 222 int ret; 223 224 BUG_ON(obj->madv == __I915_MADV_PURGED); 225 226 ret = i915_gem_object_set_to_cpu_domain(obj, true); 227 if (ret) { 228 /* In the event of a disaster, abandon all caches and 229 * hope for the best. 230 */ 231 WARN_ON(ret != -EIO); 232 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 233 } 234 235 if (obj->madv == I915_MADV_DONTNEED) 236 obj->dirty = 0; 237 238 if (obj->dirty) { 239 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 240 char *vaddr = obj->phys_handle->vaddr; 241 int i; 242 243 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 244 struct page *page; 245 char *dst; 246 247 page = shmem_read_mapping_page(mapping, i); 248 if (IS_ERR(page)) 249 continue; 250 251 dst = kmap_atomic(page); 252 drm_clflush_virt_range(vaddr, PAGE_SIZE); 253 memcpy(dst, vaddr, PAGE_SIZE); 254 kunmap_atomic(dst); 255 256 set_page_dirty(page); 257 if (obj->madv == I915_MADV_WILLNEED) 258 mark_page_accessed(page); 259 page_cache_release(page); 260 vaddr += PAGE_SIZE; 261 } 262 obj->dirty = 0; 263 } 264 265 sg_free_table(obj->pages); 266 kfree(obj->pages); 267 } 268 269 static void 270 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 271 { 272 drm_pci_free(obj->base.dev, obj->phys_handle); 273 } 274 275 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 276 .get_pages = i915_gem_object_get_pages_phys, 277 .put_pages = i915_gem_object_put_pages_phys, 278 .release = i915_gem_object_release_phys, 279 }; 280 281 static int 282 drop_pages(struct drm_i915_gem_object *obj) 283 { 284 struct i915_vma *vma, *next; 285 int ret; 286 287 drm_gem_object_reference(&obj->base); 288 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 289 if (i915_vma_unbind(vma)) 290 break; 291 292 ret = i915_gem_object_put_pages(obj); 293 drm_gem_object_unreference(&obj->base); 294 295 return ret; 296 } 297 298 int 299 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 300 int align) 301 { 302 drm_dma_handle_t *phys; 303 int ret; 304 305 if (obj->phys_handle) { 306 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 307 return -EBUSY; 308 309 return 0; 310 } 311 312 if (obj->madv != I915_MADV_WILLNEED) 313 return -EFAULT; 314 315 if (obj->base.filp == NULL) 316 return -EINVAL; 317 318 ret = drop_pages(obj); 319 if (ret) 320 return ret; 321 322 /* create a new object */ 323 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 324 if (!phys) 325 return -ENOMEM; 326 327 obj->phys_handle = phys; 328 obj->ops = &i915_gem_phys_ops; 329 330 return i915_gem_object_get_pages(obj); 331 } 332 333 static int 334 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 335 struct drm_i915_gem_pwrite *args, 336 struct drm_file *file_priv) 337 { 338 struct drm_device *dev = obj->base.dev; 339 void *vaddr = obj->phys_handle->vaddr + args->offset; 340 char __user *user_data = to_user_ptr(args->data_ptr); 341 int ret = 0; 342 343 /* We manually control the domain here and pretend that it 344 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
345 */ 346 ret = i915_gem_object_wait_rendering(obj, false); 347 if (ret) 348 return ret; 349 350 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 351 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 352 unsigned long unwritten; 353 354 /* The physical object once assigned is fixed for the lifetime 355 * of the obj, so we can safely drop the lock and continue 356 * to access vaddr. 357 */ 358 mutex_unlock(&dev->struct_mutex); 359 unwritten = copy_from_user(vaddr, user_data, args->size); 360 mutex_lock(&dev->struct_mutex); 361 if (unwritten) { 362 ret = -EFAULT; 363 goto out; 364 } 365 } 366 367 drm_clflush_virt_range(vaddr, args->size); 368 i915_gem_chipset_flush(dev); 369 370 out: 371 intel_fb_obj_flush(obj, false); 372 return ret; 373 } 374 375 void *i915_gem_object_alloc(struct drm_device *dev) 376 { 377 struct drm_i915_private *dev_priv = dev->dev_private; 378 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 379 } 380 381 void i915_gem_object_free(struct drm_i915_gem_object *obj) 382 { 383 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 384 kmem_cache_free(dev_priv->objects, obj); 385 } 386 387 static int 388 i915_gem_create(struct drm_file *file, 389 struct drm_device *dev, 390 uint64_t size, 391 uint32_t *handle_p) 392 { 393 struct drm_i915_gem_object *obj; 394 int ret; 395 u32 handle; 396 397 size = roundup(size, PAGE_SIZE); 398 if (size == 0) 399 return -EINVAL; 400 401 /* Allocate the new object */ 402 obj = i915_gem_alloc_object(dev, size); 403 if (obj == NULL) 404 return -ENOMEM; 405 406 ret = drm_gem_handle_create(file, &obj->base, &handle); 407 /* drop reference from allocate - handle holds it now */ 408 drm_gem_object_unreference_unlocked(&obj->base); 409 if (ret) 410 return ret; 411 412 *handle_p = handle; 413 return 0; 414 } 415 416 int 417 i915_gem_dumb_create(struct drm_file *file, 418 struct drm_device *dev, 419 struct drm_mode_create_dumb *args) 420 { 421 /* have to work out size/pitch and return them */ 422 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 423 args->size = args->pitch * args->height; 424 return i915_gem_create(file, dev, 425 args->size, &args->handle); 426 } 427 428 /** 429 * Creates a new mm object and returns a handle to it. 
430 */ 431 int 432 i915_gem_create_ioctl(struct drm_device *dev, void *data, 433 struct drm_file *file) 434 { 435 struct drm_i915_gem_create *args = data; 436 437 return i915_gem_create(file, dev, 438 args->size, &args->handle); 439 } 440 441 static inline int 442 __copy_to_user_swizzled(char __user *cpu_vaddr, 443 const char *gpu_vaddr, int gpu_offset, 444 int length) 445 { 446 int ret, cpu_offset = 0; 447 448 while (length > 0) { 449 int cacheline_end = ALIGN(gpu_offset + 1, 64); 450 int this_length = min(cacheline_end - gpu_offset, length); 451 int swizzled_gpu_offset = gpu_offset ^ 64; 452 453 ret = __copy_to_user(cpu_vaddr + cpu_offset, 454 gpu_vaddr + swizzled_gpu_offset, 455 this_length); 456 if (ret) 457 return ret + length; 458 459 cpu_offset += this_length; 460 gpu_offset += this_length; 461 length -= this_length; 462 } 463 464 return 0; 465 } 466 467 static inline int 468 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 469 const char __user *cpu_vaddr, 470 int length) 471 { 472 int ret, cpu_offset = 0; 473 474 while (length > 0) { 475 int cacheline_end = ALIGN(gpu_offset + 1, 64); 476 int this_length = min(cacheline_end - gpu_offset, length); 477 int swizzled_gpu_offset = gpu_offset ^ 64; 478 479 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 480 cpu_vaddr + cpu_offset, 481 this_length); 482 if (ret) 483 return ret + length; 484 485 cpu_offset += this_length; 486 gpu_offset += this_length; 487 length -= this_length; 488 } 489 490 return 0; 491 } 492 493 /* 494 * Pins the specified object's pages and synchronizes the object with 495 * GPU accesses. Sets needs_clflush to non-zero if the caller should 496 * flush the object from the CPU cache. 497 */ 498 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 499 int *needs_clflush) 500 { 501 int ret; 502 503 *needs_clflush = 0; 504 505 if (!obj->base.filp) 506 return -EINVAL; 507 508 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 509 /* If we're not in the cpu read domain, set ourself into the gtt 510 * read domain and manually flush cachelines (if required). This 511 * optimizes for the case when the gpu will dirty the data 512 * anyway again before the next pread happens. */ 513 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 514 obj->cache_level); 515 ret = i915_gem_object_wait_rendering(obj, true); 516 if (ret) 517 return ret; 518 } 519 520 ret = i915_gem_object_get_pages(obj); 521 if (ret) 522 return ret; 523 524 i915_gem_object_pin_pages(obj); 525 526 return ret; 527 } 528 529 /* Per-page copy function for the shmem pread fastpath. 530 * Flushes invalid cachelines before reading the target if 531 * needs_clflush is set. */ 532 static int 533 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 534 char __user *user_data, 535 bool page_do_bit17_swizzling, bool needs_clflush) 536 { 537 char *vaddr; 538 int ret; 539 540 if (unlikely(page_do_bit17_swizzling)) 541 return -EINVAL; 542 543 vaddr = kmap_atomic(page); 544 if (needs_clflush) 545 drm_clflush_virt_range(vaddr + shmem_page_offset, 546 page_length); 547 ret = __copy_to_user_inatomic(user_data, 548 vaddr + shmem_page_offset, 549 page_length); 550 kunmap_atomic(vaddr); 551 552 return ret ? 
-EFAULT : 0; 553 } 554 555 static void 556 shmem_clflush_swizzled_range(char *addr, unsigned long length, 557 bool swizzled) 558 { 559 if (unlikely(swizzled)) { 560 unsigned long start = (unsigned long) addr; 561 unsigned long end = (unsigned long) addr + length; 562 563 /* For swizzling simply ensure that we always flush both 564 * channels. Lame, but simple and it works. Swizzled 565 * pwrite/pread is far from a hotpath - current userspace 566 * doesn't use it at all. */ 567 start = round_down(start, 128); 568 end = round_up(end, 128); 569 570 drm_clflush_virt_range((void *)start, end - start); 571 } else { 572 drm_clflush_virt_range(addr, length); 573 } 574 575 } 576 577 /* Only difference to the fast-path function is that this can handle bit17 578 * and uses non-atomic copy and kmap functions. */ 579 static int 580 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 581 char __user *user_data, 582 bool page_do_bit17_swizzling, bool needs_clflush) 583 { 584 char *vaddr; 585 int ret; 586 587 vaddr = kmap(page); 588 if (needs_clflush) 589 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 590 page_length, 591 page_do_bit17_swizzling); 592 593 if (page_do_bit17_swizzling) 594 ret = __copy_to_user_swizzled(user_data, 595 vaddr, shmem_page_offset, 596 page_length); 597 else 598 ret = __copy_to_user(user_data, 599 vaddr + shmem_page_offset, 600 page_length); 601 kunmap(page); 602 603 return ret ? - EFAULT : 0; 604 } 605 606 static int 607 i915_gem_shmem_pread(struct drm_device *dev, 608 struct drm_i915_gem_object *obj, 609 struct drm_i915_gem_pread *args, 610 struct drm_file *file) 611 { 612 char __user *user_data; 613 ssize_t remain; 614 loff_t offset; 615 int shmem_page_offset, page_length, ret = 0; 616 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 617 int prefaulted = 0; 618 int needs_clflush = 0; 619 struct sg_page_iter sg_iter; 620 621 user_data = to_user_ptr(args->data_ptr); 622 remain = args->size; 623 624 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 625 626 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 627 if (ret) 628 return ret; 629 630 offset = args->offset; 631 632 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 633 offset >> PAGE_SHIFT) { 634 struct page *page = sg_page_iter_page(&sg_iter); 635 636 if (remain <= 0) 637 break; 638 639 /* Operation in this page 640 * 641 * shmem_page_offset = offset within page in shmem file 642 * page_length = bytes to copy for this page 643 */ 644 shmem_page_offset = offset_in_page(offset); 645 page_length = remain; 646 if ((shmem_page_offset + page_length) > PAGE_SIZE) 647 page_length = PAGE_SIZE - shmem_page_offset; 648 649 page_do_bit17_swizzling = obj_do_bit17_swizzling && 650 (page_to_phys(page) & (1 << 17)) != 0; 651 652 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 653 user_data, page_do_bit17_swizzling, 654 needs_clflush); 655 if (ret == 0) 656 goto next_page; 657 658 mutex_unlock(&dev->struct_mutex); 659 660 if (likely(!i915.prefault_disable) && !prefaulted) { 661 ret = fault_in_multipages_writeable(user_data, remain); 662 /* Userspace is tricking us, but we've already clobbered 663 * its pages with the prefault and promised to write the 664 * data up to the first fault. Hence ignore any errors 665 * and just continue. 
*/ 666 (void)ret; 667 prefaulted = 1; 668 } 669 670 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 671 user_data, page_do_bit17_swizzling, 672 needs_clflush); 673 674 mutex_lock(&dev->struct_mutex); 675 676 if (ret) 677 goto out; 678 679 next_page: 680 remain -= page_length; 681 user_data += page_length; 682 offset += page_length; 683 } 684 685 out: 686 i915_gem_object_unpin_pages(obj); 687 688 return ret; 689 } 690 691 /** 692 * Reads data from the object referenced by handle. 693 * 694 * On error, the contents of *data are undefined. 695 */ 696 int 697 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 698 struct drm_file *file) 699 { 700 struct drm_i915_gem_pread *args = data; 701 struct drm_i915_gem_object *obj; 702 int ret = 0; 703 704 if (args->size == 0) 705 return 0; 706 707 if (!access_ok(VERIFY_WRITE, 708 to_user_ptr(args->data_ptr), 709 args->size)) 710 return -EFAULT; 711 712 ret = i915_mutex_lock_interruptible(dev); 713 if (ret) 714 return ret; 715 716 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 717 if (&obj->base == NULL) { 718 ret = -ENOENT; 719 goto unlock; 720 } 721 722 /* Bounds check source. */ 723 if (args->offset > obj->base.size || 724 args->size > obj->base.size - args->offset) { 725 ret = -EINVAL; 726 goto out; 727 } 728 729 /* prime objects have no backing filp to GEM pread/pwrite 730 * pages from. 731 */ 732 if (!obj->base.filp) { 733 ret = -EINVAL; 734 goto out; 735 } 736 737 trace_i915_gem_object_pread(obj, args->offset, args->size); 738 739 ret = i915_gem_shmem_pread(dev, obj, args, file); 740 741 out: 742 drm_gem_object_unreference(&obj->base); 743 unlock: 744 mutex_unlock(&dev->struct_mutex); 745 return ret; 746 } 747 748 /* This is the fast write path which cannot handle 749 * page faults in the source data 750 */ 751 752 static inline int 753 fast_user_write(struct io_mapping *mapping, 754 loff_t page_base, int page_offset, 755 char __user *user_data, 756 int length) 757 { 758 void __iomem *vaddr_atomic; 759 void *vaddr; 760 unsigned long unwritten; 761 762 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 763 /* We can use the cpu mem copy function because this is X86. */ 764 vaddr = (void __force*)vaddr_atomic + page_offset; 765 unwritten = __copy_from_user_inatomic_nocache(vaddr, 766 user_data, length); 767 io_mapping_unmap_atomic(vaddr_atomic); 768 return unwritten; 769 } 770 771 /** 772 * This is the fast pwrite path, where we copy the data directly from the 773 * user into the GTT, uncached. 
774 */ 775 static int 776 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 777 struct drm_i915_gem_object *obj, 778 struct drm_i915_gem_pwrite *args, 779 struct drm_file *file) 780 { 781 struct drm_i915_private *dev_priv = dev->dev_private; 782 ssize_t remain; 783 loff_t offset, page_base; 784 char __user *user_data; 785 int page_offset, page_length, ret; 786 787 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 788 if (ret) 789 goto out; 790 791 ret = i915_gem_object_set_to_gtt_domain(obj, true); 792 if (ret) 793 goto out_unpin; 794 795 ret = i915_gem_object_put_fence(obj); 796 if (ret) 797 goto out_unpin; 798 799 user_data = to_user_ptr(args->data_ptr); 800 remain = args->size; 801 802 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 803 804 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 805 806 while (remain > 0) { 807 /* Operation in this page 808 * 809 * page_base = page offset within aperture 810 * page_offset = offset within page 811 * page_length = bytes to copy for this page 812 */ 813 page_base = offset & PAGE_MASK; 814 page_offset = offset_in_page(offset); 815 page_length = remain; 816 if ((page_offset + remain) > PAGE_SIZE) 817 page_length = PAGE_SIZE - page_offset; 818 819 /* If we get a fault while copying data, then (presumably) our 820 * source page isn't available. Return the error and we'll 821 * retry in the slow path. 822 */ 823 if (fast_user_write(dev_priv->gtt.mappable, page_base, 824 page_offset, user_data, page_length)) { 825 ret = -EFAULT; 826 goto out_flush; 827 } 828 829 remain -= page_length; 830 user_data += page_length; 831 offset += page_length; 832 } 833 834 out_flush: 835 intel_fb_obj_flush(obj, false); 836 out_unpin: 837 i915_gem_object_ggtt_unpin(obj); 838 out: 839 return ret; 840 } 841 842 /* Per-page copy function for the shmem pwrite fastpath. 843 * Flushes invalid cachelines before writing to the target if 844 * needs_clflush_before is set and flushes out any written cachelines after 845 * writing if needs_clflush is set. */ 846 static int 847 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 848 char __user *user_data, 849 bool page_do_bit17_swizzling, 850 bool needs_clflush_before, 851 bool needs_clflush_after) 852 { 853 char *vaddr; 854 int ret; 855 856 if (unlikely(page_do_bit17_swizzling)) 857 return -EINVAL; 858 859 vaddr = kmap_atomic(page); 860 if (needs_clflush_before) 861 drm_clflush_virt_range(vaddr + shmem_page_offset, 862 page_length); 863 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 864 user_data, page_length); 865 if (needs_clflush_after) 866 drm_clflush_virt_range(vaddr + shmem_page_offset, 867 page_length); 868 kunmap_atomic(vaddr); 869 870 return ret ? -EFAULT : 0; 871 } 872 873 /* Only difference to the fast-path function is that this can handle bit17 874 * and uses non-atomic copy and kmap functions. 
*/ 875 static int 876 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 877 char __user *user_data, 878 bool page_do_bit17_swizzling, 879 bool needs_clflush_before, 880 bool needs_clflush_after) 881 { 882 char *vaddr; 883 int ret; 884 885 vaddr = kmap(page); 886 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 887 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 888 page_length, 889 page_do_bit17_swizzling); 890 if (page_do_bit17_swizzling) 891 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 892 user_data, 893 page_length); 894 else 895 ret = __copy_from_user(vaddr + shmem_page_offset, 896 user_data, 897 page_length); 898 if (needs_clflush_after) 899 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 900 page_length, 901 page_do_bit17_swizzling); 902 kunmap(page); 903 904 return ret ? -EFAULT : 0; 905 } 906 907 static int 908 i915_gem_shmem_pwrite(struct drm_device *dev, 909 struct drm_i915_gem_object *obj, 910 struct drm_i915_gem_pwrite *args, 911 struct drm_file *file) 912 { 913 ssize_t remain; 914 loff_t offset; 915 char __user *user_data; 916 int shmem_page_offset, page_length, ret = 0; 917 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 918 int hit_slowpath = 0; 919 int needs_clflush_after = 0; 920 int needs_clflush_before = 0; 921 struct sg_page_iter sg_iter; 922 923 user_data = to_user_ptr(args->data_ptr); 924 remain = args->size; 925 926 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 927 928 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 929 /* If we're not in the cpu write domain, set ourself into the gtt 930 * write domain and manually flush cachelines (if required). This 931 * optimizes for the case when the gpu will use the data 932 * right away and we therefore have to clflush anyway. */ 933 needs_clflush_after = cpu_write_needs_clflush(obj); 934 ret = i915_gem_object_wait_rendering(obj, false); 935 if (ret) 936 return ret; 937 } 938 /* Same trick applies to invalidate partially written cachelines read 939 * before writing. */ 940 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 941 needs_clflush_before = 942 !cpu_cache_is_coherent(dev, obj->cache_level); 943 944 ret = i915_gem_object_get_pages(obj); 945 if (ret) 946 return ret; 947 948 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 949 950 i915_gem_object_pin_pages(obj); 951 952 offset = args->offset; 953 obj->dirty = 1; 954 955 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 956 offset >> PAGE_SHIFT) { 957 struct page *page = sg_page_iter_page(&sg_iter); 958 int partial_cacheline_write; 959 960 if (remain <= 0) 961 break; 962 963 /* Operation in this page 964 * 965 * shmem_page_offset = offset within page in shmem file 966 * page_length = bytes to copy for this page 967 */ 968 shmem_page_offset = offset_in_page(offset); 969 970 page_length = remain; 971 if ((shmem_page_offset + page_length) > PAGE_SIZE) 972 page_length = PAGE_SIZE - shmem_page_offset; 973 974 /* If we don't overwrite a cacheline completely we need to be 975 * careful to have up-to-date data by first clflushing. Don't 976 * overcomplicate things and flush the entire patch. 
*/ 977 partial_cacheline_write = needs_clflush_before && 978 ((shmem_page_offset | page_length) 979 & (boot_cpu_data.x86_clflush_size - 1)); 980 981 page_do_bit17_swizzling = obj_do_bit17_swizzling && 982 (page_to_phys(page) & (1 << 17)) != 0; 983 984 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 985 user_data, page_do_bit17_swizzling, 986 partial_cacheline_write, 987 needs_clflush_after); 988 if (ret == 0) 989 goto next_page; 990 991 hit_slowpath = 1; 992 mutex_unlock(&dev->struct_mutex); 993 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 994 user_data, page_do_bit17_swizzling, 995 partial_cacheline_write, 996 needs_clflush_after); 997 998 mutex_lock(&dev->struct_mutex); 999 1000 if (ret) 1001 goto out; 1002 1003 next_page: 1004 remain -= page_length; 1005 user_data += page_length; 1006 offset += page_length; 1007 } 1008 1009 out: 1010 i915_gem_object_unpin_pages(obj); 1011 1012 if (hit_slowpath) { 1013 /* 1014 * Fixup: Flush cpu caches in case we didn't flush the dirty 1015 * cachelines in-line while writing and the object moved 1016 * out of the cpu write domain while we've dropped the lock. 1017 */ 1018 if (!needs_clflush_after && 1019 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1020 if (i915_gem_clflush_object(obj, obj->pin_display)) 1021 i915_gem_chipset_flush(dev); 1022 } 1023 } 1024 1025 if (needs_clflush_after) 1026 i915_gem_chipset_flush(dev); 1027 1028 intel_fb_obj_flush(obj, false); 1029 return ret; 1030 } 1031 1032 /** 1033 * Writes data to the object referenced by handle. 1034 * 1035 * On error, the contents of the buffer that were to be modified are undefined. 1036 */ 1037 int 1038 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1039 struct drm_file *file) 1040 { 1041 struct drm_i915_private *dev_priv = dev->dev_private; 1042 struct drm_i915_gem_pwrite *args = data; 1043 struct drm_i915_gem_object *obj; 1044 int ret; 1045 1046 if (args->size == 0) 1047 return 0; 1048 1049 if (!access_ok(VERIFY_READ, 1050 to_user_ptr(args->data_ptr), 1051 args->size)) 1052 return -EFAULT; 1053 1054 if (likely(!i915.prefault_disable)) { 1055 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1056 args->size); 1057 if (ret) 1058 return -EFAULT; 1059 } 1060 1061 intel_runtime_pm_get(dev_priv); 1062 1063 ret = i915_mutex_lock_interruptible(dev); 1064 if (ret) 1065 goto put_rpm; 1066 1067 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1068 if (&obj->base == NULL) { 1069 ret = -ENOENT; 1070 goto unlock; 1071 } 1072 1073 /* Bounds check destination. */ 1074 if (args->offset > obj->base.size || 1075 args->size > obj->base.size - args->offset) { 1076 ret = -EINVAL; 1077 goto out; 1078 } 1079 1080 /* prime objects have no backing filp to GEM pread/pwrite 1081 * pages from. 1082 */ 1083 if (!obj->base.filp) { 1084 ret = -EINVAL; 1085 goto out; 1086 } 1087 1088 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1089 1090 ret = -EFAULT; 1091 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1092 * it would end up going through the fenced access, and we'll get 1093 * different detiling behavior between reading and writing. 1094 * pread/pwrite currently are reading and writing from the CPU 1095 * perspective, requiring manual detiling by the client. 
1096 */ 1097 if (obj->tiling_mode == I915_TILING_NONE && 1098 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1099 cpu_write_needs_clflush(obj)) { 1100 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1101 /* Note that the gtt paths might fail with non-page-backed user 1102 * pointers (e.g. gtt mappings when moving data between 1103 * textures). Fallback to the shmem path in that case. */ 1104 } 1105 1106 if (ret == -EFAULT || ret == -ENOSPC) { 1107 if (obj->phys_handle) 1108 ret = i915_gem_phys_pwrite(obj, args, file); 1109 else 1110 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1111 } 1112 1113 out: 1114 drm_gem_object_unreference(&obj->base); 1115 unlock: 1116 mutex_unlock(&dev->struct_mutex); 1117 put_rpm: 1118 intel_runtime_pm_put(dev_priv); 1119 1120 return ret; 1121 } 1122 1123 int 1124 i915_gem_check_wedge(struct i915_gpu_error *error, 1125 bool interruptible) 1126 { 1127 if (i915_reset_in_progress(error)) { 1128 /* Non-interruptible callers can't handle -EAGAIN, hence return 1129 * -EIO unconditionally for these. */ 1130 if (!interruptible) 1131 return -EIO; 1132 1133 /* Recovery complete, but the reset failed ... */ 1134 if (i915_terminally_wedged(error)) 1135 return -EIO; 1136 1137 /* 1138 * Check if GPU Reset is in progress - we need intel_ring_begin 1139 * to work properly to reinit the hw state while the gpu is 1140 * still marked as reset-in-progress. Handle this with a flag. 1141 */ 1142 if (!error->reload_in_reset) 1143 return -EAGAIN; 1144 } 1145 1146 return 0; 1147 } 1148 1149 /* 1150 * Compare arbitrary request against outstanding lazy request. Emit on match. 1151 */ 1152 int 1153 i915_gem_check_olr(struct drm_i915_gem_request *req) 1154 { 1155 int ret; 1156 1157 WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); 1158 1159 ret = 0; 1160 if (req == req->ring->outstanding_lazy_request) 1161 ret = i915_add_request(req->ring); 1162 1163 return ret; 1164 } 1165 1166 static void fake_irq(unsigned long data) 1167 { 1168 wake_up_process((struct task_struct *)data); 1169 } 1170 1171 static bool missed_irq(struct drm_i915_private *dev_priv, 1172 struct intel_engine_cs *ring) 1173 { 1174 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1175 } 1176 1177 static int __i915_spin_request(struct drm_i915_gem_request *req) 1178 { 1179 unsigned long timeout; 1180 1181 if (i915_gem_request_get_ring(req)->irq_refcount) 1182 return -EBUSY; 1183 1184 timeout = jiffies + 1; 1185 while (!need_resched()) { 1186 if (i915_gem_request_completed(req, true)) 1187 return 0; 1188 1189 if (time_after_eq(jiffies, timeout)) 1190 break; 1191 1192 cpu_relax_lowlatency(); 1193 } 1194 if (i915_gem_request_completed(req, false)) 1195 return 0; 1196 1197 return -EAGAIN; 1198 } 1199 1200 /** 1201 * __i915_wait_request - wait until execution of request has finished 1202 * @req: duh! 1203 * @reset_counter: reset sequence associated with the given request 1204 * @interruptible: do an interruptible wait (normally yes) 1205 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1206 * 1207 * Note: It is of utmost importance that the passed in seqno and reset_counter 1208 * values have been read by the caller in an smp safe manner. Where read-side 1209 * locks are involved, it is sufficient to read the reset_counter before 1210 * unlocking the lock that protects the seqno. For lockless tricks, the 1211 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1212 * inserted. 
1213 * 1214 * Returns 0 if the request was found within the alloted time. Else returns the 1215 * errno with remaining time filled in timeout argument. 1216 */ 1217 int __i915_wait_request(struct drm_i915_gem_request *req, 1218 unsigned reset_counter, 1219 bool interruptible, 1220 s64 *timeout, 1221 struct intel_rps_client *rps) 1222 { 1223 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1224 struct drm_device *dev = ring->dev; 1225 struct drm_i915_private *dev_priv = dev->dev_private; 1226 const bool irq_test_in_progress = 1227 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1228 DEFINE_WAIT(wait); 1229 unsigned long timeout_expire; 1230 s64 before, now; 1231 int ret; 1232 1233 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1234 1235 if (list_empty(&req->list)) 1236 return 0; 1237 1238 if (i915_gem_request_completed(req, true)) 1239 return 0; 1240 1241 timeout_expire = timeout ? 1242 jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0; 1243 1244 if (INTEL_INFO(dev_priv)->gen >= 6) 1245 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1246 1247 /* Record current time in case interrupted by signal, or wedged */ 1248 trace_i915_gem_request_wait_begin(req); 1249 before = ktime_get_raw_ns(); 1250 1251 /* Optimistic spin for the next jiffie before touching IRQs */ 1252 ret = __i915_spin_request(req); 1253 if (ret == 0) 1254 goto out; 1255 1256 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) { 1257 ret = -ENODEV; 1258 goto out; 1259 } 1260 1261 for (;;) { 1262 struct timer_list timer; 1263 1264 prepare_to_wait(&ring->irq_queue, &wait, 1265 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 1266 1267 /* We need to check whether any gpu reset happened in between 1268 * the caller grabbing the seqno and now ... */ 1269 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1270 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1271 * is truely gone. */ 1272 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1273 if (ret == 0) 1274 ret = -EAGAIN; 1275 break; 1276 } 1277 1278 if (i915_gem_request_completed(req, false)) { 1279 ret = 0; 1280 break; 1281 } 1282 1283 if (interruptible && signal_pending(current)) { 1284 ret = -ERESTARTSYS; 1285 break; 1286 } 1287 1288 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1289 ret = -ETIME; 1290 break; 1291 } 1292 1293 timer.function = NULL; 1294 if (timeout || missed_irq(dev_priv, ring)) { 1295 unsigned long expire; 1296 1297 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current); 1298 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1299 mod_timer(&timer, expire); 1300 } 1301 1302 io_schedule(); 1303 1304 if (timer.function) { 1305 del_singleshot_timer_sync(&timer); 1306 destroy_timer_on_stack(&timer); 1307 } 1308 } 1309 if (!irq_test_in_progress) 1310 ring->irq_put(ring); 1311 1312 finish_wait(&ring->irq_queue, &wait); 1313 1314 out: 1315 now = ktime_get_raw_ns(); 1316 trace_i915_gem_request_wait_end(req); 1317 1318 if (timeout) { 1319 s64 tres = *timeout - (now - before); 1320 1321 *timeout = tres < 0 ? 0 : tres; 1322 1323 /* 1324 * Apparently ktime isn't accurate enough and occasionally has a 1325 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1326 * things up to make the test happy. We allow up to 1 jiffy. 1327 * 1328 * This is a regrssion from the timespec->ktime conversion. 
1329 */ 1330 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1331 *timeout = 0; 1332 } 1333 1334 return ret; 1335 } 1336 1337 static inline void 1338 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1339 { 1340 struct drm_i915_file_private *file_priv = request->file_priv; 1341 1342 if (!file_priv) 1343 return; 1344 1345 spin_lock(&file_priv->mm.lock); 1346 list_del(&request->client_list); 1347 request->file_priv = NULL; 1348 spin_unlock(&file_priv->mm.lock); 1349 } 1350 1351 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1352 { 1353 trace_i915_gem_request_retire(request); 1354 1355 /* We know the GPU must have read the request to have 1356 * sent us the seqno + interrupt, so use the position 1357 * of tail of the request to update the last known position 1358 * of the GPU head. 1359 * 1360 * Note this requires that we are always called in request 1361 * completion order. 1362 */ 1363 request->ringbuf->last_retired_head = request->postfix; 1364 1365 list_del_init(&request->list); 1366 i915_gem_request_remove_from_client(request); 1367 1368 put_pid(request->pid); 1369 1370 i915_gem_request_unreference(request); 1371 } 1372 1373 static void 1374 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1375 { 1376 struct intel_engine_cs *engine = req->ring; 1377 struct drm_i915_gem_request *tmp; 1378 1379 lockdep_assert_held(&engine->dev->struct_mutex); 1380 1381 if (list_empty(&req->list)) 1382 return; 1383 1384 do { 1385 tmp = list_first_entry(&engine->request_list, 1386 typeof(*tmp), list); 1387 1388 i915_gem_request_retire(tmp); 1389 } while (tmp != req); 1390 1391 WARN_ON(i915_verify_lists(engine->dev)); 1392 } 1393 1394 /** 1395 * Waits for a request to be signaled, and cleans up the 1396 * request and object lists appropriately for that event. 1397 */ 1398 int 1399 i915_wait_request(struct drm_i915_gem_request *req) 1400 { 1401 struct drm_device *dev; 1402 struct drm_i915_private *dev_priv; 1403 bool interruptible; 1404 int ret; 1405 1406 BUG_ON(req == NULL); 1407 1408 dev = req->ring->dev; 1409 dev_priv = dev->dev_private; 1410 interruptible = dev_priv->mm.interruptible; 1411 1412 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1413 1414 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1415 if (ret) 1416 return ret; 1417 1418 ret = i915_gem_check_olr(req); 1419 if (ret) 1420 return ret; 1421 1422 ret = __i915_wait_request(req, 1423 atomic_read(&dev_priv->gpu_error.reset_counter), 1424 interruptible, NULL, NULL); 1425 if (ret) 1426 return ret; 1427 1428 __i915_gem_request_retire__upto(req); 1429 return 0; 1430 } 1431 1432 /** 1433 * Ensures that all rendering to the object has completed and the object is 1434 * safe to unbind from the GTT or access from the CPU. 
1435 */ 1436 int 1437 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1438 bool readonly) 1439 { 1440 int ret, i; 1441 1442 if (!obj->active) 1443 return 0; 1444 1445 if (readonly) { 1446 if (obj->last_write_req != NULL) { 1447 ret = i915_wait_request(obj->last_write_req); 1448 if (ret) 1449 return ret; 1450 1451 i = obj->last_write_req->ring->id; 1452 if (obj->last_read_req[i] == obj->last_write_req) 1453 i915_gem_object_retire__read(obj, i); 1454 else 1455 i915_gem_object_retire__write(obj); 1456 } 1457 } else { 1458 for (i = 0; i < I915_NUM_RINGS; i++) { 1459 if (obj->last_read_req[i] == NULL) 1460 continue; 1461 1462 ret = i915_wait_request(obj->last_read_req[i]); 1463 if (ret) 1464 return ret; 1465 1466 i915_gem_object_retire__read(obj, i); 1467 } 1468 RQ_BUG_ON(obj->active); 1469 } 1470 1471 return 0; 1472 } 1473 1474 static void 1475 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1476 struct drm_i915_gem_request *req) 1477 { 1478 int ring = req->ring->id; 1479 1480 if (obj->last_read_req[ring] == req) 1481 i915_gem_object_retire__read(obj, ring); 1482 else if (obj->last_write_req == req) 1483 i915_gem_object_retire__write(obj); 1484 1485 __i915_gem_request_retire__upto(req); 1486 } 1487 1488 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1489 * as the object state may change during this call. 1490 */ 1491 static __must_check int 1492 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1493 struct intel_rps_client *rps, 1494 bool readonly) 1495 { 1496 struct drm_device *dev = obj->base.dev; 1497 struct drm_i915_private *dev_priv = dev->dev_private; 1498 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1499 unsigned reset_counter; 1500 int ret, i, n = 0; 1501 1502 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1503 BUG_ON(!dev_priv->mm.interruptible); 1504 1505 if (!obj->active) 1506 return 0; 1507 1508 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1509 if (ret) 1510 return ret; 1511 1512 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1513 1514 if (readonly) { 1515 struct drm_i915_gem_request *req; 1516 1517 req = obj->last_write_req; 1518 if (req == NULL) 1519 return 0; 1520 1521 ret = i915_gem_check_olr(req); 1522 if (ret) 1523 goto err; 1524 1525 requests[n++] = i915_gem_request_reference(req); 1526 } else { 1527 for (i = 0; i < I915_NUM_RINGS; i++) { 1528 struct drm_i915_gem_request *req; 1529 1530 req = obj->last_read_req[i]; 1531 if (req == NULL) 1532 continue; 1533 1534 ret = i915_gem_check_olr(req); 1535 if (ret) 1536 goto err; 1537 1538 requests[n++] = i915_gem_request_reference(req); 1539 } 1540 } 1541 1542 mutex_unlock(&dev->struct_mutex); 1543 for (i = 0; ret == 0 && i < n; i++) 1544 ret = __i915_wait_request(requests[i], reset_counter, true, 1545 NULL, rps); 1546 mutex_lock(&dev->struct_mutex); 1547 1548 err: 1549 for (i = 0; i < n; i++) { 1550 if (ret == 0) 1551 i915_gem_object_retire_request(obj, requests[i]); 1552 i915_gem_request_unreference(requests[i]); 1553 } 1554 1555 return ret; 1556 } 1557 1558 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1559 { 1560 struct drm_i915_file_private *fpriv = file->driver_priv; 1561 return &fpriv->rps; 1562 } 1563 1564 /** 1565 * Called when user space prepares to use an object with the CPU, either 1566 * through the mmap ioctl's mapping or a GTT mapping. 
1567 */ 1568 int 1569 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1570 struct drm_file *file) 1571 { 1572 struct drm_i915_gem_set_domain *args = data; 1573 struct drm_i915_gem_object *obj; 1574 uint32_t read_domains = args->read_domains; 1575 uint32_t write_domain = args->write_domain; 1576 int ret; 1577 1578 /* Only handle setting domains to types used by the CPU. */ 1579 if (write_domain & I915_GEM_GPU_DOMAINS) 1580 return -EINVAL; 1581 1582 if (read_domains & I915_GEM_GPU_DOMAINS) 1583 return -EINVAL; 1584 1585 /* Having something in the write domain implies it's in the read 1586 * domain, and only that read domain. Enforce that in the request. 1587 */ 1588 if (write_domain != 0 && read_domains != write_domain) 1589 return -EINVAL; 1590 1591 ret = i915_mutex_lock_interruptible(dev); 1592 if (ret) 1593 return ret; 1594 1595 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1596 if (&obj->base == NULL) { 1597 ret = -ENOENT; 1598 goto unlock; 1599 } 1600 1601 /* Try to flush the object off the GPU without holding the lock. 1602 * We will repeat the flush holding the lock in the normal manner 1603 * to catch cases where we are gazumped. 1604 */ 1605 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1606 to_rps_client(file), 1607 !write_domain); 1608 if (ret) 1609 goto unref; 1610 1611 if (read_domains & I915_GEM_DOMAIN_GTT) 1612 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1613 else 1614 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1615 1616 unref: 1617 drm_gem_object_unreference(&obj->base); 1618 unlock: 1619 mutex_unlock(&dev->struct_mutex); 1620 return ret; 1621 } 1622 1623 /** 1624 * Called when user space has done writes to this buffer 1625 */ 1626 int 1627 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1628 struct drm_file *file) 1629 { 1630 struct drm_i915_gem_sw_finish *args = data; 1631 struct drm_i915_gem_object *obj; 1632 int ret = 0; 1633 1634 ret = i915_mutex_lock_interruptible(dev); 1635 if (ret) 1636 return ret; 1637 1638 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1639 if (&obj->base == NULL) { 1640 ret = -ENOENT; 1641 goto unlock; 1642 } 1643 1644 /* Pinned buffers may be scanout, so flush the cache */ 1645 if (obj->pin_display) 1646 i915_gem_object_flush_cpu_write_domain(obj); 1647 1648 drm_gem_object_unreference(&obj->base); 1649 unlock: 1650 mutex_unlock(&dev->struct_mutex); 1651 return ret; 1652 } 1653 1654 /** 1655 * Maps the contents of an object, returning the address it is mapped 1656 * into. 1657 * 1658 * While the mapping holds a reference on the contents of the object, it doesn't 1659 * imply a ref on the object itself. 1660 * 1661 * IMPORTANT: 1662 * 1663 * DRM driver writers who look a this function as an example for how to do GEM 1664 * mmap support, please don't implement mmap support like here. The modern way 1665 * to implement DRM mmap support is with an mmap offset ioctl (like 1666 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1667 * That way debug tooling like valgrind will understand what's going on, hiding 1668 * the mmap call in a driver private ioctl will break that. The i915 driver only 1669 * does cpu mmaps this way because we didn't know better. 
1670 */ 1671 int 1672 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1673 struct drm_file *file) 1674 { 1675 struct drm_i915_gem_mmap *args = data; 1676 struct drm_gem_object *obj; 1677 unsigned long addr; 1678 1679 if (args->flags & ~(I915_MMAP_WC)) 1680 return -EINVAL; 1681 1682 if (args->flags & I915_MMAP_WC && !cpu_has_pat) 1683 return -ENODEV; 1684 1685 obj = drm_gem_object_lookup(dev, file, args->handle); 1686 if (obj == NULL) 1687 return -ENOENT; 1688 1689 /* prime objects have no backing filp to GEM mmap 1690 * pages from. 1691 */ 1692 if (!obj->filp) { 1693 drm_gem_object_unreference_unlocked(obj); 1694 return -EINVAL; 1695 } 1696 1697 addr = vm_mmap(obj->filp, 0, args->size, 1698 PROT_READ | PROT_WRITE, MAP_SHARED, 1699 args->offset); 1700 if (args->flags & I915_MMAP_WC) { 1701 struct mm_struct *mm = current->mm; 1702 struct vm_area_struct *vma; 1703 1704 down_write(&mm->mmap_sem); 1705 vma = find_vma(mm, addr); 1706 if (vma) 1707 vma->vm_page_prot = 1708 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1709 else 1710 addr = -ENOMEM; 1711 up_write(&mm->mmap_sem); 1712 } 1713 drm_gem_object_unreference_unlocked(obj); 1714 if (IS_ERR((void *)addr)) 1715 return addr; 1716 1717 args->addr_ptr = (uint64_t) addr; 1718 1719 return 0; 1720 } 1721 1722 /** 1723 * i915_gem_fault - fault a page into the GTT 1724 * vma: VMA in question 1725 * vmf: fault info 1726 * 1727 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1728 * from userspace. The fault handler takes care of binding the object to 1729 * the GTT (if needed), allocating and programming a fence register (again, 1730 * only if needed based on whether the old reg is still valid or the object 1731 * is tiled) and inserting a new PTE into the faulting process. 1732 * 1733 * Note that the faulting process may involve evicting existing objects 1734 * from the GTT and/or fence registers to make room. So performance may 1735 * suffer if the GTT working set is large or there are few fence registers 1736 * left. 1737 */ 1738 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1739 { 1740 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1741 struct drm_device *dev = obj->base.dev; 1742 struct drm_i915_private *dev_priv = dev->dev_private; 1743 struct i915_ggtt_view view = i915_ggtt_view_normal; 1744 pgoff_t page_offset; 1745 unsigned long pfn; 1746 int ret = 0; 1747 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1748 1749 intel_runtime_pm_get(dev_priv); 1750 1751 /* We don't use vmf->pgoff since that has the fake offset */ 1752 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1753 PAGE_SHIFT; 1754 1755 ret = i915_mutex_lock_interruptible(dev); 1756 if (ret) 1757 goto out; 1758 1759 trace_i915_gem_object_fault(obj, page_offset, true, write); 1760 1761 /* Try to flush the object off the GPU first without holding the lock. 1762 * Upon reacquiring the lock, we will perform our sanity checks and then 1763 * repeat the flush holding the lock in the normal manner to catch cases 1764 * where we are gazumped. 1765 */ 1766 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1767 if (ret) 1768 goto unlock; 1769 1770 /* Access to snoopable pages through the GTT is incoherent. */ 1771 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1772 ret = -EFAULT; 1773 goto unlock; 1774 } 1775 1776 /* Use a partial view if the object is bigger than the aperture. 
*/ 1777 if (obj->base.size >= dev_priv->gtt.mappable_end && 1778 obj->tiling_mode == I915_TILING_NONE) { 1779 static const unsigned int chunk_size = 256; // 1 MiB 1780 1781 memset(&view, 0, sizeof(view)); 1782 view.type = I915_GGTT_VIEW_PARTIAL; 1783 view.params.partial.offset = rounddown(page_offset, chunk_size); 1784 view.params.partial.size = 1785 min_t(unsigned int, 1786 chunk_size, 1787 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1788 view.params.partial.offset); 1789 } 1790 1791 /* Now pin it into the GTT if needed */ 1792 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1793 if (ret) 1794 goto unlock; 1795 1796 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1797 if (ret) 1798 goto unpin; 1799 1800 ret = i915_gem_object_get_fence(obj); 1801 if (ret) 1802 goto unpin; 1803 1804 /* Finally, remap it using the new GTT offset */ 1805 pfn = dev_priv->gtt.mappable_base + 1806 i915_gem_obj_ggtt_offset_view(obj, &view); 1807 pfn >>= PAGE_SHIFT; 1808 1809 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1810 /* Overriding existing pages in partial view does not cause 1811 * us any trouble as TLBs are still valid because the fault 1812 * is due to userspace losing part of the mapping or never 1813 * having accessed it before (at this partials' range). 1814 */ 1815 unsigned long base = vma->vm_start + 1816 (view.params.partial.offset << PAGE_SHIFT); 1817 unsigned int i; 1818 1819 for (i = 0; i < view.params.partial.size; i++) { 1820 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1821 if (ret) 1822 break; 1823 } 1824 1825 obj->fault_mappable = true; 1826 } else { 1827 if (!obj->fault_mappable) { 1828 unsigned long size = min_t(unsigned long, 1829 vma->vm_end - vma->vm_start, 1830 obj->base.size); 1831 int i; 1832 1833 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1834 ret = vm_insert_pfn(vma, 1835 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1836 pfn + i); 1837 if (ret) 1838 break; 1839 } 1840 1841 obj->fault_mappable = true; 1842 } else 1843 ret = vm_insert_pfn(vma, 1844 (unsigned long)vmf->virtual_address, 1845 pfn + page_offset); 1846 } 1847 unpin: 1848 i915_gem_object_ggtt_unpin_view(obj, &view); 1849 unlock: 1850 mutex_unlock(&dev->struct_mutex); 1851 out: 1852 switch (ret) { 1853 case -EIO: 1854 /* 1855 * We eat errors when the gpu is terminally wedged to avoid 1856 * userspace unduly crashing (gl has no provisions for mmaps to 1857 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1858 * and so needs to be reported. 1859 */ 1860 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1861 ret = VM_FAULT_SIGBUS; 1862 break; 1863 } 1864 case -EAGAIN: 1865 /* 1866 * EAGAIN means the gpu is hung and we'll wait for the error 1867 * handler to reset everything when re-faulting in 1868 * i915_mutex_lock_interruptible. 1869 */ 1870 case 0: 1871 case -ERESTARTSYS: 1872 case -EINTR: 1873 case -EBUSY: 1874 /* 1875 * EBUSY is ok: this just means that another thread 1876 * already did the job. 
1877 */ 1878 ret = VM_FAULT_NOPAGE; 1879 break; 1880 case -ENOMEM: 1881 ret = VM_FAULT_OOM; 1882 break; 1883 case -ENOSPC: 1884 case -EFAULT: 1885 ret = VM_FAULT_SIGBUS; 1886 break; 1887 default: 1888 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1889 ret = VM_FAULT_SIGBUS; 1890 break; 1891 } 1892 1893 intel_runtime_pm_put(dev_priv); 1894 return ret; 1895 } 1896 1897 /** 1898 * i915_gem_release_mmap - remove physical page mappings 1899 * @obj: obj in question 1900 * 1901 * Preserve the reservation of the mmapping with the DRM core code, but 1902 * relinquish ownership of the pages back to the system. 1903 * 1904 * It is vital that we remove the page mapping if we have mapped a tiled 1905 * object through the GTT and then lose the fence register due to 1906 * resource pressure. Similarly if the object has been moved out of the 1907 * aperture, than pages mapped into userspace must be revoked. Removing the 1908 * mapping will then trigger a page fault on the next user access, allowing 1909 * fixup by i915_gem_fault(). 1910 */ 1911 void 1912 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1913 { 1914 if (!obj->fault_mappable) 1915 return; 1916 1917 drm_vma_node_unmap(&obj->base.vma_node, 1918 obj->base.dev->anon_inode->i_mapping); 1919 obj->fault_mappable = false; 1920 } 1921 1922 void 1923 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 1924 { 1925 struct drm_i915_gem_object *obj; 1926 1927 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 1928 i915_gem_release_mmap(obj); 1929 } 1930 1931 uint32_t 1932 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1933 { 1934 uint32_t gtt_size; 1935 1936 if (INTEL_INFO(dev)->gen >= 4 || 1937 tiling_mode == I915_TILING_NONE) 1938 return size; 1939 1940 /* Previous chips need a power-of-two fence region when tiling */ 1941 if (INTEL_INFO(dev)->gen == 3) 1942 gtt_size = 1024*1024; 1943 else 1944 gtt_size = 512*1024; 1945 1946 while (gtt_size < size) 1947 gtt_size <<= 1; 1948 1949 return gtt_size; 1950 } 1951 1952 /** 1953 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1954 * @obj: object to check 1955 * 1956 * Return the required GTT alignment for an object, taking into account 1957 * potential fence register mapping. 1958 */ 1959 uint32_t 1960 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1961 int tiling_mode, bool fenced) 1962 { 1963 /* 1964 * Minimum alignment is 4k (GTT page size), but might be greater 1965 * if a fence register is needed for the object. 1966 */ 1967 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1968 tiling_mode == I915_TILING_NONE) 1969 return 4096; 1970 1971 /* 1972 * Previous chips need to be aligned to the size of the smallest 1973 * fence register that can contain the object. 1974 */ 1975 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1976 } 1977 1978 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1979 { 1980 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1981 int ret; 1982 1983 if (drm_vma_node_has_offset(&obj->base.vma_node)) 1984 return 0; 1985 1986 dev_priv->mm.shrinker_no_lock_stealing = true; 1987 1988 ret = drm_gem_create_mmap_offset(&obj->base); 1989 if (ret != -ENOSPC) 1990 goto out; 1991 1992 /* Badly fragmented mmap space? The only way we can recover 1993 * space is by destroying unwanted objects. We can't randomly release 1994 * mmap_offsets as userspace expects them to be persistent for the 1995 * lifetime of the objects. 
The closest we can is to release the 1996 * offsets on purgeable objects by truncating it and marking it purged, 1997 * which prevents userspace from ever using that object again. 1998 */ 1999 i915_gem_shrink(dev_priv, 2000 obj->base.size >> PAGE_SHIFT, 2001 I915_SHRINK_BOUND | 2002 I915_SHRINK_UNBOUND | 2003 I915_SHRINK_PURGEABLE); 2004 ret = drm_gem_create_mmap_offset(&obj->base); 2005 if (ret != -ENOSPC) 2006 goto out; 2007 2008 i915_gem_shrink_all(dev_priv); 2009 ret = drm_gem_create_mmap_offset(&obj->base); 2010 out: 2011 dev_priv->mm.shrinker_no_lock_stealing = false; 2012 2013 return ret; 2014 } 2015 2016 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2017 { 2018 drm_gem_free_mmap_offset(&obj->base); 2019 } 2020 2021 int 2022 i915_gem_mmap_gtt(struct drm_file *file, 2023 struct drm_device *dev, 2024 uint32_t handle, 2025 uint64_t *offset) 2026 { 2027 struct drm_i915_gem_object *obj; 2028 int ret; 2029 2030 ret = i915_mutex_lock_interruptible(dev); 2031 if (ret) 2032 return ret; 2033 2034 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2035 if (&obj->base == NULL) { 2036 ret = -ENOENT; 2037 goto unlock; 2038 } 2039 2040 if (obj->madv != I915_MADV_WILLNEED) { 2041 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2042 ret = -EFAULT; 2043 goto out; 2044 } 2045 2046 ret = i915_gem_object_create_mmap_offset(obj); 2047 if (ret) 2048 goto out; 2049 2050 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2051 2052 out: 2053 drm_gem_object_unreference(&obj->base); 2054 unlock: 2055 mutex_unlock(&dev->struct_mutex); 2056 return ret; 2057 } 2058 2059 /** 2060 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2061 * @dev: DRM device 2062 * @data: GTT mapping ioctl data 2063 * @file: GEM object info 2064 * 2065 * Simply returns the fake offset to userspace so it can mmap it. 2066 * The mmap call will end up in drm_gem_mmap(), which will set things 2067 * up so we can get faults in the handler above. 2068 * 2069 * The fault handler will take care of binding the object into the GTT 2070 * (since it may have been evicted to make room for something), allocating 2071 * a fence register, and mapping the appropriate aperture address into 2072 * userspace. 2073 */ 2074 int 2075 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2076 struct drm_file *file) 2077 { 2078 struct drm_i915_gem_mmap_gtt *args = data; 2079 2080 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2081 } 2082 2083 /* Immediately discard the backing storage */ 2084 static void 2085 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2086 { 2087 i915_gem_object_free_mmap_offset(obj); 2088 2089 if (obj->base.filp == NULL) 2090 return; 2091 2092 /* Our goal here is to return as much of the memory as 2093 * is possible back to the system as we are called from OOM. 2094 * To do this we must instruct the shmfs to drop all of its 2095 * backing pages, *now*. 
2096 */ 2097 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2098 obj->madv = __I915_MADV_PURGED; 2099 } 2100 2101 /* Try to discard unwanted pages */ 2102 static void 2103 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2104 { 2105 struct address_space *mapping; 2106 2107 switch (obj->madv) { 2108 case I915_MADV_DONTNEED: 2109 i915_gem_object_truncate(obj); 2110 case __I915_MADV_PURGED: 2111 return; 2112 } 2113 2114 if (obj->base.filp == NULL) 2115 return; 2116 2117 mapping = file_inode(obj->base.filp)->i_mapping, 2118 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2119 } 2120 2121 static void 2122 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2123 { 2124 struct sg_page_iter sg_iter; 2125 int ret; 2126 2127 BUG_ON(obj->madv == __I915_MADV_PURGED); 2128 2129 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2130 if (ret) { 2131 /* In the event of a disaster, abandon all caches and 2132 * hope for the best. 2133 */ 2134 WARN_ON(ret != -EIO); 2135 i915_gem_clflush_object(obj, true); 2136 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2137 } 2138 2139 i915_gem_gtt_finish_object(obj); 2140 2141 if (i915_gem_object_needs_bit17_swizzle(obj)) 2142 i915_gem_object_save_bit_17_swizzle(obj); 2143 2144 if (obj->madv == I915_MADV_DONTNEED) 2145 obj->dirty = 0; 2146 2147 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2148 struct page *page = sg_page_iter_page(&sg_iter); 2149 2150 if (obj->dirty) 2151 set_page_dirty(page); 2152 2153 if (obj->madv == I915_MADV_WILLNEED) 2154 mark_page_accessed(page); 2155 2156 page_cache_release(page); 2157 } 2158 obj->dirty = 0; 2159 2160 sg_free_table(obj->pages); 2161 kfree(obj->pages); 2162 } 2163 2164 int 2165 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2166 { 2167 const struct drm_i915_gem_object_ops *ops = obj->ops; 2168 2169 if (obj->pages == NULL) 2170 return 0; 2171 2172 if (obj->pages_pin_count) 2173 return -EBUSY; 2174 2175 BUG_ON(i915_gem_obj_bound_any(obj)); 2176 2177 /* ->put_pages might need to allocate memory for the bit17 swizzle 2178 * array, hence protect them from being reaped by removing them from gtt 2179 * lists early. */ 2180 list_del(&obj->global_list); 2181 2182 ops->put_pages(obj); 2183 obj->pages = NULL; 2184 2185 i915_gem_object_invalidate(obj); 2186 2187 return 0; 2188 } 2189 2190 static int 2191 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2192 { 2193 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2194 int page_count, i; 2195 struct address_space *mapping; 2196 struct sg_table *st; 2197 struct scatterlist *sg; 2198 struct sg_page_iter sg_iter; 2199 struct page *page; 2200 unsigned long last_pfn = 0; /* suppress gcc warning */ 2201 int ret; 2202 gfp_t gfp; 2203 2204 /* Assert that the object is not currently in any GPU domain. As it 2205 * wasn't in the GTT, there shouldn't be any way it could have been in 2206 * a GPU cache 2207 */ 2208 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2209 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2210 2211 st = kmalloc(sizeof(*st), GFP_KERNEL); 2212 if (st == NULL) 2213 return -ENOMEM; 2214 2215 page_count = obj->base.size / PAGE_SIZE; 2216 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2217 kfree(st); 2218 return -ENOMEM; 2219 } 2220 2221 /* Get the list of pages out of our struct file. They'll be pinned 2222 * at this point until we release them. 
2223 * 2224 * Fail silently without starting the shrinker 2225 */ 2226 mapping = file_inode(obj->base.filp)->i_mapping; 2227 gfp = mapping_gfp_mask(mapping); 2228 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2229 gfp &= ~(__GFP_IO | __GFP_WAIT); 2230 sg = st->sgl; 2231 st->nents = 0; 2232 for (i = 0; i < page_count; i++) { 2233 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2234 if (IS_ERR(page)) { 2235 i915_gem_shrink(dev_priv, 2236 page_count, 2237 I915_SHRINK_BOUND | 2238 I915_SHRINK_UNBOUND | 2239 I915_SHRINK_PURGEABLE); 2240 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2241 } 2242 if (IS_ERR(page)) { 2243 /* We've tried hard to allocate the memory by reaping 2244 * our own buffer, now let the real VM do its job and 2245 * go down in flames if truly OOM. 2246 */ 2247 i915_gem_shrink_all(dev_priv); 2248 page = shmem_read_mapping_page(mapping, i); 2249 if (IS_ERR(page)) { 2250 ret = PTR_ERR(page); 2251 goto err_pages; 2252 } 2253 } 2254 #ifdef CONFIG_SWIOTLB 2255 if (swiotlb_nr_tbl()) { 2256 st->nents++; 2257 sg_set_page(sg, page, PAGE_SIZE, 0); 2258 sg = sg_next(sg); 2259 continue; 2260 } 2261 #endif 2262 if (!i || page_to_pfn(page) != last_pfn + 1) { 2263 if (i) 2264 sg = sg_next(sg); 2265 st->nents++; 2266 sg_set_page(sg, page, PAGE_SIZE, 0); 2267 } else { 2268 sg->length += PAGE_SIZE; 2269 } 2270 last_pfn = page_to_pfn(page); 2271 2272 /* Check that the i965g/gm workaround works. */ 2273 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2274 } 2275 #ifdef CONFIG_SWIOTLB 2276 if (!swiotlb_nr_tbl()) 2277 #endif 2278 sg_mark_end(sg); 2279 obj->pages = st; 2280 2281 ret = i915_gem_gtt_prepare_object(obj); 2282 if (ret) 2283 goto err_pages; 2284 2285 if (i915_gem_object_needs_bit17_swizzle(obj)) 2286 i915_gem_object_do_bit_17_swizzle(obj); 2287 2288 if (obj->tiling_mode != I915_TILING_NONE && 2289 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2290 i915_gem_object_pin_pages(obj); 2291 2292 return 0; 2293 2294 err_pages: 2295 sg_mark_end(sg); 2296 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2297 page_cache_release(sg_page_iter_page(&sg_iter)); 2298 sg_free_table(st); 2299 kfree(st); 2300 2301 /* shmemfs first checks if there is enough memory to allocate the page 2302 * and reports ENOSPC should there be insufficient, along with the usual 2303 * ENOMEM for a genuine allocation failure. 2304 * 2305 * We use ENOSPC in our driver to mean that we have run out of aperture 2306 * space and so want to translate the error from shmemfs back to our 2307 * usual understanding of ENOMEM. 2308 */ 2309 if (ret == -ENOSPC) 2310 ret = -ENOMEM; 2311 2312 return ret; 2313 } 2314 2315 /* Ensure that the associated pages are gathered from the backing storage 2316 * and pinned into our object. i915_gem_object_get_pages() may be called 2317 * multiple times before they are released by a single call to 2318 * i915_gem_object_put_pages() - once the pages are no longer referenced 2319 * either as a result of memory pressure (reaping pages under the shrinker) 2320 * or as the object is itself released. 
2321 */ 2322 int 2323 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2324 { 2325 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2326 const struct drm_i915_gem_object_ops *ops = obj->ops; 2327 int ret; 2328 2329 if (obj->pages) 2330 return 0; 2331 2332 if (obj->madv != I915_MADV_WILLNEED) { 2333 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2334 return -EFAULT; 2335 } 2336 2337 BUG_ON(obj->pages_pin_count); 2338 2339 ret = ops->get_pages(obj); 2340 if (ret) 2341 return ret; 2342 2343 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2344 2345 obj->get_page.sg = obj->pages->sgl; 2346 obj->get_page.last = 0; 2347 2348 return 0; 2349 } 2350 2351 void i915_vma_move_to_active(struct i915_vma *vma, 2352 struct intel_engine_cs *ring) 2353 { 2354 struct drm_i915_gem_object *obj = vma->obj; 2355 2356 /* Add a reference if we're newly entering the active list. */ 2357 if (obj->active == 0) 2358 drm_gem_object_reference(&obj->base); 2359 obj->active |= intel_ring_flag(ring); 2360 2361 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2362 i915_gem_request_assign(&obj->last_read_req[ring->id], 2363 intel_ring_get_request(ring)); 2364 2365 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2366 } 2367 2368 static void 2369 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2370 { 2371 RQ_BUG_ON(obj->last_write_req == NULL); 2372 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2373 2374 i915_gem_request_assign(&obj->last_write_req, NULL); 2375 intel_fb_obj_flush(obj, true); 2376 } 2377 2378 static void 2379 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2380 { 2381 struct i915_vma *vma; 2382 2383 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2384 RQ_BUG_ON(!(obj->active & (1 << ring))); 2385 2386 list_del_init(&obj->ring_list[ring]); 2387 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2388 2389 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2390 i915_gem_object_retire__write(obj); 2391 2392 obj->active &= ~(1 << ring); 2393 if (obj->active) 2394 return; 2395 2396 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2397 if (!list_empty(&vma->mm_list)) 2398 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2399 } 2400 2401 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2402 drm_gem_object_unreference(&obj->base); 2403 } 2404 2405 static int 2406 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2407 { 2408 struct drm_i915_private *dev_priv = dev->dev_private; 2409 struct intel_engine_cs *ring; 2410 int ret, i, j; 2411 2412 /* Carefully retire all requests without writing to the rings */ 2413 for_each_ring(ring, dev_priv, i) { 2414 ret = intel_ring_idle(ring); 2415 if (ret) 2416 return ret; 2417 } 2418 i915_gem_retire_requests(dev); 2419 2420 /* Finally reset hw state */ 2421 for_each_ring(ring, dev_priv, i) { 2422 intel_ring_init_seqno(ring, seqno); 2423 2424 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2425 ring->semaphore.sync_seqno[j] = 0; 2426 } 2427 2428 return 0; 2429 } 2430 2431 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2432 { 2433 struct drm_i915_private *dev_priv = dev->dev_private; 2434 int ret; 2435 2436 if (seqno == 0) 2437 return -EINVAL; 2438 2439 /* HWS page needs to be set less than what we 2440 * will inject to ring 2441 */ 2442 ret = i915_gem_init_seqno(dev, seqno - 1); 2443 if (ret) 2444 return ret; 2445 2446 /* Carefully set the last_seqno value so that wrap 2447 * detection still 
works 2448 */ 2449 dev_priv->next_seqno = seqno; 2450 dev_priv->last_seqno = seqno - 1; 2451 if (dev_priv->last_seqno == 0) 2452 dev_priv->last_seqno--; 2453 2454 return 0; 2455 } 2456 2457 int 2458 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2459 { 2460 struct drm_i915_private *dev_priv = dev->dev_private; 2461 2462 /* reserve 0 for non-seqno */ 2463 if (dev_priv->next_seqno == 0) { 2464 int ret = i915_gem_init_seqno(dev, 0); 2465 if (ret) 2466 return ret; 2467 2468 dev_priv->next_seqno = 1; 2469 } 2470 2471 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2472 return 0; 2473 } 2474 2475 int __i915_add_request(struct intel_engine_cs *ring, 2476 struct drm_file *file, 2477 struct drm_i915_gem_object *obj) 2478 { 2479 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2480 struct drm_i915_gem_request *request; 2481 struct intel_ringbuffer *ringbuf; 2482 u32 request_start; 2483 int ret; 2484 2485 request = ring->outstanding_lazy_request; 2486 if (WARN_ON(request == NULL)) 2487 return -ENOMEM; 2488 2489 if (i915.enable_execlists) { 2490 ringbuf = request->ctx->engine[ring->id].ringbuf; 2491 } else 2492 ringbuf = ring->buffer; 2493 2494 request_start = intel_ring_get_tail(ringbuf); 2495 /* 2496 * Emit any outstanding flushes - execbuf can fail to emit the flush 2497 * after having emitted the batchbuffer command. Hence we need to fix 2498 * things up similar to emitting the lazy request. The difference here 2499 * is that the flush _must_ happen before the next request, no matter 2500 * what. 2501 */ 2502 if (i915.enable_execlists) { 2503 ret = logical_ring_flush_all_caches(ringbuf, request->ctx); 2504 if (ret) 2505 return ret; 2506 } else { 2507 ret = intel_ring_flush_all_caches(ring); 2508 if (ret) 2509 return ret; 2510 } 2511 2512 /* Record the position of the start of the request so that 2513 * should we detect the updated seqno part-way through the 2514 * GPU processing the request, we never over-estimate the 2515 * position of the head. 2516 */ 2517 request->postfix = intel_ring_get_tail(ringbuf); 2518 2519 if (i915.enable_execlists) { 2520 ret = ring->emit_request(ringbuf, request); 2521 if (ret) 2522 return ret; 2523 } else { 2524 ret = ring->add_request(ring); 2525 if (ret) 2526 return ret; 2527 2528 request->tail = intel_ring_get_tail(ringbuf); 2529 } 2530 2531 request->head = request_start; 2532 2533 /* Whilst this request exists, batch_obj will be on the 2534 * active_list, and so will hold the active reference. Only when this 2535 * request is retired will the the batch_obj be moved onto the 2536 * inactive_list and lose its active reference. Hence we do not need 2537 * to explicitly hold another reference here. 2538 */ 2539 request->batch_obj = obj; 2540 2541 if (!i915.enable_execlists) { 2542 /* Hold a reference to the current context so that we can inspect 2543 * it later in case a hangcheck error event fires. 
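 *
 * The reference taken just below is dropped again when the request itself
 * is finally freed; schematically (matching i915_gem_request_free() in
 * this file):
 *
 *	__i915_add_request():     i915_gem_context_reference(request->ctx);
 *	i915_gem_request_free():  i915_gem_context_unreference(req->ctx);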
2544 */ 2545 request->ctx = ring->last_context; 2546 if (request->ctx) 2547 i915_gem_context_reference(request->ctx); 2548 } 2549 2550 request->emitted_jiffies = jiffies; 2551 ring->last_submitted_seqno = request->seqno; 2552 list_add_tail(&request->list, &ring->request_list); 2553 request->file_priv = NULL; 2554 2555 if (file) { 2556 struct drm_i915_file_private *file_priv = file->driver_priv; 2557 2558 spin_lock(&file_priv->mm.lock); 2559 request->file_priv = file_priv; 2560 list_add_tail(&request->client_list, 2561 &file_priv->mm.request_list); 2562 spin_unlock(&file_priv->mm.lock); 2563 2564 request->pid = get_pid(task_pid(current)); 2565 } 2566 2567 trace_i915_gem_request_add(request); 2568 ring->outstanding_lazy_request = NULL; 2569 2570 i915_queue_hangcheck(ring->dev); 2571 2572 queue_delayed_work(dev_priv->wq, 2573 &dev_priv->mm.retire_work, 2574 round_jiffies_up_relative(HZ)); 2575 intel_mark_busy(dev_priv->dev); 2576 2577 return 0; 2578 } 2579 2580 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2581 const struct intel_context *ctx) 2582 { 2583 unsigned long elapsed; 2584 2585 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2586 2587 if (ctx->hang_stats.banned) 2588 return true; 2589 2590 if (ctx->hang_stats.ban_period_seconds && 2591 elapsed <= ctx->hang_stats.ban_period_seconds) { 2592 if (!i915_gem_context_is_default(ctx)) { 2593 DRM_DEBUG("context hanging too fast, banning!\n"); 2594 return true; 2595 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2596 if (i915_stop_ring_allow_warn(dev_priv)) 2597 DRM_ERROR("gpu hanging too fast, banning!\n"); 2598 return true; 2599 } 2600 } 2601 2602 return false; 2603 } 2604 2605 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2606 struct intel_context *ctx, 2607 const bool guilty) 2608 { 2609 struct i915_ctx_hang_stats *hs; 2610 2611 if (WARN_ON(!ctx)) 2612 return; 2613 2614 hs = &ctx->hang_stats; 2615 2616 if (guilty) { 2617 hs->banned = i915_context_is_banned(dev_priv, ctx); 2618 hs->batch_active++; 2619 hs->guilty_ts = get_seconds(); 2620 } else { 2621 hs->batch_pending++; 2622 } 2623 } 2624 2625 void i915_gem_request_free(struct kref *req_ref) 2626 { 2627 struct drm_i915_gem_request *req = container_of(req_ref, 2628 typeof(*req), ref); 2629 struct intel_context *ctx = req->ctx; 2630 2631 if (ctx) { 2632 if (i915.enable_execlists) { 2633 struct intel_engine_cs *ring = req->ring; 2634 2635 if (ctx != ring->default_context) 2636 intel_lr_context_unpin(ring, ctx); 2637 } 2638 2639 i915_gem_context_unreference(ctx); 2640 } 2641 2642 kmem_cache_free(req->i915->requests, req); 2643 } 2644 2645 int i915_gem_request_alloc(struct intel_engine_cs *ring, 2646 struct intel_context *ctx) 2647 { 2648 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2649 struct drm_i915_gem_request *req; 2650 int ret; 2651 2652 if (ring->outstanding_lazy_request) 2653 return 0; 2654 2655 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 2656 if (req == NULL) 2657 return -ENOMEM; 2658 2659 kref_init(&req->ref); 2660 req->i915 = dev_priv; 2661 2662 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2663 if (ret) 2664 goto err; 2665 2666 req->ring = ring; 2667 2668 if (i915.enable_execlists) 2669 ret = intel_logical_ring_alloc_request_extras(req, ctx); 2670 else 2671 ret = intel_ring_alloc_request_extras(req); 2672 if (ret) 2673 goto err; 2674 2675 ring->outstanding_lazy_request = req; 2676 return 0; 2677 2678 err: 2679 kmem_cache_free(dev_priv->requests, req); 2680 return ret; 2681 } 2682 2683 struct 
drm_i915_gem_request * 2684 i915_gem_find_active_request(struct intel_engine_cs *ring) 2685 { 2686 struct drm_i915_gem_request *request; 2687 2688 list_for_each_entry(request, &ring->request_list, list) { 2689 if (i915_gem_request_completed(request, false)) 2690 continue; 2691 2692 return request; 2693 } 2694 2695 return NULL; 2696 } 2697 2698 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2699 struct intel_engine_cs *ring) 2700 { 2701 struct drm_i915_gem_request *request; 2702 bool ring_hung; 2703 2704 request = i915_gem_find_active_request(ring); 2705 2706 if (request == NULL) 2707 return; 2708 2709 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2710 2711 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2712 2713 list_for_each_entry_continue(request, &ring->request_list, list) 2714 i915_set_reset_status(dev_priv, request->ctx, false); 2715 } 2716 2717 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2718 struct intel_engine_cs *ring) 2719 { 2720 while (!list_empty(&ring->active_list)) { 2721 struct drm_i915_gem_object *obj; 2722 2723 obj = list_first_entry(&ring->active_list, 2724 struct drm_i915_gem_object, 2725 ring_list[ring->id]); 2726 2727 i915_gem_object_retire__read(obj, ring->id); 2728 } 2729 2730 /* 2731 * Clear the execlists queue up before freeing the requests, as those 2732 * are the ones that keep the context and ringbuffer backing objects 2733 * pinned in place. 2734 */ 2735 while (!list_empty(&ring->execlist_queue)) { 2736 struct drm_i915_gem_request *submit_req; 2737 2738 submit_req = list_first_entry(&ring->execlist_queue, 2739 struct drm_i915_gem_request, 2740 execlist_link); 2741 list_del(&submit_req->execlist_link); 2742 2743 if (submit_req->ctx != ring->default_context) 2744 intel_lr_context_unpin(ring, submit_req->ctx); 2745 2746 i915_gem_request_unreference(submit_req); 2747 } 2748 2749 /* 2750 * We must free the requests after all the corresponding objects have 2751 * been moved off active lists. Which is the same order as the normal 2752 * retire_requests function does. This is important if object hold 2753 * implicit references on things like e.g. ppgtt address spaces through 2754 * the request. 2755 */ 2756 while (!list_empty(&ring->request_list)) { 2757 struct drm_i915_gem_request *request; 2758 2759 request = list_first_entry(&ring->request_list, 2760 struct drm_i915_gem_request, 2761 list); 2762 2763 i915_gem_request_retire(request); 2764 } 2765 2766 /* This may not have been flushed before the reset, so clean it now */ 2767 i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); 2768 } 2769 2770 void i915_gem_restore_fences(struct drm_device *dev) 2771 { 2772 struct drm_i915_private *dev_priv = dev->dev_private; 2773 int i; 2774 2775 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2776 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2777 2778 /* 2779 * Commit delayed tiling changes if we have an object still 2780 * attached to the fence, otherwise just clear the fence. 2781 */ 2782 if (reg->obj) { 2783 i915_gem_object_update_fence(reg->obj, reg, 2784 reg->obj->tiling_mode); 2785 } else { 2786 i915_gem_write_fence(dev, i, NULL); 2787 } 2788 } 2789 } 2790 2791 void i915_gem_reset(struct drm_device *dev) 2792 { 2793 struct drm_i915_private *dev_priv = dev->dev_private; 2794 struct intel_engine_cs *ring; 2795 int i; 2796 2797 /* 2798 * Before we free the objects from the requests, we need to inspect 2799 * them for finding the guilty party. 
As the requests only borrow 2800 * their reference to the objects, the inspection must be done first. 2801 */ 2802 for_each_ring(ring, dev_priv, i) 2803 i915_gem_reset_ring_status(dev_priv, ring); 2804 2805 for_each_ring(ring, dev_priv, i) 2806 i915_gem_reset_ring_cleanup(dev_priv, ring); 2807 2808 i915_gem_context_reset(dev); 2809 2810 i915_gem_restore_fences(dev); 2811 2812 WARN_ON(i915_verify_lists(dev)); 2813 } 2814 2815 /** 2816 * This function clears the request list as sequence numbers are passed. 2817 */ 2818 void 2819 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2820 { 2821 WARN_ON(i915_verify_lists(ring->dev)); 2822 2823 /* Retire requests first as we use it above for the early return. 2824 * If we retire requests last, we may use a later seqno and so clear 2825 * the requests lists without clearing the active list, leading to 2826 * confusion. 2827 */ 2828 while (!list_empty(&ring->request_list)) { 2829 struct drm_i915_gem_request *request; 2830 2831 request = list_first_entry(&ring->request_list, 2832 struct drm_i915_gem_request, 2833 list); 2834 2835 if (!i915_gem_request_completed(request, true)) 2836 break; 2837 2838 i915_gem_request_retire(request); 2839 } 2840 2841 /* Move any buffers on the active list that are no longer referenced 2842 * by the ringbuffer to the flushing/inactive lists as appropriate, 2843 * before we free the context associated with the requests. 2844 */ 2845 while (!list_empty(&ring->active_list)) { 2846 struct drm_i915_gem_object *obj; 2847 2848 obj = list_first_entry(&ring->active_list, 2849 struct drm_i915_gem_object, 2850 ring_list[ring->id]); 2851 2852 if (!list_empty(&obj->last_read_req[ring->id]->list)) 2853 break; 2854 2855 i915_gem_object_retire__read(obj, ring->id); 2856 } 2857 2858 if (unlikely(ring->trace_irq_req && 2859 i915_gem_request_completed(ring->trace_irq_req, true))) { 2860 ring->irq_put(ring); 2861 i915_gem_request_assign(&ring->trace_irq_req, NULL); 2862 } 2863 2864 WARN_ON(i915_verify_lists(ring->dev)); 2865 } 2866 2867 bool 2868 i915_gem_retire_requests(struct drm_device *dev) 2869 { 2870 struct drm_i915_private *dev_priv = dev->dev_private; 2871 struct intel_engine_cs *ring; 2872 bool idle = true; 2873 int i; 2874 2875 for_each_ring(ring, dev_priv, i) { 2876 i915_gem_retire_requests_ring(ring); 2877 idle &= list_empty(&ring->request_list); 2878 if (i915.enable_execlists) { 2879 unsigned long flags; 2880 2881 spin_lock_irqsave(&ring->execlist_lock, flags); 2882 idle &= list_empty(&ring->execlist_queue); 2883 spin_unlock_irqrestore(&ring->execlist_lock, flags); 2884 2885 intel_execlists_retire_requests(ring); 2886 } 2887 } 2888 2889 if (idle) 2890 mod_delayed_work(dev_priv->wq, 2891 &dev_priv->mm.idle_work, 2892 msecs_to_jiffies(100)); 2893 2894 return idle; 2895 } 2896 2897 static void 2898 i915_gem_retire_work_handler(struct work_struct *work) 2899 { 2900 struct drm_i915_private *dev_priv = 2901 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2902 struct drm_device *dev = dev_priv->dev; 2903 bool idle; 2904 2905 /* Come back later if the device is busy... 
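 * In practice this just means the work re-queues itself roughly once a
 * second (round_jiffies_up_relative(HZ)) until i915_gem_retire_requests()
 * reports that every ring has gone idle.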
*/ 2906 idle = false; 2907 if (mutex_trylock(&dev->struct_mutex)) { 2908 idle = i915_gem_retire_requests(dev); 2909 mutex_unlock(&dev->struct_mutex); 2910 } 2911 if (!idle) 2912 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2913 round_jiffies_up_relative(HZ)); 2914 } 2915 2916 static void 2917 i915_gem_idle_work_handler(struct work_struct *work) 2918 { 2919 struct drm_i915_private *dev_priv = 2920 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2921 struct drm_device *dev = dev_priv->dev; 2922 struct intel_engine_cs *ring; 2923 int i; 2924 2925 for_each_ring(ring, dev_priv, i) 2926 if (!list_empty(&ring->request_list)) 2927 return; 2928 2929 intel_mark_idle(dev); 2930 2931 if (mutex_trylock(&dev->struct_mutex)) { 2932 struct intel_engine_cs *ring; 2933 int i; 2934 2935 for_each_ring(ring, dev_priv, i) 2936 i915_gem_batch_pool_fini(&ring->batch_pool); 2937 2938 mutex_unlock(&dev->struct_mutex); 2939 } 2940 } 2941 2942 /** 2943 * Ensures that an object will eventually get non-busy by flushing any required 2944 * write domains, emitting any outstanding lazy request and retiring and 2945 * completed requests. 2946 */ 2947 static int 2948 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2949 { 2950 int ret, i; 2951 2952 if (!obj->active) 2953 return 0; 2954 2955 for (i = 0; i < I915_NUM_RINGS; i++) { 2956 struct drm_i915_gem_request *req; 2957 2958 req = obj->last_read_req[i]; 2959 if (req == NULL) 2960 continue; 2961 2962 if (list_empty(&req->list)) 2963 goto retire; 2964 2965 ret = i915_gem_check_olr(req); 2966 if (ret) 2967 return ret; 2968 2969 if (i915_gem_request_completed(req, true)) { 2970 __i915_gem_request_retire__upto(req); 2971 retire: 2972 i915_gem_object_retire__read(obj, i); 2973 } 2974 } 2975 2976 return 0; 2977 } 2978 2979 /** 2980 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2981 * @DRM_IOCTL_ARGS: standard ioctl arguments 2982 * 2983 * Returns 0 if successful, else an error is returned with the remaining time in 2984 * the timeout parameter. 2985 * -ETIME: object is still busy after timeout 2986 * -ERESTARTSYS: signal interrupted the wait 2987 * -ENONENT: object doesn't exist 2988 * Also possible, but rare: 2989 * -EAGAIN: GPU wedged 2990 * -ENOMEM: damn 2991 * -ENODEV: Internal IRQ fail 2992 * -E?: The add request failed 2993 * 2994 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2995 * non-zero timeout parameter the wait ioctl will wait for the given number of 2996 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2997 * without holding struct_mutex the object may become re-busied before this 2998 * function completes. A similar but shorter * race condition exists in the busy 2999 * ioctl 3000 */ 3001 int 3002 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3003 { 3004 struct drm_i915_private *dev_priv = dev->dev_private; 3005 struct drm_i915_gem_wait *args = data; 3006 struct drm_i915_gem_object *obj; 3007 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3008 unsigned reset_counter; 3009 int i, n = 0; 3010 int ret; 3011 3012 if (args->flags != 0) 3013 return -EINVAL; 3014 3015 ret = i915_mutex_lock_interruptible(dev); 3016 if (ret) 3017 return ret; 3018 3019 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3020 if (&obj->base == NULL) { 3021 mutex_unlock(&dev->struct_mutex); 3022 return -ENOENT; 3023 } 3024 3025 /* Need to make sure the object gets inactive eventually. 
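 *
 * For reference, the userspace side of this ioctl looks roughly like the
 * following (illustrative sketch, assuming the usual libdrm drmIoctl()
 * wrapper):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = -1,
 *	};
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * where a negative timeout waits indefinitely, while timeout_ns == 0
 * merely polls and returns -ETIME if the object is still busy (see below).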
*/ 3026 ret = i915_gem_object_flush_active(obj); 3027 if (ret) 3028 goto out; 3029 3030 if (!obj->active) 3031 goto out; 3032 3033 /* Do this after OLR check to make sure we make forward progress polling 3034 * on this IOCTL with a timeout == 0 (like busy ioctl) 3035 */ 3036 if (args->timeout_ns == 0) { 3037 ret = -ETIME; 3038 goto out; 3039 } 3040 3041 drm_gem_object_unreference(&obj->base); 3042 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3043 3044 for (i = 0; i < I915_NUM_RINGS; i++) { 3045 if (obj->last_read_req[i] == NULL) 3046 continue; 3047 3048 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3049 } 3050 3051 mutex_unlock(&dev->struct_mutex); 3052 3053 for (i = 0; i < n; i++) { 3054 if (ret == 0) 3055 ret = __i915_wait_request(req[i], reset_counter, true, 3056 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3057 file->driver_priv); 3058 i915_gem_request_unreference__unlocked(req[i]); 3059 } 3060 return ret; 3061 3062 out: 3063 drm_gem_object_unreference(&obj->base); 3064 mutex_unlock(&dev->struct_mutex); 3065 return ret; 3066 } 3067 3068 static int 3069 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3070 struct intel_engine_cs *to, 3071 struct drm_i915_gem_request *req) 3072 { 3073 struct intel_engine_cs *from; 3074 int ret; 3075 3076 from = i915_gem_request_get_ring(req); 3077 if (to == from) 3078 return 0; 3079 3080 if (i915_gem_request_completed(req, true)) 3081 return 0; 3082 3083 ret = i915_gem_check_olr(req); 3084 if (ret) 3085 return ret; 3086 3087 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3088 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3089 ret = __i915_wait_request(req, 3090 atomic_read(&i915->gpu_error.reset_counter), 3091 i915->mm.interruptible, 3092 NULL, 3093 &i915->rps.semaphores); 3094 if (ret) 3095 return ret; 3096 3097 i915_gem_object_retire_request(obj, req); 3098 } else { 3099 int idx = intel_ring_sync_index(from, to); 3100 u32 seqno = i915_gem_request_get_seqno(req); 3101 3102 if (seqno <= from->semaphore.sync_seqno[idx]) 3103 return 0; 3104 3105 trace_i915_gem_ring_sync_to(from, to, req); 3106 ret = to->semaphore.sync_to(to, from, seqno); 3107 if (ret) 3108 return ret; 3109 3110 /* We use last_read_req because sync_to() 3111 * might have just caused seqno wrap under 3112 * the radar. 3113 */ 3114 from->semaphore.sync_seqno[idx] = 3115 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3116 } 3117 3118 return 0; 3119 } 3120 3121 /** 3122 * i915_gem_object_sync - sync an object to a ring. 3123 * 3124 * @obj: object which may be in use on another ring. 3125 * @to: ring we wish to use the object on. May be NULL. 3126 * 3127 * This code is meant to abstract object synchronization with the GPU. 3128 * Calling with NULL implies synchronizing the object with the CPU 3129 * rather than a particular GPU ring. Conceptually we serialise writes 3130 * between engines inside the GPU. We only allow on engine to write 3131 * into a buffer at any time, but multiple readers. To ensure each has 3132 * a coherent view of memory, we must: 3133 * 3134 * - If there is an outstanding write request to the object, the new 3135 * request must wait for it to complete (either CPU or in hw, requests 3136 * on the same ring will be naturally ordered). 3137 * 3138 * - If we are a write request (pending_write_domain is set), the new 3139 * request must wait for outstanding read requests to complete. 3140 * 3141 * Returns 0 if successful, else propagates up the lower layer error. 
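 *
 * As a rough usage sketch (illustrative; this mirrors, but does not quote,
 * the execbuffer path):
 *
 *	list_for_each_entry(vma, vmas, exec_list) {
 *		ret = i915_gem_object_sync(vma->obj, ring);
 *		if (ret)
 *			return ret;
 *	}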
3142 */ 3143 int 3144 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3145 struct intel_engine_cs *to) 3146 { 3147 const bool readonly = obj->base.pending_write_domain == 0; 3148 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3149 int ret, i, n; 3150 3151 if (!obj->active) 3152 return 0; 3153 3154 if (to == NULL) 3155 return i915_gem_object_wait_rendering(obj, readonly); 3156 3157 n = 0; 3158 if (readonly) { 3159 if (obj->last_write_req) 3160 req[n++] = obj->last_write_req; 3161 } else { 3162 for (i = 0; i < I915_NUM_RINGS; i++) 3163 if (obj->last_read_req[i]) 3164 req[n++] = obj->last_read_req[i]; 3165 } 3166 for (i = 0; i < n; i++) { 3167 ret = __i915_gem_object_sync(obj, to, req[i]); 3168 if (ret) 3169 return ret; 3170 } 3171 3172 return 0; 3173 } 3174 3175 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3176 { 3177 u32 old_write_domain, old_read_domains; 3178 3179 /* Force a pagefault for domain tracking on next user access */ 3180 i915_gem_release_mmap(obj); 3181 3182 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3183 return; 3184 3185 /* Wait for any direct GTT access to complete */ 3186 mb(); 3187 3188 old_read_domains = obj->base.read_domains; 3189 old_write_domain = obj->base.write_domain; 3190 3191 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3192 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3193 3194 trace_i915_gem_object_change_domain(obj, 3195 old_read_domains, 3196 old_write_domain); 3197 } 3198 3199 int i915_vma_unbind(struct i915_vma *vma) 3200 { 3201 struct drm_i915_gem_object *obj = vma->obj; 3202 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3203 int ret; 3204 3205 if (list_empty(&vma->vma_link)) 3206 return 0; 3207 3208 if (!drm_mm_node_allocated(&vma->node)) { 3209 i915_gem_vma_destroy(vma); 3210 return 0; 3211 } 3212 3213 if (vma->pin_count) 3214 return -EBUSY; 3215 3216 BUG_ON(obj->pages == NULL); 3217 3218 ret = i915_gem_object_wait_rendering(obj, false); 3219 if (ret) 3220 return ret; 3221 /* Continue on if we fail due to EIO, the GPU is hung so we 3222 * should be safe and we need to cleanup or else we might 3223 * cause memory corruption through use-after-free. 3224 */ 3225 3226 if (i915_is_ggtt(vma->vm) && 3227 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3228 i915_gem_object_finish_gtt(obj); 3229 3230 /* release the fence reg _after_ flushing */ 3231 ret = i915_gem_object_put_fence(obj); 3232 if (ret) 3233 return ret; 3234 } 3235 3236 trace_i915_vma_unbind(vma); 3237 3238 vma->vm->unbind_vma(vma); 3239 vma->bound = 0; 3240 3241 list_del_init(&vma->mm_list); 3242 if (i915_is_ggtt(vma->vm)) { 3243 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3244 obj->map_and_fenceable = false; 3245 } else if (vma->ggtt_view.pages) { 3246 sg_free_table(vma->ggtt_view.pages); 3247 kfree(vma->ggtt_view.pages); 3248 } 3249 vma->ggtt_view.pages = NULL; 3250 } 3251 3252 drm_mm_remove_node(&vma->node); 3253 i915_gem_vma_destroy(vma); 3254 3255 /* Since the unbound list is global, only move to that list if 3256 * no more VMAs exist. */ 3257 if (list_empty(&obj->vma_list)) 3258 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3259 3260 /* And finally now the object is completely decoupled from this vma, 3261 * we can drop its hold on the backing storage and allow it to be 3262 * reaped by the shrinker. 
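 *
 * This unpin balances the i915_gem_object_pin_pages() taken when the VMA
 * was bound in i915_gem_object_bind_to_vm(); the overall page lifetime is,
 * schematically:
 *
 *	i915_gem_object_get_pages(obj);
 *	i915_gem_object_pin_pages(obj);      (bind)
 *	...
 *	i915_gem_object_unpin_pages(obj);    (unbind, here)
 *	i915_gem_object_put_pages(obj);      (shrinker or final free, later)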
3263 */ 3264 i915_gem_object_unpin_pages(obj); 3265 3266 return 0; 3267 } 3268 3269 int i915_gpu_idle(struct drm_device *dev) 3270 { 3271 struct drm_i915_private *dev_priv = dev->dev_private; 3272 struct intel_engine_cs *ring; 3273 int ret, i; 3274 3275 /* Flush everything onto the inactive list. */ 3276 for_each_ring(ring, dev_priv, i) { 3277 if (!i915.enable_execlists) { 3278 ret = i915_switch_context(ring, ring->default_context); 3279 if (ret) 3280 return ret; 3281 } 3282 3283 ret = intel_ring_idle(ring); 3284 if (ret) 3285 return ret; 3286 } 3287 3288 WARN_ON(i915_verify_lists(dev)); 3289 return 0; 3290 } 3291 3292 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3293 struct drm_i915_gem_object *obj) 3294 { 3295 struct drm_i915_private *dev_priv = dev->dev_private; 3296 int fence_reg; 3297 int fence_pitch_shift; 3298 3299 if (INTEL_INFO(dev)->gen >= 6) { 3300 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3301 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3302 } else { 3303 fence_reg = FENCE_REG_965_0; 3304 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3305 } 3306 3307 fence_reg += reg * 8; 3308 3309 /* To w/a incoherency with non-atomic 64-bit register updates, 3310 * we split the 64-bit update into two 32-bit writes. In order 3311 * for a partial fence not to be evaluated between writes, we 3312 * precede the update with write to turn off the fence register, 3313 * and only enable the fence as the last step. 3314 * 3315 * For extra levels of paranoia, we make sure each step lands 3316 * before applying the next step. 3317 */ 3318 I915_WRITE(fence_reg, 0); 3319 POSTING_READ(fence_reg); 3320 3321 if (obj) { 3322 u32 size = i915_gem_obj_ggtt_size(obj); 3323 uint64_t val; 3324 3325 /* Adjust fence size to match tiled area */ 3326 if (obj->tiling_mode != I915_TILING_NONE) { 3327 uint32_t row_size = obj->stride * 3328 (obj->tiling_mode == I915_TILING_Y ? 32 : 8); 3329 size = (size / row_size) * row_size; 3330 } 3331 3332 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3333 0xfffff000) << 32; 3334 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3335 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3336 if (obj->tiling_mode == I915_TILING_Y) 3337 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3338 val |= I965_FENCE_REG_VALID; 3339 3340 I915_WRITE(fence_reg + 4, val >> 32); 3341 POSTING_READ(fence_reg + 4); 3342 3343 I915_WRITE(fence_reg + 0, val); 3344 POSTING_READ(fence_reg); 3345 } else { 3346 I915_WRITE(fence_reg + 4, 0); 3347 POSTING_READ(fence_reg + 4); 3348 } 3349 } 3350 3351 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3352 struct drm_i915_gem_object *obj) 3353 { 3354 struct drm_i915_private *dev_priv = dev->dev_private; 3355 u32 val; 3356 3357 if (obj) { 3358 u32 size = i915_gem_obj_ggtt_size(obj); 3359 int pitch_val; 3360 int tile_width; 3361 3362 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3363 (size & -size) != size || 3364 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3365 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3366 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3367 3368 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3369 tile_width = 128; 3370 else 3371 tile_width = 512; 3372 3373 /* Note: pitch better be a power of two tile widths */ 3374 pitch_val = obj->stride / tile_width; 3375 pitch_val = ffs(pitch_val) - 1; 3376 3377 val = i915_gem_obj_ggtt_offset(obj); 3378 if (obj->tiling_mode == I915_TILING_Y) 3379 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3380 val |= I915_FENCE_SIZE_BITS(size); 3381 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3382 val |= I830_FENCE_REG_VALID; 3383 } else 3384 val = 0; 3385 3386 if (reg < 8) 3387 reg = FENCE_REG_830_0 + reg * 4; 3388 else 3389 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3390 3391 I915_WRITE(reg, val); 3392 POSTING_READ(reg); 3393 } 3394 3395 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3396 struct drm_i915_gem_object *obj) 3397 { 3398 struct drm_i915_private *dev_priv = dev->dev_private; 3399 uint32_t val; 3400 3401 if (obj) { 3402 u32 size = i915_gem_obj_ggtt_size(obj); 3403 uint32_t pitch_val; 3404 3405 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3406 (size & -size) != size || 3407 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3408 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3409 i915_gem_obj_ggtt_offset(obj), size); 3410 3411 pitch_val = obj->stride / 128; 3412 pitch_val = ffs(pitch_val) - 1; 3413 3414 val = i915_gem_obj_ggtt_offset(obj); 3415 if (obj->tiling_mode == I915_TILING_Y) 3416 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3417 val |= I830_FENCE_SIZE_BITS(size); 3418 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3419 val |= I830_FENCE_REG_VALID; 3420 } else 3421 val = 0; 3422 3423 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3424 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3425 } 3426 3427 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3428 { 3429 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3430 } 3431 3432 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3433 struct drm_i915_gem_object *obj) 3434 { 3435 struct drm_i915_private *dev_priv = dev->dev_private; 3436 3437 /* Ensure that all CPU reads are completed before installing a fence 3438 * and all writes before removing the fence. 3439 */ 3440 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3441 mb(); 3442 3443 WARN(obj && (!obj->stride || !obj->tiling_mode), 3444 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3445 obj->stride, obj->tiling_mode); 3446 3447 if (IS_GEN2(dev)) 3448 i830_write_fence_reg(dev, reg, obj); 3449 else if (IS_GEN3(dev)) 3450 i915_write_fence_reg(dev, reg, obj); 3451 else if (INTEL_INFO(dev)->gen >= 4) 3452 i965_write_fence_reg(dev, reg, obj); 3453 3454 /* And similarly be paranoid that no direct access to this region 3455 * is reordered to before the fence is installed. 3456 */ 3457 if (i915_gem_object_needs_mb(obj)) 3458 mb(); 3459 } 3460 3461 static inline int fence_number(struct drm_i915_private *dev_priv, 3462 struct drm_i915_fence_reg *fence) 3463 { 3464 return fence - dev_priv->fence_regs; 3465 } 3466 3467 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3468 struct drm_i915_fence_reg *fence, 3469 bool enable) 3470 { 3471 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3472 int reg = fence_number(dev_priv, fence); 3473 3474 i915_gem_write_fence(obj->base.dev, reg, enable ? 
obj : NULL); 3475 3476 if (enable) { 3477 obj->fence_reg = reg; 3478 fence->obj = obj; 3479 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3480 } else { 3481 obj->fence_reg = I915_FENCE_REG_NONE; 3482 fence->obj = NULL; 3483 list_del_init(&fence->lru_list); 3484 } 3485 obj->fence_dirty = false; 3486 } 3487 3488 static int 3489 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3490 { 3491 if (obj->last_fenced_req) { 3492 int ret = i915_wait_request(obj->last_fenced_req); 3493 if (ret) 3494 return ret; 3495 3496 i915_gem_request_assign(&obj->last_fenced_req, NULL); 3497 } 3498 3499 return 0; 3500 } 3501 3502 int 3503 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3504 { 3505 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3506 struct drm_i915_fence_reg *fence; 3507 int ret; 3508 3509 ret = i915_gem_object_wait_fence(obj); 3510 if (ret) 3511 return ret; 3512 3513 if (obj->fence_reg == I915_FENCE_REG_NONE) 3514 return 0; 3515 3516 fence = &dev_priv->fence_regs[obj->fence_reg]; 3517 3518 if (WARN_ON(fence->pin_count)) 3519 return -EBUSY; 3520 3521 i915_gem_object_fence_lost(obj); 3522 i915_gem_object_update_fence(obj, fence, false); 3523 3524 return 0; 3525 } 3526 3527 static struct drm_i915_fence_reg * 3528 i915_find_fence_reg(struct drm_device *dev) 3529 { 3530 struct drm_i915_private *dev_priv = dev->dev_private; 3531 struct drm_i915_fence_reg *reg, *avail; 3532 int i; 3533 3534 /* First try to find a free reg */ 3535 avail = NULL; 3536 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3537 reg = &dev_priv->fence_regs[i]; 3538 if (!reg->obj) 3539 return reg; 3540 3541 if (!reg->pin_count) 3542 avail = reg; 3543 } 3544 3545 if (avail == NULL) 3546 goto deadlock; 3547 3548 /* None available, try to steal one or wait for a user to finish */ 3549 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3550 if (reg->pin_count) 3551 continue; 3552 3553 return reg; 3554 } 3555 3556 deadlock: 3557 /* Wait for completion of pending flips which consume fences */ 3558 if (intel_has_pending_fb_unpin(dev)) 3559 return ERR_PTR(-EAGAIN); 3560 3561 return ERR_PTR(-EDEADLK); 3562 } 3563 3564 /** 3565 * i915_gem_object_get_fence - set up fencing for an object 3566 * @obj: object to map through a fence reg 3567 * 3568 * When mapping objects through the GTT, userspace wants to be able to write 3569 * to them without having to worry about swizzling if the object is tiled. 3570 * This function walks the fence regs looking for a free one for @obj, 3571 * stealing one if it can't find any. 3572 * 3573 * It then sets up the reg based on the object's properties: address, pitch 3574 * and tiling format. 3575 * 3576 * For an untiled surface, this removes any existing fence. 3577 */ 3578 int 3579 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3580 { 3581 struct drm_device *dev = obj->base.dev; 3582 struct drm_i915_private *dev_priv = dev->dev_private; 3583 bool enable = obj->tiling_mode != I915_TILING_NONE; 3584 struct drm_i915_fence_reg *reg; 3585 int ret; 3586 3587 /* Have we updated the tiling parameters upon the object and so 3588 * will need to serialise the write to the associated fence register? 3589 */ 3590 if (obj->fence_dirty) { 3591 ret = i915_gem_object_wait_fence(obj); 3592 if (ret) 3593 return ret; 3594 } 3595 3596 /* Just update our place in the LRU if our fence is getting reused. 
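 * Moving the register to the tail of dev_priv->mm.fence_list marks it as
 * most recently used, so i915_find_fence_reg() prefers to steal the least
 * recently used registers first. As a rough sketch of a typical caller of
 * this function (illustrative, assuming the usual GGTT pin helper from
 * i915_drv.h):
 *
 *	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
 *	if (ret == 0)
 *		ret = i915_gem_object_get_fence(obj);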
*/ 3597 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3598 reg = &dev_priv->fence_regs[obj->fence_reg]; 3599 if (!obj->fence_dirty) { 3600 list_move_tail(®->lru_list, 3601 &dev_priv->mm.fence_list); 3602 return 0; 3603 } 3604 } else if (enable) { 3605 if (WARN_ON(!obj->map_and_fenceable)) 3606 return -EINVAL; 3607 3608 reg = i915_find_fence_reg(dev); 3609 if (IS_ERR(reg)) 3610 return PTR_ERR(reg); 3611 3612 if (reg->obj) { 3613 struct drm_i915_gem_object *old = reg->obj; 3614 3615 ret = i915_gem_object_wait_fence(old); 3616 if (ret) 3617 return ret; 3618 3619 i915_gem_object_fence_lost(old); 3620 } 3621 } else 3622 return 0; 3623 3624 i915_gem_object_update_fence(obj, reg, enable); 3625 3626 return 0; 3627 } 3628 3629 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3630 unsigned long cache_level) 3631 { 3632 struct drm_mm_node *gtt_space = &vma->node; 3633 struct drm_mm_node *other; 3634 3635 /* 3636 * On some machines we have to be careful when putting differing types 3637 * of snoopable memory together to avoid the prefetcher crossing memory 3638 * domains and dying. During vm initialisation, we decide whether or not 3639 * these constraints apply and set the drm_mm.color_adjust 3640 * appropriately. 3641 */ 3642 if (vma->vm->mm.color_adjust == NULL) 3643 return true; 3644 3645 if (!drm_mm_node_allocated(gtt_space)) 3646 return true; 3647 3648 if (list_empty(>t_space->node_list)) 3649 return true; 3650 3651 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3652 if (other->allocated && !other->hole_follows && other->color != cache_level) 3653 return false; 3654 3655 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3656 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3657 return false; 3658 3659 return true; 3660 } 3661 3662 /** 3663 * Finds free space in the GTT aperture and binds the object or a view of it 3664 * there. 3665 */ 3666 static struct i915_vma * 3667 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3668 struct i915_address_space *vm, 3669 const struct i915_ggtt_view *ggtt_view, 3670 unsigned alignment, 3671 uint64_t flags) 3672 { 3673 struct drm_device *dev = obj->base.dev; 3674 struct drm_i915_private *dev_priv = dev->dev_private; 3675 u32 size, fence_size, fence_alignment, unfenced_alignment; 3676 unsigned long start = 3677 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3678 unsigned long end = 3679 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3680 struct i915_vma *vma; 3681 int ret; 3682 3683 if (i915_is_ggtt(vm)) { 3684 u32 view_size; 3685 3686 if (WARN_ON(!ggtt_view)) 3687 return ERR_PTR(-EINVAL); 3688 3689 view_size = i915_ggtt_view_size(obj, ggtt_view); 3690 3691 fence_size = i915_gem_get_gtt_size(dev, 3692 view_size, 3693 obj->tiling_mode); 3694 fence_alignment = i915_gem_get_gtt_alignment(dev, 3695 view_size, 3696 obj->tiling_mode, 3697 true); 3698 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3699 view_size, 3700 obj->tiling_mode, 3701 false); 3702 size = flags & PIN_MAPPABLE ? fence_size : view_size; 3703 } else { 3704 fence_size = i915_gem_get_gtt_size(dev, 3705 obj->base.size, 3706 obj->tiling_mode); 3707 fence_alignment = i915_gem_get_gtt_alignment(dev, 3708 obj->base.size, 3709 obj->tiling_mode, 3710 true); 3711 unfenced_alignment = 3712 i915_gem_get_gtt_alignment(dev, 3713 obj->base.size, 3714 obj->tiling_mode, 3715 false); 3716 size = flags & PIN_MAPPABLE ? 
fence_size : obj->base.size; 3717 } 3718 3719 if (alignment == 0) 3720 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3721 unfenced_alignment; 3722 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3723 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3724 ggtt_view ? ggtt_view->type : 0, 3725 alignment); 3726 return ERR_PTR(-EINVAL); 3727 } 3728 3729 /* If binding the object/GGTT view requires more space than the entire 3730 * aperture has, reject it early before evicting everything in a vain 3731 * attempt to find space. 3732 */ 3733 if (size > end) { 3734 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n", 3735 ggtt_view ? ggtt_view->type : 0, 3736 size, 3737 flags & PIN_MAPPABLE ? "mappable" : "total", 3738 end); 3739 return ERR_PTR(-E2BIG); 3740 } 3741 3742 ret = i915_gem_object_get_pages(obj); 3743 if (ret) 3744 return ERR_PTR(ret); 3745 3746 i915_gem_object_pin_pages(obj); 3747 3748 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3749 i915_gem_obj_lookup_or_create_vma(obj, vm); 3750 3751 if (IS_ERR(vma)) 3752 goto err_unpin; 3753 3754 search_free: 3755 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3756 size, alignment, 3757 obj->cache_level, 3758 start, end, 3759 DRM_MM_SEARCH_DEFAULT, 3760 DRM_MM_CREATE_DEFAULT); 3761 if (ret) { 3762 ret = i915_gem_evict_something(dev, vm, size, alignment, 3763 obj->cache_level, 3764 start, end, 3765 flags); 3766 if (ret == 0) 3767 goto search_free; 3768 3769 goto err_free_vma; 3770 } 3771 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3772 ret = -EINVAL; 3773 goto err_remove_node; 3774 } 3775 3776 trace_i915_vma_bind(vma, flags); 3777 ret = i915_vma_bind(vma, obj->cache_level, flags); 3778 if (ret) 3779 goto err_remove_node; 3780 3781 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3782 list_add_tail(&vma->mm_list, &vm->inactive_list); 3783 3784 return vma; 3785 3786 err_remove_node: 3787 drm_mm_remove_node(&vma->node); 3788 err_free_vma: 3789 i915_gem_vma_destroy(vma); 3790 vma = ERR_PTR(ret); 3791 err_unpin: 3792 i915_gem_object_unpin_pages(obj); 3793 return vma; 3794 } 3795 3796 bool 3797 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3798 bool force) 3799 { 3800 /* If we don't have a page list set up, then we're not pinned 3801 * to GPU, and we can ignore the cache flush because it'll happen 3802 * again at bind time. 3803 */ 3804 if (obj->pages == NULL) 3805 return false; 3806 3807 /* 3808 * Stolen memory is always coherent with the GPU as it is explicitly 3809 * marked as wc by the system, or the system is cache-coherent. 3810 */ 3811 if (obj->stolen || obj->phys_handle) 3812 return false; 3813 3814 /* If the GPU is snooping the contents of the CPU cache, 3815 * we do not need to manually clear the CPU cache lines. However, 3816 * the caches are only snooped when the render cache is 3817 * flushed/invalidated. As we always have to emit invalidations 3818 * and flushes when moving into and out of the RENDER domain, correct 3819 * snooping behaviour occurs naturally as the result of our domain 3820 * tracking. 3821 */ 3822 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3823 obj->cache_dirty = true; 3824 return false; 3825 } 3826 3827 trace_i915_gem_object_clflush(obj); 3828 drm_clflush_sg(obj->pages); 3829 obj->cache_dirty = false; 3830 3831 return true; 3832 } 3833 3834 /** Flushes the GTT write domain for the object if it's dirty. 
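 * "Dirty" here simply means obj->base.write_domain == I915_GEM_DOMAIN_GTT;
 * as the body explains, no data needs to be flushed, only a write barrier
 * is required before the domain is cleared.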
*/ 3835 static void 3836 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3837 { 3838 uint32_t old_write_domain; 3839 3840 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3841 return; 3842 3843 /* No actual flushing is required for the GTT write domain. Writes 3844 * to it immediately go to main memory as far as we know, so there's 3845 * no chipset flush. It also doesn't land in render cache. 3846 * 3847 * However, we do have to enforce the order so that all writes through 3848 * the GTT land before any writes to the device, such as updates to 3849 * the GATT itself. 3850 */ 3851 wmb(); 3852 3853 old_write_domain = obj->base.write_domain; 3854 obj->base.write_domain = 0; 3855 3856 intel_fb_obj_flush(obj, false); 3857 3858 trace_i915_gem_object_change_domain(obj, 3859 obj->base.read_domains, 3860 old_write_domain); 3861 } 3862 3863 /** Flushes the CPU write domain for the object if it's dirty. */ 3864 static void 3865 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3866 { 3867 uint32_t old_write_domain; 3868 3869 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3870 return; 3871 3872 if (i915_gem_clflush_object(obj, obj->pin_display)) 3873 i915_gem_chipset_flush(obj->base.dev); 3874 3875 old_write_domain = obj->base.write_domain; 3876 obj->base.write_domain = 0; 3877 3878 intel_fb_obj_flush(obj, false); 3879 3880 trace_i915_gem_object_change_domain(obj, 3881 obj->base.read_domains, 3882 old_write_domain); 3883 } 3884 3885 /** 3886 * Moves a single object to the GTT read, and possibly write domain. 3887 * 3888 * This function returns when the move is complete, including waiting on 3889 * flushes to occur. 3890 */ 3891 int 3892 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3893 { 3894 uint32_t old_write_domain, old_read_domains; 3895 struct i915_vma *vma; 3896 int ret; 3897 3898 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3899 return 0; 3900 3901 ret = i915_gem_object_wait_rendering(obj, !write); 3902 if (ret) 3903 return ret; 3904 3905 /* Flush and acquire obj->pages so that we are coherent through 3906 * direct access in memory with previous cached writes through 3907 * shmemfs and that our cache domain tracking remains valid. 3908 * For example, if the obj->filp was moved to swap without us 3909 * being notified and releasing the pages, we would mistakenly 3910 * continue to assume that the obj remained out of the CPU cached 3911 * domain. 3912 */ 3913 ret = i915_gem_object_get_pages(obj); 3914 if (ret) 3915 return ret; 3916 3917 i915_gem_object_flush_cpu_write_domain(obj); 3918 3919 /* Serialise direct access to this object with the barriers for 3920 * coherent writes from the GPU, by effectively invalidating the 3921 * GTT domain upon first access. 3922 */ 3923 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3924 mb(); 3925 3926 old_write_domain = obj->base.write_domain; 3927 old_read_domains = obj->base.read_domains; 3928 3929 /* It should now be out of any other write domains, and we can update 3930 * the domain values for our changes. 
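 * Concretely: read_domains gains I915_GEM_DOMAIN_GTT, and for a write
 * access both read_domains and write_domain collapse to the GTT domain
 * alone, with obj->dirty set so the backing pages are written back
 * whenever they are eventually released.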
3931 */ 3932 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3933 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3934 if (write) { 3935 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3936 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3937 obj->dirty = 1; 3938 } 3939 3940 if (write) 3941 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 3942 3943 trace_i915_gem_object_change_domain(obj, 3944 old_read_domains, 3945 old_write_domain); 3946 3947 /* And bump the LRU for this access */ 3948 vma = i915_gem_obj_to_ggtt(obj); 3949 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3950 list_move_tail(&vma->mm_list, 3951 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3952 3953 return 0; 3954 } 3955 3956 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3957 enum i915_cache_level cache_level) 3958 { 3959 struct drm_device *dev = obj->base.dev; 3960 struct i915_vma *vma, *next; 3961 int ret; 3962 3963 if (obj->cache_level == cache_level) 3964 return 0; 3965 3966 if (i915_gem_obj_is_pinned(obj)) { 3967 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3968 return -EBUSY; 3969 } 3970 3971 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3972 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3973 ret = i915_vma_unbind(vma); 3974 if (ret) 3975 return ret; 3976 } 3977 } 3978 3979 if (i915_gem_obj_bound_any(obj)) { 3980 ret = i915_gem_object_wait_rendering(obj, false); 3981 if (ret) 3982 return ret; 3983 3984 i915_gem_object_finish_gtt(obj); 3985 3986 /* Before SandyBridge, you could not use tiling or fence 3987 * registers with snooped memory, so relinquish any fences 3988 * currently pointing to our region in the aperture. 3989 */ 3990 if (INTEL_INFO(dev)->gen < 6) { 3991 ret = i915_gem_object_put_fence(obj); 3992 if (ret) 3993 return ret; 3994 } 3995 3996 list_for_each_entry(vma, &obj->vma_list, vma_link) 3997 if (drm_mm_node_allocated(&vma->node)) { 3998 ret = i915_vma_bind(vma, cache_level, 3999 PIN_UPDATE); 4000 if (ret) 4001 return ret; 4002 } 4003 } 4004 4005 list_for_each_entry(vma, &obj->vma_list, vma_link) 4006 vma->node.color = cache_level; 4007 obj->cache_level = cache_level; 4008 4009 if (obj->cache_dirty && 4010 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4011 cpu_write_needs_clflush(obj)) { 4012 if (i915_gem_clflush_object(obj, true)) 4013 i915_gem_chipset_flush(obj->base.dev); 4014 } 4015 4016 return 0; 4017 } 4018 4019 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4020 struct drm_file *file) 4021 { 4022 struct drm_i915_gem_caching *args = data; 4023 struct drm_i915_gem_object *obj; 4024 4025 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4026 if (&obj->base == NULL) 4027 return -ENOENT; 4028 4029 switch (obj->cache_level) { 4030 case I915_CACHE_LLC: 4031 case I915_CACHE_L3_LLC: 4032 args->caching = I915_CACHING_CACHED; 4033 break; 4034 4035 case I915_CACHE_WT: 4036 args->caching = I915_CACHING_DISPLAY; 4037 break; 4038 4039 default: 4040 args->caching = I915_CACHING_NONE; 4041 break; 4042 } 4043 4044 drm_gem_object_unreference_unlocked(&obj->base); 4045 return 0; 4046 } 4047 4048 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4049 struct drm_file *file) 4050 { 4051 struct drm_i915_gem_caching *args = data; 4052 struct drm_i915_gem_object *obj; 4053 enum i915_cache_level level; 4054 int ret; 4055 4056 switch (args->caching) { 4057 case I915_CACHING_NONE: 4058 level = I915_CACHE_NONE; 4059 break; 4060 case I915_CACHING_CACHED: 4061 level = 
I915_CACHE_LLC; 4062 break; 4063 case I915_CACHING_DISPLAY: 4064 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4065 break; 4066 default: 4067 return -EINVAL; 4068 } 4069 4070 ret = i915_mutex_lock_interruptible(dev); 4071 if (ret) 4072 return ret; 4073 4074 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4075 if (&obj->base == NULL) { 4076 ret = -ENOENT; 4077 goto unlock; 4078 } 4079 4080 ret = i915_gem_object_set_cache_level(obj, level); 4081 4082 drm_gem_object_unreference(&obj->base); 4083 unlock: 4084 mutex_unlock(&dev->struct_mutex); 4085 return ret; 4086 } 4087 4088 /* 4089 * Prepare buffer for display plane (scanout, cursors, etc). 4090 * Can be called from an uninterruptible phase (modesetting) and allows 4091 * any flushes to be pipelined (for pageflips). 4092 */ 4093 int 4094 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4095 u32 alignment, 4096 struct intel_engine_cs *pipelined, 4097 const struct i915_ggtt_view *view) 4098 { 4099 u32 old_read_domains, old_write_domain; 4100 int ret; 4101 4102 ret = i915_gem_object_sync(obj, pipelined); 4103 if (ret) 4104 return ret; 4105 4106 /* Mark the pin_display early so that we account for the 4107 * display coherency whilst setting up the cache domains. 4108 */ 4109 obj->pin_display++; 4110 4111 /* The display engine is not coherent with the LLC cache on gen6. As 4112 * a result, we make sure that the pinning that is about to occur is 4113 * done with uncached PTEs. This is lowest common denominator for all 4114 * chipsets. 4115 * 4116 * However for gen6+, we could do better by using the GFDT bit instead 4117 * of uncaching, which would allow us to flush all the LLC-cached data 4118 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4119 */ 4120 ret = i915_gem_object_set_cache_level(obj, 4121 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4122 if (ret) 4123 goto err_unpin_display; 4124 4125 /* As the user may map the buffer once pinned in the display plane 4126 * (e.g. libkms for the bootup splash), we have to ensure that we 4127 * always use map_and_fenceable for all scanout buffers. 4128 */ 4129 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4130 view->type == I915_GGTT_VIEW_NORMAL ? 4131 PIN_MAPPABLE : 0); 4132 if (ret) 4133 goto err_unpin_display; 4134 4135 i915_gem_object_flush_cpu_write_domain(obj); 4136 4137 old_write_domain = obj->base.write_domain; 4138 old_read_domains = obj->base.read_domains; 4139 4140 /* It should now be out of any other write domains, and we can update 4141 * the domain values for our changes. 4142 */ 4143 obj->base.write_domain = 0; 4144 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4145 4146 trace_i915_gem_object_change_domain(obj, 4147 old_read_domains, 4148 old_write_domain); 4149 4150 return 0; 4151 4152 err_unpin_display: 4153 obj->pin_display--; 4154 return ret; 4155 } 4156 4157 void 4158 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4159 const struct i915_ggtt_view *view) 4160 { 4161 if (WARN_ON(obj->pin_display == 0)) 4162 return; 4163 4164 i915_gem_object_ggtt_unpin_view(obj, view); 4165 4166 obj->pin_display--; 4167 } 4168 4169 /** 4170 * Moves a single object to the CPU read, and possibly write domain. 4171 * 4172 * This function returns when the move is complete, including waiting on 4173 * flushes to occur. 
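 *
 * Illustrative usage sketch (not lifted from this file; it assumes the
 * caller already holds dev->struct_mutex and that "vaddr", "data" and
 * "len" are placeholders for a CPU mapping of the object and the bytes
 * to copy):
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 *	if (ret == 0)
 *		memcpy(vaddr, data, len);
 *
 * Once the object is in the CPU write domain, ordinary CPU stores such
 * as the memcpy above are tracked and will be flushed by a later call to
 * i915_gem_object_flush_cpu_write_domain() (e.g. when the object moves
 * back to the GTT domain).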
4174 */ 4175 int 4176 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4177 { 4178 uint32_t old_write_domain, old_read_domains; 4179 int ret; 4180 4181 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4182 return 0; 4183 4184 ret = i915_gem_object_wait_rendering(obj, !write); 4185 if (ret) 4186 return ret; 4187 4188 i915_gem_object_flush_gtt_write_domain(obj); 4189 4190 old_write_domain = obj->base.write_domain; 4191 old_read_domains = obj->base.read_domains; 4192 4193 /* Flush the CPU cache if it's still invalid. */ 4194 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4195 i915_gem_clflush_object(obj, false); 4196 4197 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4198 } 4199 4200 /* It should now be out of any other write domains, and we can update 4201 * the domain values for our changes. 4202 */ 4203 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4204 4205 /* If we're writing through the CPU, then the GPU read domains will 4206 * need to be invalidated at next use. 4207 */ 4208 if (write) { 4209 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4210 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4211 } 4212 4213 if (write) 4214 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 4215 4216 trace_i915_gem_object_change_domain(obj, 4217 old_read_domains, 4218 old_write_domain); 4219 4220 return 0; 4221 } 4222 4223 /* Throttle our rendering by waiting until the ring has completed our requests 4224 * emitted over 20 msec ago. 4225 * 4226 * Note that if we were to use the current jiffies each time around the loop, 4227 * we wouldn't escape the function with any frames outstanding if the time to 4228 * render a frame was over 20ms. 4229 * 4230 * This should get us reasonable parallelism between CPU and GPU but also 4231 * relatively low latency when blocking on a particular request to finish. 
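 *
 * Concretely, the loop below walks this client's request list, remembers
 * the newest request that was emitted more than DRM_I915_THROTTLE_JIFFIES
 * (the ~20ms above) ago, and then blocks on it with __i915_wait_request()
 * without holding struct_mutex.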
4232 */ 4233 static int 4234 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4235 { 4236 struct drm_i915_private *dev_priv = dev->dev_private; 4237 struct drm_i915_file_private *file_priv = file->driver_priv; 4238 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4239 struct drm_i915_gem_request *request, *target = NULL; 4240 unsigned reset_counter; 4241 int ret; 4242 4243 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4244 if (ret) 4245 return ret; 4246 4247 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4248 if (ret) 4249 return ret; 4250 4251 spin_lock(&file_priv->mm.lock); 4252 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4253 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4254 break; 4255 4256 target = request; 4257 } 4258 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4259 if (target) 4260 i915_gem_request_reference(target); 4261 spin_unlock(&file_priv->mm.lock); 4262 4263 if (target == NULL) 4264 return 0; 4265 4266 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4267 if (ret == 0) 4268 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4269 4270 i915_gem_request_unreference__unlocked(target); 4271 4272 return ret; 4273 } 4274 4275 static bool 4276 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4277 { 4278 struct drm_i915_gem_object *obj = vma->obj; 4279 4280 if (alignment && 4281 vma->node.start & (alignment - 1)) 4282 return true; 4283 4284 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4285 return true; 4286 4287 if (flags & PIN_OFFSET_BIAS && 4288 vma->node.start < (flags & PIN_OFFSET_MASK)) 4289 return true; 4290 4291 return false; 4292 } 4293 4294 static int 4295 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4296 struct i915_address_space *vm, 4297 const struct i915_ggtt_view *ggtt_view, 4298 uint32_t alignment, 4299 uint64_t flags) 4300 { 4301 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4302 struct i915_vma *vma; 4303 unsigned bound; 4304 int ret; 4305 4306 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4307 return -ENODEV; 4308 4309 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4310 return -EINVAL; 4311 4312 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4313 return -EINVAL; 4314 4315 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4316 return -EINVAL; 4317 4318 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4319 i915_gem_obj_to_vma(obj, vm); 4320 4321 if (IS_ERR(vma)) 4322 return PTR_ERR(vma); 4323 4324 if (vma) { 4325 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4326 return -EBUSY; 4327 4328 if (i915_vma_misplaced(vma, alignment, flags)) { 4329 unsigned long offset; 4330 offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) : 4331 i915_gem_obj_offset(obj, vm); 4332 WARN(vma->pin_count, 4333 "bo is already pinned in %s with incorrect alignment:" 4334 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4335 " obj->map_and_fenceable=%d\n", 4336 ggtt_view ? "ggtt" : "ppgtt", 4337 offset, 4338 alignment, 4339 !!(flags & PIN_MAPPABLE), 4340 obj->map_and_fenceable); 4341 ret = i915_vma_unbind(vma); 4342 if (ret) 4343 return ret; 4344 4345 vma = NULL; 4346 } 4347 } 4348 4349 bound = vma ? 
vma->bound : 0; 4350 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4351 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4352 flags); 4353 if (IS_ERR(vma)) 4354 return PTR_ERR(vma); 4355 } else { 4356 ret = i915_vma_bind(vma, obj->cache_level, flags); 4357 if (ret) 4358 return ret; 4359 } 4360 4361 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4362 (bound ^ vma->bound) & GLOBAL_BIND) { 4363 bool mappable, fenceable; 4364 u32 fence_size, fence_alignment; 4365 4366 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4367 obj->base.size, 4368 obj->tiling_mode); 4369 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4370 obj->base.size, 4371 obj->tiling_mode, 4372 true); 4373 4374 fenceable = (vma->node.size == fence_size && 4375 (vma->node.start & (fence_alignment - 1)) == 0); 4376 4377 mappable = (vma->node.start + fence_size <= 4378 dev_priv->gtt.mappable_end); 4379 4380 obj->map_and_fenceable = mappable && fenceable; 4381 4382 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4383 } 4384 4385 vma->pin_count++; 4386 return 0; 4387 } 4388 4389 int 4390 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4391 struct i915_address_space *vm, 4392 uint32_t alignment, 4393 uint64_t flags) 4394 { 4395 return i915_gem_object_do_pin(obj, vm, 4396 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4397 alignment, flags); 4398 } 4399 4400 int 4401 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4402 const struct i915_ggtt_view *view, 4403 uint32_t alignment, 4404 uint64_t flags) 4405 { 4406 if (WARN_ONCE(!view, "no view specified")) 4407 return -EINVAL; 4408 4409 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4410 alignment, flags | PIN_GLOBAL); 4411 } 4412 4413 void 4414 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4415 const struct i915_ggtt_view *view) 4416 { 4417 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4418 4419 BUG_ON(!vma); 4420 WARN_ON(vma->pin_count == 0); 4421 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4422 4423 --vma->pin_count; 4424 } 4425 4426 bool 4427 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4428 { 4429 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4430 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4431 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4432 4433 WARN_ON(!ggtt_vma || 4434 dev_priv->fence_regs[obj->fence_reg].pin_count > 4435 ggtt_vma->pin_count); 4436 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4437 return true; 4438 } else 4439 return false; 4440 } 4441 4442 void 4443 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4444 { 4445 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4446 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4447 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4448 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4449 } 4450 } 4451 4452 int 4453 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4454 struct drm_file *file) 4455 { 4456 struct drm_i915_gem_busy *args = data; 4457 struct drm_i915_gem_object *obj; 4458 int ret; 4459 4460 ret = i915_mutex_lock_interruptible(dev); 4461 if (ret) 4462 return ret; 4463 4464 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4465 if (&obj->base == NULL) { 4466 ret = -ENOENT; 4467 goto unlock; 4468 } 4469 4470 /* Count all active objects as busy, even if they are currently not used 4471 * by the gpu. 
Users of this interface expect objects to eventually 4472 * become non-busy without any further actions, therefore emit any 4473 * necessary flushes here. 4474 */ 4475 ret = i915_gem_object_flush_active(obj); 4476 if (ret) 4477 goto unref; 4478 4479 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4480 args->busy = obj->active << 16; 4481 if (obj->last_write_req) 4482 args->busy |= obj->last_write_req->ring->id; 4483 4484 unref: 4485 drm_gem_object_unreference(&obj->base); 4486 unlock: 4487 mutex_unlock(&dev->struct_mutex); 4488 return ret; 4489 } 4490 4491 int 4492 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4493 struct drm_file *file_priv) 4494 { 4495 return i915_gem_ring_throttle(dev, file_priv); 4496 } 4497 4498 int 4499 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4500 struct drm_file *file_priv) 4501 { 4502 struct drm_i915_private *dev_priv = dev->dev_private; 4503 struct drm_i915_gem_madvise *args = data; 4504 struct drm_i915_gem_object *obj; 4505 int ret; 4506 4507 switch (args->madv) { 4508 case I915_MADV_DONTNEED: 4509 case I915_MADV_WILLNEED: 4510 break; 4511 default: 4512 return -EINVAL; 4513 } 4514 4515 ret = i915_mutex_lock_interruptible(dev); 4516 if (ret) 4517 return ret; 4518 4519 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4520 if (&obj->base == NULL) { 4521 ret = -ENOENT; 4522 goto unlock; 4523 } 4524 4525 if (i915_gem_obj_is_pinned(obj)) { 4526 ret = -EINVAL; 4527 goto out; 4528 } 4529 4530 if (obj->pages && 4531 obj->tiling_mode != I915_TILING_NONE && 4532 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4533 if (obj->madv == I915_MADV_WILLNEED) 4534 i915_gem_object_unpin_pages(obj); 4535 if (args->madv == I915_MADV_WILLNEED) 4536 i915_gem_object_pin_pages(obj); 4537 } 4538 4539 if (obj->madv != __I915_MADV_PURGED) 4540 obj->madv = args->madv; 4541 4542 /* if the object is no longer attached, discard its backing storage */ 4543 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4544 i915_gem_object_truncate(obj); 4545 4546 args->retained = obj->madv != __I915_MADV_PURGED; 4547 4548 out: 4549 drm_gem_object_unreference(&obj->base); 4550 unlock: 4551 mutex_unlock(&dev->struct_mutex); 4552 return ret; 4553 } 4554 4555 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4556 const struct drm_i915_gem_object_ops *ops) 4557 { 4558 int i; 4559 4560 INIT_LIST_HEAD(&obj->global_list); 4561 for (i = 0; i < I915_NUM_RINGS; i++) 4562 INIT_LIST_HEAD(&obj->ring_list[i]); 4563 INIT_LIST_HEAD(&obj->obj_exec_link); 4564 INIT_LIST_HEAD(&obj->vma_list); 4565 INIT_LIST_HEAD(&obj->batch_pool_link); 4566 4567 obj->ops = ops; 4568 4569 obj->fence_reg = I915_FENCE_REG_NONE; 4570 obj->madv = I915_MADV_WILLNEED; 4571 4572 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4573 } 4574 4575 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4576 .get_pages = i915_gem_object_get_pages_gtt, 4577 .put_pages = i915_gem_object_put_pages_gtt, 4578 }; 4579 4580 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4581 size_t size) 4582 { 4583 struct drm_i915_gem_object *obj; 4584 struct address_space *mapping; 4585 gfp_t mask; 4586 4587 obj = i915_gem_object_alloc(dev); 4588 if (obj == NULL) 4589 return NULL; 4590 4591 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4592 i915_gem_object_free(obj); 4593 return NULL; 4594 } 4595 4596 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4597 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4598 /* 965gm cannot relocate objects above 4GiB. 
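 * We therefore restrict the shmemfs allocation mask: clearing
 * __GFP_HIGHMEM and setting __GFP_DMA32 below keeps every backing page
 * within the 32-bit address range those chipsets can address.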
*/ 4599 mask &= ~__GFP_HIGHMEM; 4600 mask |= __GFP_DMA32; 4601 } 4602 4603 mapping = file_inode(obj->base.filp)->i_mapping; 4604 mapping_set_gfp_mask(mapping, mask); 4605 4606 i915_gem_object_init(obj, &i915_gem_object_ops); 4607 4608 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4609 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4610 4611 if (HAS_LLC(dev)) { 4612 /* On some devices, we can have the GPU use the LLC (the CPU 4613 * cache) for about a 10% performance improvement 4614 * compared to uncached. Graphics requests other than 4615 * display scanout are coherent with the CPU in 4616 * accessing this cache. This means in this mode we 4617 * don't need to clflush on the CPU side, and on the 4618 * GPU side we only need to flush internal caches to 4619 * get data visible to the CPU. 4620 * 4621 * However, we maintain the display planes as UC, and so 4622 * need to rebind when first used as such. 4623 */ 4624 obj->cache_level = I915_CACHE_LLC; 4625 } else 4626 obj->cache_level = I915_CACHE_NONE; 4627 4628 trace_i915_gem_object_create(obj); 4629 4630 return obj; 4631 } 4632 4633 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4634 { 4635 /* If we are the last user of the backing storage (be it shmemfs 4636 * pages or stolen etc), we know that the pages are going to be 4637 * immediately released. In this case, we can then skip copying 4638 * back the contents from the GPU. 4639 */ 4640 4641 if (obj->madv != I915_MADV_WILLNEED) 4642 return false; 4643 4644 if (obj->base.filp == NULL) 4645 return true; 4646 4647 /* At first glance, this looks racy, but then again so would be 4648 * userspace racing mmap against close. However, the first external 4649 * reference to the filp can only be obtained through the 4650 * i915_gem_mmap_ioctl() which safeguards us against the user 4651 * acquiring such a reference whilst we are in the middle of 4652 * freeing the object. 4653 */ 4654 return atomic_long_read(&obj->base.filp->f_count) == 1; 4655 } 4656 4657 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4658 { 4659 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4660 struct drm_device *dev = obj->base.dev; 4661 struct drm_i915_private *dev_priv = dev->dev_private; 4662 struct i915_vma *vma, *next; 4663 4664 intel_runtime_pm_get(dev_priv); 4665 4666 trace_i915_gem_object_destroy(obj); 4667 4668 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4669 int ret; 4670 4671 vma->pin_count = 0; 4672 ret = i915_vma_unbind(vma); 4673 if (WARN_ON(ret == -ERESTARTSYS)) { 4674 bool was_interruptible; 4675 4676 was_interruptible = dev_priv->mm.interruptible; 4677 dev_priv->mm.interruptible = false; 4678 4679 WARN_ON(i915_vma_unbind(vma)); 4680 4681 dev_priv->mm.interruptible = was_interruptible; 4682 } 4683 } 4684 4685 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4686 * before progressing. 
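 * (That pin is taken when the stolen object is created, so dropping it
 * here lets the rest of the teardown below see a balanced pin count.)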
*/ 4687 if (obj->stolen) 4688 i915_gem_object_unpin_pages(obj); 4689 4690 WARN_ON(obj->frontbuffer_bits); 4691 4692 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4693 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4694 obj->tiling_mode != I915_TILING_NONE) 4695 i915_gem_object_unpin_pages(obj); 4696 4697 if (WARN_ON(obj->pages_pin_count)) 4698 obj->pages_pin_count = 0; 4699 if (discard_backing_storage(obj)) 4700 obj->madv = I915_MADV_DONTNEED; 4701 i915_gem_object_put_pages(obj); 4702 i915_gem_object_free_mmap_offset(obj); 4703 4704 BUG_ON(obj->pages); 4705 4706 if (obj->base.import_attach) 4707 drm_prime_gem_destroy(&obj->base, NULL); 4708 4709 if (obj->ops->release) 4710 obj->ops->release(obj); 4711 4712 drm_gem_object_release(&obj->base); 4713 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4714 4715 kfree(obj->bit_17); 4716 i915_gem_object_free(obj); 4717 4718 intel_runtime_pm_put(dev_priv); 4719 } 4720 4721 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4722 struct i915_address_space *vm) 4723 { 4724 struct i915_vma *vma; 4725 list_for_each_entry(vma, &obj->vma_list, vma_link) { 4726 if (i915_is_ggtt(vma->vm) && 4727 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4728 continue; 4729 if (vma->vm == vm) 4730 return vma; 4731 } 4732 return NULL; 4733 } 4734 4735 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4736 const struct i915_ggtt_view *view) 4737 { 4738 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 4739 struct i915_vma *vma; 4740 4741 if (WARN_ONCE(!view, "no view specified")) 4742 return ERR_PTR(-EINVAL); 4743 4744 list_for_each_entry(vma, &obj->vma_list, vma_link) 4745 if (vma->vm == ggtt && 4746 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4747 return vma; 4748 return NULL; 4749 } 4750 4751 void i915_gem_vma_destroy(struct i915_vma *vma) 4752 { 4753 struct i915_address_space *vm = NULL; 4754 WARN_ON(vma->node.allocated); 4755 4756 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4757 if (!list_empty(&vma->exec_list)) 4758 return; 4759 4760 vm = vma->vm; 4761 4762 if (!i915_is_ggtt(vm)) 4763 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4764 4765 list_del(&vma->vma_link); 4766 4767 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 4768 } 4769 4770 static void 4771 i915_gem_stop_ringbuffers(struct drm_device *dev) 4772 { 4773 struct drm_i915_private *dev_priv = dev->dev_private; 4774 struct intel_engine_cs *ring; 4775 int i; 4776 4777 for_each_ring(ring, dev_priv, i) 4778 dev_priv->gt.stop_ring(ring); 4779 } 4780 4781 int 4782 i915_gem_suspend(struct drm_device *dev) 4783 { 4784 struct drm_i915_private *dev_priv = dev->dev_private; 4785 int ret = 0; 4786 4787 mutex_lock(&dev->struct_mutex); 4788 ret = i915_gpu_idle(dev); 4789 if (ret) 4790 goto err; 4791 4792 i915_gem_retire_requests(dev); 4793 4794 i915_gem_stop_ringbuffers(dev); 4795 mutex_unlock(&dev->struct_mutex); 4796 4797 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4798 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4799 flush_delayed_work(&dev_priv->mm.idle_work); 4800 4801 /* Assert that we successfully flushed all the work and 4802 * reset the GPU back to its idle, low power state. 
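 * If anything is still marked busy at this point the WARN_ON below will
 * fire, indicating that the retire/idle workers did not run to
 * completion despite the flushes above.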
4803 */ 4804 WARN_ON(dev_priv->mm.busy); 4805 4806 return 0; 4807 4808 err: 4809 mutex_unlock(&dev->struct_mutex); 4810 return ret; 4811 } 4812 4813 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4814 { 4815 struct drm_device *dev = ring->dev; 4816 struct drm_i915_private *dev_priv = dev->dev_private; 4817 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4818 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4819 int i, ret; 4820 4821 if (!HAS_L3_DPF(dev) || !remap_info) 4822 return 0; 4823 4824 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4825 if (ret) 4826 return ret; 4827 4828 /* 4829 * Note: We do not worry about the concurrent register cacheline hang 4830 * here because no other code should access these registers other than 4831 * at initialization time. 4832 */ 4833 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4834 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4835 intel_ring_emit(ring, reg_base + i); 4836 intel_ring_emit(ring, remap_info[i/4]); 4837 } 4838 4839 intel_ring_advance(ring); 4840 4841 return ret; 4842 } 4843 4844 void i915_gem_init_swizzling(struct drm_device *dev) 4845 { 4846 struct drm_i915_private *dev_priv = dev->dev_private; 4847 4848 if (INTEL_INFO(dev)->gen < 5 || 4849 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4850 return; 4851 4852 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4853 DISP_TILE_SURFACE_SWIZZLING); 4854 4855 if (IS_GEN5(dev)) 4856 return; 4857 4858 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4859 if (IS_GEN6(dev)) 4860 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4861 else if (IS_GEN7(dev)) 4862 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4863 else if (IS_GEN8(dev)) 4864 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4865 else 4866 BUG(); 4867 } 4868 4869 static bool 4870 intel_enable_blt(struct drm_device *dev) 4871 { 4872 if (!HAS_BLT(dev)) 4873 return false; 4874 4875 /* The blitter was dysfunctional on early prototypes */ 4876 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4877 DRM_INFO("BLT not supported on this pre-production hardware;" 4878 " graphics performance will be degraded.\n"); 4879 return false; 4880 } 4881 4882 return true; 4883 } 4884 4885 static void init_unused_ring(struct drm_device *dev, u32 base) 4886 { 4887 struct drm_i915_private *dev_priv = dev->dev_private; 4888 4889 I915_WRITE(RING_CTL(base), 0); 4890 I915_WRITE(RING_HEAD(base), 0); 4891 I915_WRITE(RING_TAIL(base), 0); 4892 I915_WRITE(RING_START(base), 0); 4893 } 4894 4895 static void init_unused_rings(struct drm_device *dev) 4896 { 4897 if (IS_I830(dev)) { 4898 init_unused_ring(dev, PRB1_BASE); 4899 init_unused_ring(dev, SRB0_BASE); 4900 init_unused_ring(dev, SRB1_BASE); 4901 init_unused_ring(dev, SRB2_BASE); 4902 init_unused_ring(dev, SRB3_BASE); 4903 } else if (IS_GEN2(dev)) { 4904 init_unused_ring(dev, SRB0_BASE); 4905 init_unused_ring(dev, SRB1_BASE); 4906 } else if (IS_GEN3(dev)) { 4907 init_unused_ring(dev, PRB1_BASE); 4908 init_unused_ring(dev, PRB2_BASE); 4909 } 4910 } 4911 4912 int i915_gem_init_rings(struct drm_device *dev) 4913 { 4914 struct drm_i915_private *dev_priv = dev->dev_private; 4915 int ret; 4916 4917 ret = intel_init_render_ring_buffer(dev); 4918 if (ret) 4919 return ret; 4920 4921 if (HAS_BSD(dev)) { 4922 ret = intel_init_bsd_ring_buffer(dev); 4923 if (ret) 4924 goto cleanup_render_ring; 4925 } 4926 4927 if (intel_enable_blt(dev)) { 4928 ret = intel_init_blt_ring_buffer(dev); 4929 if (ret) 4930 goto cleanup_bsd_ring; 4931 } 
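	/* The video enhancement engine (VECS) and the second video decode
	 * engine (VCS2) only exist on newer parts, so probe for them in the
	 * same optional fashion as the blitter above.
	 */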
4932 4933 if (HAS_VEBOX(dev)) { 4934 ret = intel_init_vebox_ring_buffer(dev); 4935 if (ret) 4936 goto cleanup_blt_ring; 4937 } 4938 4939 if (HAS_BSD2(dev)) { 4940 ret = intel_init_bsd2_ring_buffer(dev); 4941 if (ret) 4942 goto cleanup_vebox_ring; 4943 } 4944 4945 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4946 if (ret) 4947 goto cleanup_bsd2_ring; 4948 4949 return 0; 4950 4951 cleanup_bsd2_ring: 4952 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 4953 cleanup_vebox_ring: 4954 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4955 cleanup_blt_ring: 4956 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4957 cleanup_bsd_ring: 4958 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4959 cleanup_render_ring: 4960 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4961 4962 return ret; 4963 } 4964 4965 int 4966 i915_gem_init_hw(struct drm_device *dev) 4967 { 4968 struct drm_i915_private *dev_priv = dev->dev_private; 4969 struct intel_engine_cs *ring; 4970 int ret, i; 4971 4972 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4973 return -EIO; 4974 4975 /* Double layer security blanket, see i915_gem_init() */ 4976 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4977 4978 if (dev_priv->ellc_size) 4979 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4980 4981 if (IS_HASWELL(dev)) 4982 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 4983 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4984 4985 if (HAS_PCH_NOP(dev)) { 4986 if (IS_IVYBRIDGE(dev)) { 4987 u32 temp = I915_READ(GEN7_MSG_CTL); 4988 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4989 I915_WRITE(GEN7_MSG_CTL, temp); 4990 } else if (INTEL_INFO(dev)->gen >= 7) { 4991 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4992 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4993 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4994 } 4995 } 4996 4997 i915_gem_init_swizzling(dev); 4998 4999 /* 5000 * At least 830 can leave some of the unused rings 5001 * "active" (ie. head != tail) after resume which 5002 * will prevent c3 entry. Makes sure all unused rings 5003 * are totally idle. 
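 * init_unused_rings() below does exactly that by writing zero to the
 * RING_CTL, RING_HEAD, RING_TAIL and RING_START registers of every ring
 * the platform exposes but this driver never uses.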
5004 */ 5005 init_unused_rings(dev); 5006 5007 for_each_ring(ring, dev_priv, i) { 5008 ret = ring->init_hw(ring); 5009 if (ret) 5010 goto out; 5011 } 5012 5013 for (i = 0; i < NUM_L3_SLICES(dev); i++) 5014 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 5015 5016 ret = i915_ppgtt_init_hw(dev); 5017 if (ret && ret != -EIO) { 5018 DRM_ERROR("PPGTT enable failed %d\n", ret); 5019 i915_gem_cleanup_ringbuffer(dev); 5020 } 5021 5022 ret = i915_gem_context_enable(dev_priv); 5023 if (ret && ret != -EIO) { 5024 DRM_ERROR("Context enable failed %d\n", ret); 5025 i915_gem_cleanup_ringbuffer(dev); 5026 5027 goto out; 5028 } 5029 5030 out: 5031 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5032 return ret; 5033 } 5034 5035 int i915_gem_init(struct drm_device *dev) 5036 { 5037 struct drm_i915_private *dev_priv = dev->dev_private; 5038 int ret; 5039 5040 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5041 i915.enable_execlists); 5042 5043 mutex_lock(&dev->struct_mutex); 5044 5045 if (IS_VALLEYVIEW(dev)) { 5046 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 5047 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 5048 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 5049 VLV_GTLC_ALLOWWAKEACK), 10)) 5050 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 5051 } 5052 5053 if (!i915.enable_execlists) { 5054 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5055 dev_priv->gt.init_rings = i915_gem_init_rings; 5056 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 5057 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 5058 } else { 5059 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5060 dev_priv->gt.init_rings = intel_logical_rings_init; 5061 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 5062 dev_priv->gt.stop_ring = intel_logical_ring_stop; 5063 } 5064 5065 /* This is just a security blanket to placate dragons. 5066 * On some systems, we very sporadically observe that the first TLBs 5067 * used by the CS may be stale, despite us poking the TLB reset. If 5068 * we hold the forcewake during initialisation these problems 5069 * just magically go away. 5070 */ 5071 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5072 5073 ret = i915_gem_init_userptr(dev); 5074 if (ret) 5075 goto out_unlock; 5076 5077 i915_gem_init_global_gtt(dev); 5078 5079 ret = i915_gem_context_init(dev); 5080 if (ret) 5081 goto out_unlock; 5082 5083 ret = dev_priv->gt.init_rings(dev); 5084 if (ret) 5085 goto out_unlock; 5086 5087 ret = i915_gem_init_hw(dev); 5088 if (ret == -EIO) { 5089 /* Allow ring initialisation to fail by marking the GPU as 5090 * wedged. But we only want to do this where the GPU is angry, 5091 * for all other failure, such as an allocation failure, bail. 
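 * Setting I915_WEDGED in gpu_error.reset_counter is how the driver
 * records a terminally hung GPU: i915_terminally_wedged() then reports
 * the condition and subsequent waits fail with -EIO instead of blocking
 * forever.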
5092 */ 5093 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5094 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5095 ret = 0; 5096 } 5097 5098 out_unlock: 5099 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5100 mutex_unlock(&dev->struct_mutex); 5101 5102 return ret; 5103 } 5104 5105 void 5106 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5107 { 5108 struct drm_i915_private *dev_priv = dev->dev_private; 5109 struct intel_engine_cs *ring; 5110 int i; 5111 5112 for_each_ring(ring, dev_priv, i) 5113 dev_priv->gt.cleanup_ring(ring); 5114 } 5115 5116 static void 5117 init_ring_lists(struct intel_engine_cs *ring) 5118 { 5119 INIT_LIST_HEAD(&ring->active_list); 5120 INIT_LIST_HEAD(&ring->request_list); 5121 } 5122 5123 void i915_init_vm(struct drm_i915_private *dev_priv, 5124 struct i915_address_space *vm) 5125 { 5126 if (!i915_is_ggtt(vm)) 5127 drm_mm_init(&vm->mm, vm->start, vm->total); 5128 vm->dev = dev_priv->dev; 5129 INIT_LIST_HEAD(&vm->active_list); 5130 INIT_LIST_HEAD(&vm->inactive_list); 5131 INIT_LIST_HEAD(&vm->global_link); 5132 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5133 } 5134 5135 void 5136 i915_gem_load(struct drm_device *dev) 5137 { 5138 struct drm_i915_private *dev_priv = dev->dev_private; 5139 int i; 5140 5141 dev_priv->objects = 5142 kmem_cache_create("i915_gem_object", 5143 sizeof(struct drm_i915_gem_object), 0, 5144 SLAB_HWCACHE_ALIGN, 5145 NULL); 5146 dev_priv->vmas = 5147 kmem_cache_create("i915_gem_vma", 5148 sizeof(struct i915_vma), 0, 5149 SLAB_HWCACHE_ALIGN, 5150 NULL); 5151 dev_priv->requests = 5152 kmem_cache_create("i915_gem_request", 5153 sizeof(struct drm_i915_gem_request), 0, 5154 SLAB_HWCACHE_ALIGN, 5155 NULL); 5156 5157 INIT_LIST_HEAD(&dev_priv->vm_list); 5158 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5159 5160 INIT_LIST_HEAD(&dev_priv->context_list); 5161 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5162 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5163 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5164 for (i = 0; i < I915_NUM_RINGS; i++) 5165 init_ring_lists(&dev_priv->ring[i]); 5166 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5167 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5168 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5169 i915_gem_retire_work_handler); 5170 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5171 i915_gem_idle_work_handler); 5172 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5173 5174 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5175 5176 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5177 dev_priv->num_fence_regs = 32; 5178 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5179 dev_priv->num_fence_regs = 16; 5180 else 5181 dev_priv->num_fence_regs = 8; 5182 5183 if (intel_vgpu_active(dev)) 5184 dev_priv->num_fence_regs = 5185 I915_READ(vgtif_reg(avail_rs.fence_num)); 5186 5187 /* Initialize fence registers to zero */ 5188 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5189 i915_gem_restore_fences(dev); 5190 5191 i915_gem_detect_bit_6_swizzle(dev); 5192 init_waitqueue_head(&dev_priv->pending_flip_queue); 5193 5194 dev_priv->mm.interruptible = true; 5195 5196 i915_gem_shrinker_init(dev_priv); 5197 5198 mutex_init(&dev_priv->fb_tracking.lock); 5199 } 5200 5201 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5202 { 5203 struct drm_i915_file_private *file_priv = file->driver_priv; 5204 5205 /* Clean up our request list when the client is going away, so that 5206 * later retire_requests won't 
dereference our soon-to-be-gone 5207 * file_priv. 5208 */ 5209 spin_lock(&file_priv->mm.lock); 5210 while (!list_empty(&file_priv->mm.request_list)) { 5211 struct drm_i915_gem_request *request; 5212 5213 request = list_first_entry(&file_priv->mm.request_list, 5214 struct drm_i915_gem_request, 5215 client_list); 5216 list_del(&request->client_list); 5217 request->file_priv = NULL; 5218 } 5219 spin_unlock(&file_priv->mm.lock); 5220 5221 if (!list_empty(&file_priv->rps.link)) { 5222 spin_lock(&to_i915(dev)->rps.client_lock); 5223 list_del(&file_priv->rps.link); 5224 spin_unlock(&to_i915(dev)->rps.client_lock); 5225 } 5226 } 5227 5228 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5229 { 5230 struct drm_i915_file_private *file_priv; 5231 int ret; 5232 5233 DRM_DEBUG_DRIVER("\n"); 5234 5235 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5236 if (!file_priv) 5237 return -ENOMEM; 5238 5239 file->driver_priv = file_priv; 5240 file_priv->dev_priv = dev->dev_private; 5241 file_priv->file = file; 5242 INIT_LIST_HEAD(&file_priv->rps.link); 5243 5244 spin_lock_init(&file_priv->mm.lock); 5245 INIT_LIST_HEAD(&file_priv->mm.request_list); 5246 5247 ret = i915_gem_context_open(dev, file); 5248 if (ret) 5249 kfree(file_priv); 5250 5251 return ret; 5252 } 5253 5254 /** 5255 * i915_gem_track_fb - update frontbuffer tracking 5256 * old: current GEM buffer for the frontbuffer slots 5257 * new: new GEM buffer for the frontbuffer slots 5258 * frontbuffer_bits: bitmask of frontbuffer slots 5259 * 5260 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5261 * from @old and setting them in @new. Both @old and @new can be NULL. 5262 */ 5263 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5264 struct drm_i915_gem_object *new, 5265 unsigned frontbuffer_bits) 5266 { 5267 if (old) { 5268 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5269 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5270 old->frontbuffer_bits &= ~frontbuffer_bits; 5271 } 5272 5273 if (new) { 5274 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5275 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5276 new->frontbuffer_bits |= frontbuffer_bits; 5277 } 5278 } 5279 5280 /* All the new VM stuff */ 5281 unsigned long 5282 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5283 struct i915_address_space *vm) 5284 { 5285 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5286 struct i915_vma *vma; 5287 5288 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5289 5290 list_for_each_entry(vma, &o->vma_list, vma_link) { 5291 if (i915_is_ggtt(vma->vm) && 5292 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5293 continue; 5294 if (vma->vm == vm) 5295 return vma->node.start; 5296 } 5297 5298 WARN(1, "%s vma for this object not found.\n", 5299 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5300 return -1; 5301 } 5302 5303 unsigned long 5304 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5305 const struct i915_ggtt_view *view) 5306 { 5307 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5308 struct i915_vma *vma; 5309 5310 list_for_each_entry(vma, &o->vma_list, vma_link) 5311 if (vma->vm == ggtt && 5312 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5313 return vma->node.start; 5314 5315 WARN(1, "global vma for this object not found. 
(view=%u)\n", view->type); 5316 return -1; 5317 } 5318 5319 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5320 struct i915_address_space *vm) 5321 { 5322 struct i915_vma *vma; 5323 5324 list_for_each_entry(vma, &o->vma_list, vma_link) { 5325 if (i915_is_ggtt(vma->vm) && 5326 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5327 continue; 5328 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5329 return true; 5330 } 5331 5332 return false; 5333 } 5334 5335 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5336 const struct i915_ggtt_view *view) 5337 { 5338 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5339 struct i915_vma *vma; 5340 5341 list_for_each_entry(vma, &o->vma_list, vma_link) 5342 if (vma->vm == ggtt && 5343 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5344 drm_mm_node_allocated(&vma->node)) 5345 return true; 5346 5347 return false; 5348 } 5349 5350 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5351 { 5352 struct i915_vma *vma; 5353 5354 list_for_each_entry(vma, &o->vma_list, vma_link) 5355 if (drm_mm_node_allocated(&vma->node)) 5356 return true; 5357 5358 return false; 5359 } 5360 5361 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5362 struct i915_address_space *vm) 5363 { 5364 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5365 struct i915_vma *vma; 5366 5367 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5368 5369 BUG_ON(list_empty(&o->vma_list)); 5370 5371 list_for_each_entry(vma, &o->vma_list, vma_link) { 5372 if (i915_is_ggtt(vma->vm) && 5373 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5374 continue; 5375 if (vma->vm == vm) 5376 return vma->node.size; 5377 } 5378 return 0; 5379 } 5380 5381 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5382 { 5383 struct i915_vma *vma; 5384 list_for_each_entry(vma, &obj->vma_list, vma_link) 5385 if (vma->pin_count > 0) 5386 return true; 5387 5388 return false; 5389 } 5390 5391
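/*
 * Illustrative sketch (not part of the driver): the per-VM helpers above
 * are normally used together. A caller that wants the address of an
 * object inside a particular address space first checks that a binding
 * exists:
 *
 *	if (i915_gem_obj_bound(obj, vm))
 *		offset = i915_gem_obj_offset(obj, vm);
 *
 * i915_gem_obj_offset() WARNs and returns -1 when no vma is found, so
 * callers are expected to only ask for the offset of a bound object.
 */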