1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include <linux/shmem_fs.h> 36 #include <linux/slab.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 #include <linux/dma-buf.h> 40 41 #define RQ_BUG_ON(expr) 42 43 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 44 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 45 static void 46 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 47 static void 48 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 49 50 static bool cpu_cache_is_coherent(struct drm_device *dev, 51 enum i915_cache_level level) 52 { 53 return HAS_LLC(dev) || level != I915_CACHE_NONE; 54 } 55 56 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 57 { 58 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 59 return true; 60 61 return obj->pin_display; 62 } 63 64 /* some bookkeeping */ 65 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 66 size_t size) 67 { 68 spin_lock(&dev_priv->mm.object_stat_lock); 69 dev_priv->mm.object_count++; 70 dev_priv->mm.object_memory += size; 71 spin_unlock(&dev_priv->mm.object_stat_lock); 72 } 73 74 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 75 size_t size) 76 { 77 spin_lock(&dev_priv->mm.object_stat_lock); 78 dev_priv->mm.object_count--; 79 dev_priv->mm.object_memory -= size; 80 spin_unlock(&dev_priv->mm.object_stat_lock); 81 } 82 83 static int 84 i915_gem_wait_for_error(struct i915_gpu_error *error) 85 { 86 int ret; 87 88 #define EXIT_COND (!i915_reset_in_progress(error) || \ 89 i915_terminally_wedged(error)) 90 if (EXIT_COND) 91 return 0; 92 93 /* 94 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 95 * userspace. If it takes that long something really bad is going on and 96 * we should simply try to bail out and fail as gracefully as possible. 
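 *
 * wait_event_interruptible_timeout() returns 0 if it timed out with the
 * condition still false, a negative errno if we were interrupted by a
 * signal, and the remaining jiffies otherwise, which is why the code
 * below turns 0 into -EIO and passes negative values straight through.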
97 */ 98 ret = wait_event_interruptible_timeout(error->reset_queue, 99 EXIT_COND, 100 10*HZ); 101 if (ret == 0) { 102 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 103 return -EIO; 104 } else if (ret < 0) { 105 return ret; 106 } 107 #undef EXIT_COND 108 109 return 0; 110 } 111 112 int i915_mutex_lock_interruptible(struct drm_device *dev) 113 { 114 struct drm_i915_private *dev_priv = dev->dev_private; 115 int ret; 116 117 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 118 if (ret) 119 return ret; 120 121 ret = mutex_lock_interruptible(&dev->struct_mutex); 122 if (ret) 123 return ret; 124 125 WARN_ON(i915_verify_lists(dev)); 126 return 0; 127 } 128 129 int 130 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 131 struct drm_file *file) 132 { 133 struct drm_i915_private *dev_priv = dev->dev_private; 134 struct drm_i915_gem_get_aperture *args = data; 135 struct i915_gtt *ggtt = &dev_priv->gtt; 136 struct i915_vma *vma; 137 size_t pinned; 138 139 pinned = 0; 140 mutex_lock(&dev->struct_mutex); 141 list_for_each_entry(vma, &ggtt->base.active_list, mm_list) 142 if (vma->pin_count) 143 pinned += vma->node.size; 144 list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list) 145 if (vma->pin_count) 146 pinned += vma->node.size; 147 mutex_unlock(&dev->struct_mutex); 148 149 args->aper_size = dev_priv->gtt.base.total; 150 args->aper_available_size = args->aper_size - pinned; 151 152 return 0; 153 } 154 155 static int 156 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 157 { 158 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 159 char *vaddr = obj->phys_handle->vaddr; 160 struct sg_table *st; 161 struct scatterlist *sg; 162 int i; 163 164 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 165 return -EINVAL; 166 167 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 168 struct page *page; 169 char *src; 170 171 page = shmem_read_mapping_page(mapping, i); 172 if (IS_ERR(page)) 173 return PTR_ERR(page); 174 175 src = kmap_atomic(page); 176 memcpy(vaddr, src, PAGE_SIZE); 177 drm_clflush_virt_range(vaddr, PAGE_SIZE); 178 kunmap_atomic(src); 179 180 page_cache_release(page); 181 vaddr += PAGE_SIZE; 182 } 183 184 i915_gem_chipset_flush(obj->base.dev); 185 186 st = kmalloc(sizeof(*st), GFP_KERNEL); 187 if (st == NULL) 188 return -ENOMEM; 189 190 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 191 kfree(st); 192 return -ENOMEM; 193 } 194 195 sg = st->sgl; 196 sg->offset = 0; 197 sg->length = obj->base.size; 198 199 sg_dma_address(sg) = obj->phys_handle->busaddr; 200 sg_dma_len(sg) = obj->base.size; 201 202 obj->pages = st; 203 return 0; 204 } 205 206 static void 207 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 208 { 209 int ret; 210 211 BUG_ON(obj->madv == __I915_MADV_PURGED); 212 213 ret = i915_gem_object_set_to_cpu_domain(obj, true); 214 if (ret) { 215 /* In the event of a disaster, abandon all caches and 216 * hope for the best. 
217 */ 218 WARN_ON(ret != -EIO); 219 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 220 } 221 222 if (obj->madv == I915_MADV_DONTNEED) 223 obj->dirty = 0; 224 225 if (obj->dirty) { 226 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 227 char *vaddr = obj->phys_handle->vaddr; 228 int i; 229 230 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 231 struct page *page; 232 char *dst; 233 234 page = shmem_read_mapping_page(mapping, i); 235 if (IS_ERR(page)) 236 continue; 237 238 dst = kmap_atomic(page); 239 drm_clflush_virt_range(vaddr, PAGE_SIZE); 240 memcpy(dst, vaddr, PAGE_SIZE); 241 kunmap_atomic(dst); 242 243 set_page_dirty(page); 244 if (obj->madv == I915_MADV_WILLNEED) 245 mark_page_accessed(page); 246 page_cache_release(page); 247 vaddr += PAGE_SIZE; 248 } 249 obj->dirty = 0; 250 } 251 252 sg_free_table(obj->pages); 253 kfree(obj->pages); 254 } 255 256 static void 257 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 258 { 259 drm_pci_free(obj->base.dev, obj->phys_handle); 260 } 261 262 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 263 .get_pages = i915_gem_object_get_pages_phys, 264 .put_pages = i915_gem_object_put_pages_phys, 265 .release = i915_gem_object_release_phys, 266 }; 267 268 static int 269 drop_pages(struct drm_i915_gem_object *obj) 270 { 271 struct i915_vma *vma, *next; 272 int ret; 273 274 drm_gem_object_reference(&obj->base); 275 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 276 if (i915_vma_unbind(vma)) 277 break; 278 279 ret = i915_gem_object_put_pages(obj); 280 drm_gem_object_unreference(&obj->base); 281 282 return ret; 283 } 284 285 int 286 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 287 int align) 288 { 289 drm_dma_handle_t *phys; 290 int ret; 291 292 if (obj->phys_handle) { 293 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 294 return -EBUSY; 295 296 return 0; 297 } 298 299 if (obj->madv != I915_MADV_WILLNEED) 300 return -EFAULT; 301 302 if (obj->base.filp == NULL) 303 return -EINVAL; 304 305 ret = drop_pages(obj); 306 if (ret) 307 return ret; 308 309 /* create a new object */ 310 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 311 if (!phys) 312 return -ENOMEM; 313 314 obj->phys_handle = phys; 315 obj->ops = &i915_gem_phys_ops; 316 317 return i915_gem_object_get_pages(obj); 318 } 319 320 static int 321 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 322 struct drm_i915_gem_pwrite *args, 323 struct drm_file *file_priv) 324 { 325 struct drm_device *dev = obj->base.dev; 326 void *vaddr = obj->phys_handle->vaddr + args->offset; 327 char __user *user_data = to_user_ptr(args->data_ptr); 328 int ret = 0; 329 330 /* We manually control the domain here and pretend that it 331 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 332 */ 333 ret = i915_gem_object_wait_rendering(obj, false); 334 if (ret) 335 return ret; 336 337 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 338 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 339 unsigned long unwritten; 340 341 /* The physical object once assigned is fixed for the lifetime 342 * of the obj, so we can safely drop the lock and continue 343 * to access vaddr. 
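 *
 * Dropping the lock is also what makes the retry workable at all:
 * copy_from_user() may have to fault the source pages in, and if the
 * user buffer is itself a GTT mmap the fault handler will want
 * dev->struct_mutex, so the faulting copy must not run while we hold it.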
344 */ 345 mutex_unlock(&dev->struct_mutex); 346 unwritten = copy_from_user(vaddr, user_data, args->size); 347 mutex_lock(&dev->struct_mutex); 348 if (unwritten) { 349 ret = -EFAULT; 350 goto out; 351 } 352 } 353 354 drm_clflush_virt_range(vaddr, args->size); 355 i915_gem_chipset_flush(dev); 356 357 out: 358 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 359 return ret; 360 } 361 362 void *i915_gem_object_alloc(struct drm_device *dev) 363 { 364 struct drm_i915_private *dev_priv = dev->dev_private; 365 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 366 } 367 368 void i915_gem_object_free(struct drm_i915_gem_object *obj) 369 { 370 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 371 kmem_cache_free(dev_priv->objects, obj); 372 } 373 374 static int 375 i915_gem_create(struct drm_file *file, 376 struct drm_device *dev, 377 uint64_t size, 378 uint32_t *handle_p) 379 { 380 struct drm_i915_gem_object *obj; 381 int ret; 382 u32 handle; 383 384 size = roundup(size, PAGE_SIZE); 385 if (size == 0) 386 return -EINVAL; 387 388 /* Allocate the new object */ 389 obj = i915_gem_alloc_object(dev, size); 390 if (obj == NULL) 391 return -ENOMEM; 392 393 ret = drm_gem_handle_create(file, &obj->base, &handle); 394 /* drop reference from allocate - handle holds it now */ 395 drm_gem_object_unreference_unlocked(&obj->base); 396 if (ret) 397 return ret; 398 399 *handle_p = handle; 400 return 0; 401 } 402 403 int 404 i915_gem_dumb_create(struct drm_file *file, 405 struct drm_device *dev, 406 struct drm_mode_create_dumb *args) 407 { 408 /* have to work out size/pitch and return them */ 409 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 410 args->size = args->pitch * args->height; 411 return i915_gem_create(file, dev, 412 args->size, &args->handle); 413 } 414 415 /** 416 * Creates a new mm object and returns a handle to it. 417 */ 418 int 419 i915_gem_create_ioctl(struct drm_device *dev, void *data, 420 struct drm_file *file) 421 { 422 struct drm_i915_gem_create *args = data; 423 424 return i915_gem_create(file, dev, 425 args->size, &args->handle); 426 } 427 428 static inline int 429 __copy_to_user_swizzled(char __user *cpu_vaddr, 430 const char *gpu_vaddr, int gpu_offset, 431 int length) 432 { 433 int ret, cpu_offset = 0; 434 435 while (length > 0) { 436 int cacheline_end = ALIGN(gpu_offset + 1, 64); 437 int this_length = min(cacheline_end - gpu_offset, length); 438 int swizzled_gpu_offset = gpu_offset ^ 64; 439 440 ret = __copy_to_user(cpu_vaddr + cpu_offset, 441 gpu_vaddr + swizzled_gpu_offset, 442 this_length); 443 if (ret) 444 return ret + length; 445 446 cpu_offset += this_length; 447 gpu_offset += this_length; 448 length -= this_length; 449 } 450 451 return 0; 452 } 453 454 static inline int 455 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 456 const char __user *cpu_vaddr, 457 int length) 458 { 459 int ret, cpu_offset = 0; 460 461 while (length > 0) { 462 int cacheline_end = ALIGN(gpu_offset + 1, 64); 463 int this_length = min(cacheline_end - gpu_offset, length); 464 int swizzled_gpu_offset = gpu_offset ^ 64; 465 466 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 467 cpu_vaddr + cpu_offset, 468 this_length); 469 if (ret) 470 return ret + length; 471 472 cpu_offset += this_length; 473 gpu_offset += this_length; 474 length -= this_length; 475 } 476 477 return 0; 478 } 479 480 /* 481 * Pins the specified object's pages and synchronizes the object with 482 * GPU accesses. 
Sets needs_clflush to non-zero if the caller should 483 * flush the object from the CPU cache. 484 */ 485 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 486 int *needs_clflush) 487 { 488 int ret; 489 490 *needs_clflush = 0; 491 492 if (!obj->base.filp) 493 return -EINVAL; 494 495 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 496 /* If we're not in the cpu read domain, set ourself into the gtt 497 * read domain and manually flush cachelines (if required). This 498 * optimizes for the case when the gpu will dirty the data 499 * anyway again before the next pread happens. */ 500 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 501 obj->cache_level); 502 ret = i915_gem_object_wait_rendering(obj, true); 503 if (ret) 504 return ret; 505 } 506 507 ret = i915_gem_object_get_pages(obj); 508 if (ret) 509 return ret; 510 511 i915_gem_object_pin_pages(obj); 512 513 return ret; 514 } 515 516 /* Per-page copy function for the shmem pread fastpath. 517 * Flushes invalid cachelines before reading the target if 518 * needs_clflush is set. */ 519 static int 520 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 521 char __user *user_data, 522 bool page_do_bit17_swizzling, bool needs_clflush) 523 { 524 char *vaddr; 525 int ret; 526 527 if (unlikely(page_do_bit17_swizzling)) 528 return -EINVAL; 529 530 vaddr = kmap_atomic(page); 531 if (needs_clflush) 532 drm_clflush_virt_range(vaddr + shmem_page_offset, 533 page_length); 534 ret = __copy_to_user_inatomic(user_data, 535 vaddr + shmem_page_offset, 536 page_length); 537 kunmap_atomic(vaddr); 538 539 return ret ? -EFAULT : 0; 540 } 541 542 static void 543 shmem_clflush_swizzled_range(char *addr, unsigned long length, 544 bool swizzled) 545 { 546 if (unlikely(swizzled)) { 547 unsigned long start = (unsigned long) addr; 548 unsigned long end = (unsigned long) addr + length; 549 550 /* For swizzling simply ensure that we always flush both 551 * channels. Lame, but simple and it works. Swizzled 552 * pwrite/pread is far from a hotpath - current userspace 553 * doesn't use it at all. */ 554 start = round_down(start, 128); 555 end = round_up(end, 128); 556 557 drm_clflush_virt_range((void *)start, end - start); 558 } else { 559 drm_clflush_virt_range(addr, length); 560 } 561 562 } 563 564 /* Only difference to the fast-path function is that this can handle bit17 565 * and uses non-atomic copy and kmap functions. */ 566 static int 567 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 568 char __user *user_data, 569 bool page_do_bit17_swizzling, bool needs_clflush) 570 { 571 char *vaddr; 572 int ret; 573 574 vaddr = kmap(page); 575 if (needs_clflush) 576 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 577 page_length, 578 page_do_bit17_swizzling); 579 580 if (page_do_bit17_swizzling) 581 ret = __copy_to_user_swizzled(user_data, 582 vaddr, shmem_page_offset, 583 page_length); 584 else 585 ret = __copy_to_user(user_data, 586 vaddr + shmem_page_offset, 587 page_length); 588 kunmap(page); 589 590 return ret ? 
- EFAULT : 0; 591 } 592 593 static int 594 i915_gem_shmem_pread(struct drm_device *dev, 595 struct drm_i915_gem_object *obj, 596 struct drm_i915_gem_pread *args, 597 struct drm_file *file) 598 { 599 char __user *user_data; 600 ssize_t remain; 601 loff_t offset; 602 int shmem_page_offset, page_length, ret = 0; 603 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 604 int prefaulted = 0; 605 int needs_clflush = 0; 606 struct sg_page_iter sg_iter; 607 608 user_data = to_user_ptr(args->data_ptr); 609 remain = args->size; 610 611 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 612 613 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 614 if (ret) 615 return ret; 616 617 offset = args->offset; 618 619 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 620 offset >> PAGE_SHIFT) { 621 struct page *page = sg_page_iter_page(&sg_iter); 622 623 if (remain <= 0) 624 break; 625 626 /* Operation in this page 627 * 628 * shmem_page_offset = offset within page in shmem file 629 * page_length = bytes to copy for this page 630 */ 631 shmem_page_offset = offset_in_page(offset); 632 page_length = remain; 633 if ((shmem_page_offset + page_length) > PAGE_SIZE) 634 page_length = PAGE_SIZE - shmem_page_offset; 635 636 page_do_bit17_swizzling = obj_do_bit17_swizzling && 637 (page_to_phys(page) & (1 << 17)) != 0; 638 639 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 640 user_data, page_do_bit17_swizzling, 641 needs_clflush); 642 if (ret == 0) 643 goto next_page; 644 645 mutex_unlock(&dev->struct_mutex); 646 647 if (likely(!i915.prefault_disable) && !prefaulted) { 648 ret = fault_in_multipages_writeable(user_data, remain); 649 /* Userspace is tricking us, but we've already clobbered 650 * its pages with the prefault and promised to write the 651 * data up to the first fault. Hence ignore any errors 652 * and just continue. */ 653 (void)ret; 654 prefaulted = 1; 655 } 656 657 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 658 user_data, page_do_bit17_swizzling, 659 needs_clflush); 660 661 mutex_lock(&dev->struct_mutex); 662 663 if (ret) 664 goto out; 665 666 next_page: 667 remain -= page_length; 668 user_data += page_length; 669 offset += page_length; 670 } 671 672 out: 673 i915_gem_object_unpin_pages(obj); 674 675 return ret; 676 } 677 678 /** 679 * Reads data from the object referenced by handle. 680 * 681 * On error, the contents of *data are undefined. 682 */ 683 int 684 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 685 struct drm_file *file) 686 { 687 struct drm_i915_gem_pread *args = data; 688 struct drm_i915_gem_object *obj; 689 int ret = 0; 690 691 if (args->size == 0) 692 return 0; 693 694 if (!access_ok(VERIFY_WRITE, 695 to_user_ptr(args->data_ptr), 696 args->size)) 697 return -EFAULT; 698 699 ret = i915_mutex_lock_interruptible(dev); 700 if (ret) 701 return ret; 702 703 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 704 if (&obj->base == NULL) { 705 ret = -ENOENT; 706 goto unlock; 707 } 708 709 /* Bounds check source. */ 710 if (args->offset > obj->base.size || 711 args->size > obj->base.size - args->offset) { 712 ret = -EINVAL; 713 goto out; 714 } 715 716 /* prime objects have no backing filp to GEM pread/pwrite 717 * pages from. 
718 */ 719 if (!obj->base.filp) { 720 ret = -EINVAL; 721 goto out; 722 } 723 724 trace_i915_gem_object_pread(obj, args->offset, args->size); 725 726 ret = i915_gem_shmem_pread(dev, obj, args, file); 727 728 out: 729 drm_gem_object_unreference(&obj->base); 730 unlock: 731 mutex_unlock(&dev->struct_mutex); 732 return ret; 733 } 734 735 /* This is the fast write path which cannot handle 736 * page faults in the source data 737 */ 738 739 static inline int 740 fast_user_write(struct io_mapping *mapping, 741 loff_t page_base, int page_offset, 742 char __user *user_data, 743 int length) 744 { 745 void __iomem *vaddr_atomic; 746 void *vaddr; 747 unsigned long unwritten; 748 749 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 750 /* We can use the cpu mem copy function because this is X86. */ 751 vaddr = (void __force*)vaddr_atomic + page_offset; 752 unwritten = __copy_from_user_inatomic_nocache(vaddr, 753 user_data, length); 754 io_mapping_unmap_atomic(vaddr_atomic); 755 return unwritten; 756 } 757 758 /** 759 * This is the fast pwrite path, where we copy the data directly from the 760 * user into the GTT, uncached. 761 */ 762 static int 763 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 764 struct drm_i915_gem_object *obj, 765 struct drm_i915_gem_pwrite *args, 766 struct drm_file *file) 767 { 768 struct drm_i915_private *dev_priv = dev->dev_private; 769 ssize_t remain; 770 loff_t offset, page_base; 771 char __user *user_data; 772 int page_offset, page_length, ret; 773 774 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 775 if (ret) 776 goto out; 777 778 ret = i915_gem_object_set_to_gtt_domain(obj, true); 779 if (ret) 780 goto out_unpin; 781 782 ret = i915_gem_object_put_fence(obj); 783 if (ret) 784 goto out_unpin; 785 786 user_data = to_user_ptr(args->data_ptr); 787 remain = args->size; 788 789 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 790 791 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 792 793 while (remain > 0) { 794 /* Operation in this page 795 * 796 * page_base = page offset within aperture 797 * page_offset = offset within page 798 * page_length = bytes to copy for this page 799 */ 800 page_base = offset & PAGE_MASK; 801 page_offset = offset_in_page(offset); 802 page_length = remain; 803 if ((page_offset + remain) > PAGE_SIZE) 804 page_length = PAGE_SIZE - page_offset; 805 806 /* If we get a fault while copying data, then (presumably) our 807 * source page isn't available. Return the error and we'll 808 * retry in the slow path. 809 */ 810 if (fast_user_write(dev_priv->gtt.mappable, page_base, 811 page_offset, user_data, page_length)) { 812 ret = -EFAULT; 813 goto out_flush; 814 } 815 816 remain -= page_length; 817 user_data += page_length; 818 offset += page_length; 819 } 820 821 out_flush: 822 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 823 out_unpin: 824 i915_gem_object_ggtt_unpin(obj); 825 out: 826 return ret; 827 } 828 829 /* Per-page copy function for the shmem pwrite fastpath. 830 * Flushes invalid cachelines before writing to the target if 831 * needs_clflush_before is set and flushes out any written cachelines after 832 * writing if needs_clflush is set. 
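 *
 * Bit-17 swizzled pages are punted to the slow path below: on pages
 * whose physical address has bit 17 set, the swizzle swaps the two
 * 64-byte halves of every 128-byte span (the gpu_offset ^ 64 in the
 * __copy_from_user_swizzled()/__copy_to_user_swizzled() helpers), so
 * the transfer is not one contiguous copy and is handled with the
 * non-atomic helpers instead.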
*/ 833 static int 834 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 835 char __user *user_data, 836 bool page_do_bit17_swizzling, 837 bool needs_clflush_before, 838 bool needs_clflush_after) 839 { 840 char *vaddr; 841 int ret; 842 843 if (unlikely(page_do_bit17_swizzling)) 844 return -EINVAL; 845 846 vaddr = kmap_atomic(page); 847 if (needs_clflush_before) 848 drm_clflush_virt_range(vaddr + shmem_page_offset, 849 page_length); 850 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 851 user_data, page_length); 852 if (needs_clflush_after) 853 drm_clflush_virt_range(vaddr + shmem_page_offset, 854 page_length); 855 kunmap_atomic(vaddr); 856 857 return ret ? -EFAULT : 0; 858 } 859 860 /* Only difference to the fast-path function is that this can handle bit17 861 * and uses non-atomic copy and kmap functions. */ 862 static int 863 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 864 char __user *user_data, 865 bool page_do_bit17_swizzling, 866 bool needs_clflush_before, 867 bool needs_clflush_after) 868 { 869 char *vaddr; 870 int ret; 871 872 vaddr = kmap(page); 873 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 874 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 875 page_length, 876 page_do_bit17_swizzling); 877 if (page_do_bit17_swizzling) 878 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 879 user_data, 880 page_length); 881 else 882 ret = __copy_from_user(vaddr + shmem_page_offset, 883 user_data, 884 page_length); 885 if (needs_clflush_after) 886 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 887 page_length, 888 page_do_bit17_swizzling); 889 kunmap(page); 890 891 return ret ? -EFAULT : 0; 892 } 893 894 static int 895 i915_gem_shmem_pwrite(struct drm_device *dev, 896 struct drm_i915_gem_object *obj, 897 struct drm_i915_gem_pwrite *args, 898 struct drm_file *file) 899 { 900 ssize_t remain; 901 loff_t offset; 902 char __user *user_data; 903 int shmem_page_offset, page_length, ret = 0; 904 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 905 int hit_slowpath = 0; 906 int needs_clflush_after = 0; 907 int needs_clflush_before = 0; 908 struct sg_page_iter sg_iter; 909 910 user_data = to_user_ptr(args->data_ptr); 911 remain = args->size; 912 913 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 914 915 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 916 /* If we're not in the cpu write domain, set ourself into the gtt 917 * write domain and manually flush cachelines (if required). This 918 * optimizes for the case when the gpu will use the data 919 * right away and we therefore have to clflush anyway. */ 920 needs_clflush_after = cpu_write_needs_clflush(obj); 921 ret = i915_gem_object_wait_rendering(obj, false); 922 if (ret) 923 return ret; 924 } 925 /* Same trick applies to invalidate partially written cachelines read 926 * before writing. 
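 * For example, with 64-byte cachelines, a pwrite touching only bytes
 * 0..15 of a line still makes the CPU fetch the whole line for the
 * read-modify-write; if a stale copy were satisfied from the cache
 * instead of memory, bytes 16..63 would be written back with old data,
 * clobbering whatever the GPU put there. Hence the clflush before such
 * partial-cacheline writes.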
*/ 927 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 928 needs_clflush_before = 929 !cpu_cache_is_coherent(dev, obj->cache_level); 930 931 ret = i915_gem_object_get_pages(obj); 932 if (ret) 933 return ret; 934 935 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 936 937 i915_gem_object_pin_pages(obj); 938 939 offset = args->offset; 940 obj->dirty = 1; 941 942 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 943 offset >> PAGE_SHIFT) { 944 struct page *page = sg_page_iter_page(&sg_iter); 945 int partial_cacheline_write; 946 947 if (remain <= 0) 948 break; 949 950 /* Operation in this page 951 * 952 * shmem_page_offset = offset within page in shmem file 953 * page_length = bytes to copy for this page 954 */ 955 shmem_page_offset = offset_in_page(offset); 956 957 page_length = remain; 958 if ((shmem_page_offset + page_length) > PAGE_SIZE) 959 page_length = PAGE_SIZE - shmem_page_offset; 960 961 /* If we don't overwrite a cacheline completely we need to be 962 * careful to have up-to-date data by first clflushing. Don't 963 * overcomplicate things and flush the entire patch. */ 964 partial_cacheline_write = needs_clflush_before && 965 ((shmem_page_offset | page_length) 966 & (boot_cpu_data.x86_clflush_size - 1)); 967 968 page_do_bit17_swizzling = obj_do_bit17_swizzling && 969 (page_to_phys(page) & (1 << 17)) != 0; 970 971 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 972 user_data, page_do_bit17_swizzling, 973 partial_cacheline_write, 974 needs_clflush_after); 975 if (ret == 0) 976 goto next_page; 977 978 hit_slowpath = 1; 979 mutex_unlock(&dev->struct_mutex); 980 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 981 user_data, page_do_bit17_swizzling, 982 partial_cacheline_write, 983 needs_clflush_after); 984 985 mutex_lock(&dev->struct_mutex); 986 987 if (ret) 988 goto out; 989 990 next_page: 991 remain -= page_length; 992 user_data += page_length; 993 offset += page_length; 994 } 995 996 out: 997 i915_gem_object_unpin_pages(obj); 998 999 if (hit_slowpath) { 1000 /* 1001 * Fixup: Flush cpu caches in case we didn't flush the dirty 1002 * cachelines in-line while writing and the object moved 1003 * out of the cpu write domain while we've dropped the lock. 1004 */ 1005 if (!needs_clflush_after && 1006 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1007 if (i915_gem_clflush_object(obj, obj->pin_display)) 1008 needs_clflush_after = true; 1009 } 1010 } 1011 1012 if (needs_clflush_after) 1013 i915_gem_chipset_flush(dev); 1014 else 1015 obj->cache_dirty = true; 1016 1017 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1018 return ret; 1019 } 1020 1021 /** 1022 * Writes data to the object referenced by handle. 1023 * 1024 * On error, the contents of the buffer that were to be modified are undefined. 
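 *
 * A minimal userspace sketch of driving this ioctl (not kernel code;
 * it assumes libdrm's drmIoctl() wrapper and the uapi layout of
 * struct drm_i915_gem_pwrite, with fd, handle and payload supplied by
 * the caller):
 *
 *	struct drm_i915_gem_pwrite pwrite = {
 *		.handle   = handle,
 *		.offset   = 0,
 *		.size     = sizeof(payload),
 *		.data_ptr = (uintptr_t)payload,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);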
1025 */ 1026 int 1027 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1028 struct drm_file *file) 1029 { 1030 struct drm_i915_private *dev_priv = dev->dev_private; 1031 struct drm_i915_gem_pwrite *args = data; 1032 struct drm_i915_gem_object *obj; 1033 int ret; 1034 1035 if (args->size == 0) 1036 return 0; 1037 1038 if (!access_ok(VERIFY_READ, 1039 to_user_ptr(args->data_ptr), 1040 args->size)) 1041 return -EFAULT; 1042 1043 if (likely(!i915.prefault_disable)) { 1044 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1045 args->size); 1046 if (ret) 1047 return -EFAULT; 1048 } 1049 1050 intel_runtime_pm_get(dev_priv); 1051 1052 ret = i915_mutex_lock_interruptible(dev); 1053 if (ret) 1054 goto put_rpm; 1055 1056 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1057 if (&obj->base == NULL) { 1058 ret = -ENOENT; 1059 goto unlock; 1060 } 1061 1062 /* Bounds check destination. */ 1063 if (args->offset > obj->base.size || 1064 args->size > obj->base.size - args->offset) { 1065 ret = -EINVAL; 1066 goto out; 1067 } 1068 1069 /* prime objects have no backing filp to GEM pread/pwrite 1070 * pages from. 1071 */ 1072 if (!obj->base.filp) { 1073 ret = -EINVAL; 1074 goto out; 1075 } 1076 1077 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1078 1079 ret = -EFAULT; 1080 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1081 * it would end up going through the fenced access, and we'll get 1082 * different detiling behavior between reading and writing. 1083 * pread/pwrite currently are reading and writing from the CPU 1084 * perspective, requiring manual detiling by the client. 1085 */ 1086 if (obj->tiling_mode == I915_TILING_NONE && 1087 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1088 cpu_write_needs_clflush(obj)) { 1089 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1090 /* Note that the gtt paths might fail with non-page-backed user 1091 * pointers (e.g. gtt mappings when moving data between 1092 * textures). Fallback to the shmem path in that case. */ 1093 } 1094 1095 if (ret == -EFAULT || ret == -ENOSPC) { 1096 if (obj->phys_handle) 1097 ret = i915_gem_phys_pwrite(obj, args, file); 1098 else 1099 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1100 } 1101 1102 out: 1103 drm_gem_object_unreference(&obj->base); 1104 unlock: 1105 mutex_unlock(&dev->struct_mutex); 1106 put_rpm: 1107 intel_runtime_pm_put(dev_priv); 1108 1109 return ret; 1110 } 1111 1112 int 1113 i915_gem_check_wedge(struct i915_gpu_error *error, 1114 bool interruptible) 1115 { 1116 if (i915_reset_in_progress(error)) { 1117 /* Non-interruptible callers can't handle -EAGAIN, hence return 1118 * -EIO unconditionally for these. */ 1119 if (!interruptible) 1120 return -EIO; 1121 1122 /* Recovery complete, but the reset failed ... */ 1123 if (i915_terminally_wedged(error)) 1124 return -EIO; 1125 1126 /* 1127 * Check if GPU Reset is in progress - we need intel_ring_begin 1128 * to work properly to reinit the hw state while the gpu is 1129 * still marked as reset-in-progress. Handle this with a flag. 
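 * When reload_in_reset is set we therefore return 0 here instead of
 * -EAGAIN, so that the work submitted by the reset handler itself while
 * it reinitialises the rings is not bounced back.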
1130 */ 1131 if (!error->reload_in_reset) 1132 return -EAGAIN; 1133 } 1134 1135 return 0; 1136 } 1137 1138 static void fake_irq(unsigned long data) 1139 { 1140 wake_up_process((struct task_struct *)data); 1141 } 1142 1143 static bool missed_irq(struct drm_i915_private *dev_priv, 1144 struct intel_engine_cs *ring) 1145 { 1146 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1147 } 1148 1149 static int __i915_spin_request(struct drm_i915_gem_request *req) 1150 { 1151 unsigned long timeout; 1152 1153 if (i915_gem_request_get_ring(req)->irq_refcount) 1154 return -EBUSY; 1155 1156 timeout = jiffies + 1; 1157 while (!need_resched()) { 1158 if (i915_gem_request_completed(req, true)) 1159 return 0; 1160 1161 if (time_after_eq(jiffies, timeout)) 1162 break; 1163 1164 cpu_relax_lowlatency(); 1165 } 1166 if (i915_gem_request_completed(req, false)) 1167 return 0; 1168 1169 return -EAGAIN; 1170 } 1171 1172 /** 1173 * __i915_wait_request - wait until execution of request has finished 1174 * @req: duh! 1175 * @reset_counter: reset sequence associated with the given request 1176 * @interruptible: do an interruptible wait (normally yes) 1177 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1178 * 1179 * Note: It is of utmost importance that the passed in seqno and reset_counter 1180 * values have been read by the caller in an smp safe manner. Where read-side 1181 * locks are involved, it is sufficient to read the reset_counter before 1182 * unlocking the lock that protects the seqno. For lockless tricks, the 1183 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1184 * inserted. 1185 * 1186 * Returns 0 if the request was found within the alloted time. Else returns the 1187 * errno with remaining time filled in timeout argument. 1188 */ 1189 int __i915_wait_request(struct drm_i915_gem_request *req, 1190 unsigned reset_counter, 1191 bool interruptible, 1192 s64 *timeout, 1193 struct intel_rps_client *rps) 1194 { 1195 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1196 struct drm_device *dev = ring->dev; 1197 struct drm_i915_private *dev_priv = dev->dev_private; 1198 const bool irq_test_in_progress = 1199 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1200 DEFINE_WAIT(wait); 1201 unsigned long timeout_expire; 1202 s64 before, now; 1203 int ret; 1204 1205 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1206 1207 if (list_empty(&req->list)) 1208 return 0; 1209 1210 if (i915_gem_request_completed(req, true)) 1211 return 0; 1212 1213 timeout_expire = timeout ? 1214 jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0; 1215 1216 if (INTEL_INFO(dev_priv)->gen >= 6) 1217 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1218 1219 /* Record current time in case interrupted by signal, or wedged */ 1220 trace_i915_gem_request_wait_begin(req); 1221 before = ktime_get_raw_ns(); 1222 1223 /* Optimistic spin for the next jiffie before touching IRQs */ 1224 ret = __i915_spin_request(req); 1225 if (ret == 0) 1226 goto out; 1227 1228 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) { 1229 ret = -ENODEV; 1230 goto out; 1231 } 1232 1233 for (;;) { 1234 struct timer_list timer; 1235 1236 prepare_to_wait(&ring->irq_queue, &wait, 1237 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 1238 1239 /* We need to check whether any gpu reset happened in between 1240 * the caller grabbing the seqno and now ... 
*/ 1241 if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { 1242 /* ... but upgrade the -EAGAIN to an -EIO if the gpu 1243 * is truely gone. */ 1244 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1245 if (ret == 0) 1246 ret = -EAGAIN; 1247 break; 1248 } 1249 1250 if (i915_gem_request_completed(req, false)) { 1251 ret = 0; 1252 break; 1253 } 1254 1255 if (interruptible && signal_pending(current)) { 1256 ret = -ERESTARTSYS; 1257 break; 1258 } 1259 1260 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1261 ret = -ETIME; 1262 break; 1263 } 1264 1265 timer.function = NULL; 1266 if (timeout || missed_irq(dev_priv, ring)) { 1267 unsigned long expire; 1268 1269 setup_timer_on_stack(&timer, fake_irq, (unsigned long)current); 1270 expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire; 1271 mod_timer(&timer, expire); 1272 } 1273 1274 io_schedule(); 1275 1276 if (timer.function) { 1277 del_singleshot_timer_sync(&timer); 1278 destroy_timer_on_stack(&timer); 1279 } 1280 } 1281 if (!irq_test_in_progress) 1282 ring->irq_put(ring); 1283 1284 finish_wait(&ring->irq_queue, &wait); 1285 1286 out: 1287 now = ktime_get_raw_ns(); 1288 trace_i915_gem_request_wait_end(req); 1289 1290 if (timeout) { 1291 s64 tres = *timeout - (now - before); 1292 1293 *timeout = tres < 0 ? 0 : tres; 1294 1295 /* 1296 * Apparently ktime isn't accurate enough and occasionally has a 1297 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1298 * things up to make the test happy. We allow up to 1 jiffy. 1299 * 1300 * This is a regrssion from the timespec->ktime conversion. 1301 */ 1302 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1303 *timeout = 0; 1304 } 1305 1306 return ret; 1307 } 1308 1309 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1310 struct drm_file *file) 1311 { 1312 struct drm_i915_private *dev_private; 1313 struct drm_i915_file_private *file_priv; 1314 1315 WARN_ON(!req || !file || req->file_priv); 1316 1317 if (!req || !file) 1318 return -EINVAL; 1319 1320 if (req->file_priv) 1321 return -EINVAL; 1322 1323 dev_private = req->ring->dev->dev_private; 1324 file_priv = file->driver_priv; 1325 1326 spin_lock(&file_priv->mm.lock); 1327 req->file_priv = file_priv; 1328 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1329 spin_unlock(&file_priv->mm.lock); 1330 1331 req->pid = get_pid(task_pid(current)); 1332 1333 return 0; 1334 } 1335 1336 static inline void 1337 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1338 { 1339 struct drm_i915_file_private *file_priv = request->file_priv; 1340 1341 if (!file_priv) 1342 return; 1343 1344 spin_lock(&file_priv->mm.lock); 1345 list_del(&request->client_list); 1346 request->file_priv = NULL; 1347 spin_unlock(&file_priv->mm.lock); 1348 1349 put_pid(request->pid); 1350 request->pid = NULL; 1351 } 1352 1353 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1354 { 1355 trace_i915_gem_request_retire(request); 1356 1357 /* We know the GPU must have read the request to have 1358 * sent us the seqno + interrupt, so use the position 1359 * of tail of the request to update the last known position 1360 * of the GPU head. 1361 * 1362 * Note this requires that we are always called in request 1363 * completion order. 
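 * (Requests on a ring complete in submission order, so retiring them in
 * that order keeps last_retired_head tracking a position the GPU has
 * definitely consumed, which is what the ring-space accounting relies
 * on.)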
1364 */ 1365 request->ringbuf->last_retired_head = request->postfix; 1366 1367 list_del_init(&request->list); 1368 i915_gem_request_remove_from_client(request); 1369 1370 i915_gem_request_unreference(request); 1371 } 1372 1373 static void 1374 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1375 { 1376 struct intel_engine_cs *engine = req->ring; 1377 struct drm_i915_gem_request *tmp; 1378 1379 lockdep_assert_held(&engine->dev->struct_mutex); 1380 1381 if (list_empty(&req->list)) 1382 return; 1383 1384 do { 1385 tmp = list_first_entry(&engine->request_list, 1386 typeof(*tmp), list); 1387 1388 i915_gem_request_retire(tmp); 1389 } while (tmp != req); 1390 1391 WARN_ON(i915_verify_lists(engine->dev)); 1392 } 1393 1394 /** 1395 * Waits for a request to be signaled, and cleans up the 1396 * request and object lists appropriately for that event. 1397 */ 1398 int 1399 i915_wait_request(struct drm_i915_gem_request *req) 1400 { 1401 struct drm_device *dev; 1402 struct drm_i915_private *dev_priv; 1403 bool interruptible; 1404 int ret; 1405 1406 BUG_ON(req == NULL); 1407 1408 dev = req->ring->dev; 1409 dev_priv = dev->dev_private; 1410 interruptible = dev_priv->mm.interruptible; 1411 1412 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1413 1414 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1415 if (ret) 1416 return ret; 1417 1418 ret = __i915_wait_request(req, 1419 atomic_read(&dev_priv->gpu_error.reset_counter), 1420 interruptible, NULL, NULL); 1421 if (ret) 1422 return ret; 1423 1424 __i915_gem_request_retire__upto(req); 1425 return 0; 1426 } 1427 1428 /** 1429 * Ensures that all rendering to the object has completed and the object is 1430 * safe to unbind from the GTT or access from the CPU. 1431 */ 1432 int 1433 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1434 bool readonly) 1435 { 1436 int ret, i; 1437 1438 if (!obj->active) 1439 return 0; 1440 1441 if (readonly) { 1442 if (obj->last_write_req != NULL) { 1443 ret = i915_wait_request(obj->last_write_req); 1444 if (ret) 1445 return ret; 1446 1447 i = obj->last_write_req->ring->id; 1448 if (obj->last_read_req[i] == obj->last_write_req) 1449 i915_gem_object_retire__read(obj, i); 1450 else 1451 i915_gem_object_retire__write(obj); 1452 } 1453 } else { 1454 for (i = 0; i < I915_NUM_RINGS; i++) { 1455 if (obj->last_read_req[i] == NULL) 1456 continue; 1457 1458 ret = i915_wait_request(obj->last_read_req[i]); 1459 if (ret) 1460 return ret; 1461 1462 i915_gem_object_retire__read(obj, i); 1463 } 1464 RQ_BUG_ON(obj->active); 1465 } 1466 1467 return 0; 1468 } 1469 1470 static void 1471 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1472 struct drm_i915_gem_request *req) 1473 { 1474 int ring = req->ring->id; 1475 1476 if (obj->last_read_req[ring] == req) 1477 i915_gem_object_retire__read(obj, ring); 1478 else if (obj->last_write_req == req) 1479 i915_gem_object_retire__write(obj); 1480 1481 __i915_gem_request_retire__upto(req); 1482 } 1483 1484 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1485 * as the object state may change during this call. 
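 * To cope with that, it takes its own references on the outstanding
 * requests, drops struct_mutex for the actual wait, and only retires
 * the requests it waited on after retaking the lock; any other object
 * state still has to be revalidated by the caller afterwards.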
1486 */ 1487 static __must_check int 1488 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1489 struct intel_rps_client *rps, 1490 bool readonly) 1491 { 1492 struct drm_device *dev = obj->base.dev; 1493 struct drm_i915_private *dev_priv = dev->dev_private; 1494 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1495 unsigned reset_counter; 1496 int ret, i, n = 0; 1497 1498 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1499 BUG_ON(!dev_priv->mm.interruptible); 1500 1501 if (!obj->active) 1502 return 0; 1503 1504 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1505 if (ret) 1506 return ret; 1507 1508 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1509 1510 if (readonly) { 1511 struct drm_i915_gem_request *req; 1512 1513 req = obj->last_write_req; 1514 if (req == NULL) 1515 return 0; 1516 1517 requests[n++] = i915_gem_request_reference(req); 1518 } else { 1519 for (i = 0; i < I915_NUM_RINGS; i++) { 1520 struct drm_i915_gem_request *req; 1521 1522 req = obj->last_read_req[i]; 1523 if (req == NULL) 1524 continue; 1525 1526 requests[n++] = i915_gem_request_reference(req); 1527 } 1528 } 1529 1530 mutex_unlock(&dev->struct_mutex); 1531 for (i = 0; ret == 0 && i < n; i++) 1532 ret = __i915_wait_request(requests[i], reset_counter, true, 1533 NULL, rps); 1534 mutex_lock(&dev->struct_mutex); 1535 1536 for (i = 0; i < n; i++) { 1537 if (ret == 0) 1538 i915_gem_object_retire_request(obj, requests[i]); 1539 i915_gem_request_unreference(requests[i]); 1540 } 1541 1542 return ret; 1543 } 1544 1545 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1546 { 1547 struct drm_i915_file_private *fpriv = file->driver_priv; 1548 return &fpriv->rps; 1549 } 1550 1551 /** 1552 * Called when user space prepares to use an object with the CPU, either 1553 * through the mmap ioctl's mapping or a GTT mapping. 1554 */ 1555 int 1556 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1557 struct drm_file *file) 1558 { 1559 struct drm_i915_gem_set_domain *args = data; 1560 struct drm_i915_gem_object *obj; 1561 uint32_t read_domains = args->read_domains; 1562 uint32_t write_domain = args->write_domain; 1563 int ret; 1564 1565 /* Only handle setting domains to types used by the CPU. */ 1566 if (write_domain & I915_GEM_GPU_DOMAINS) 1567 return -EINVAL; 1568 1569 if (read_domains & I915_GEM_GPU_DOMAINS) 1570 return -EINVAL; 1571 1572 /* Having something in the write domain implies it's in the read 1573 * domain, and only that read domain. Enforce that in the request. 1574 */ 1575 if (write_domain != 0 && read_domains != write_domain) 1576 return -EINVAL; 1577 1578 ret = i915_mutex_lock_interruptible(dev); 1579 if (ret) 1580 return ret; 1581 1582 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1583 if (&obj->base == NULL) { 1584 ret = -ENOENT; 1585 goto unlock; 1586 } 1587 1588 /* Try to flush the object off the GPU without holding the lock. 1589 * We will repeat the flush holding the lock in the normal manner 1590 * to catch cases where we are gazumped. 1591 */ 1592 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1593 to_rps_client(file), 1594 !write_domain); 1595 if (ret) 1596 goto unref; 1597 1598 if (read_domains & I915_GEM_DOMAIN_GTT) 1599 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1600 else 1601 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1602 1603 if (write_domain != 0) 1604 intel_fb_obj_invalidate(obj, 1605 write_domain == I915_GEM_DOMAIN_GTT ? 
1606 ORIGIN_GTT : ORIGIN_CPU); 1607 1608 unref: 1609 drm_gem_object_unreference(&obj->base); 1610 unlock: 1611 mutex_unlock(&dev->struct_mutex); 1612 return ret; 1613 } 1614 1615 /** 1616 * Called when user space has done writes to this buffer 1617 */ 1618 int 1619 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1620 struct drm_file *file) 1621 { 1622 struct drm_i915_gem_sw_finish *args = data; 1623 struct drm_i915_gem_object *obj; 1624 int ret = 0; 1625 1626 ret = i915_mutex_lock_interruptible(dev); 1627 if (ret) 1628 return ret; 1629 1630 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1631 if (&obj->base == NULL) { 1632 ret = -ENOENT; 1633 goto unlock; 1634 } 1635 1636 /* Pinned buffers may be scanout, so flush the cache */ 1637 if (obj->pin_display) 1638 i915_gem_object_flush_cpu_write_domain(obj); 1639 1640 drm_gem_object_unreference(&obj->base); 1641 unlock: 1642 mutex_unlock(&dev->struct_mutex); 1643 return ret; 1644 } 1645 1646 /** 1647 * Maps the contents of an object, returning the address it is mapped 1648 * into. 1649 * 1650 * While the mapping holds a reference on the contents of the object, it doesn't 1651 * imply a ref on the object itself. 1652 * 1653 * IMPORTANT: 1654 * 1655 * DRM driver writers who look a this function as an example for how to do GEM 1656 * mmap support, please don't implement mmap support like here. The modern way 1657 * to implement DRM mmap support is with an mmap offset ioctl (like 1658 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1659 * That way debug tooling like valgrind will understand what's going on, hiding 1660 * the mmap call in a driver private ioctl will break that. The i915 driver only 1661 * does cpu mmaps this way because we didn't know better. 1662 */ 1663 int 1664 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1665 struct drm_file *file) 1666 { 1667 struct drm_i915_gem_mmap *args = data; 1668 struct drm_gem_object *obj; 1669 unsigned long addr; 1670 1671 if (args->flags & ~(I915_MMAP_WC)) 1672 return -EINVAL; 1673 1674 if (args->flags & I915_MMAP_WC && !cpu_has_pat) 1675 return -ENODEV; 1676 1677 obj = drm_gem_object_lookup(dev, file, args->handle); 1678 if (obj == NULL) 1679 return -ENOENT; 1680 1681 /* prime objects have no backing filp to GEM mmap 1682 * pages from. 1683 */ 1684 if (!obj->filp) { 1685 drm_gem_object_unreference_unlocked(obj); 1686 return -EINVAL; 1687 } 1688 1689 addr = vm_mmap(obj->filp, 0, args->size, 1690 PROT_READ | PROT_WRITE, MAP_SHARED, 1691 args->offset); 1692 if (args->flags & I915_MMAP_WC) { 1693 struct mm_struct *mm = current->mm; 1694 struct vm_area_struct *vma; 1695 1696 down_write(&mm->mmap_sem); 1697 vma = find_vma(mm, addr); 1698 if (vma) 1699 vma->vm_page_prot = 1700 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1701 else 1702 addr = -ENOMEM; 1703 up_write(&mm->mmap_sem); 1704 } 1705 drm_gem_object_unreference_unlocked(obj); 1706 if (IS_ERR((void *)addr)) 1707 return addr; 1708 1709 args->addr_ptr = (uint64_t) addr; 1710 1711 return 0; 1712 } 1713 1714 /** 1715 * i915_gem_fault - fault a page into the GTT 1716 * @vma: VMA in question 1717 * @vmf: fault info 1718 * 1719 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1720 * from userspace. 
The fault handler takes care of binding the object to 1721 * the GTT (if needed), allocating and programming a fence register (again, 1722 * only if needed based on whether the old reg is still valid or the object 1723 * is tiled) and inserting a new PTE into the faulting process. 1724 * 1725 * Note that the faulting process may involve evicting existing objects 1726 * from the GTT and/or fence registers to make room. So performance may 1727 * suffer if the GTT working set is large or there are few fence registers 1728 * left. 1729 */ 1730 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1731 { 1732 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1733 struct drm_device *dev = obj->base.dev; 1734 struct drm_i915_private *dev_priv = dev->dev_private; 1735 struct i915_ggtt_view view = i915_ggtt_view_normal; 1736 pgoff_t page_offset; 1737 unsigned long pfn; 1738 int ret = 0; 1739 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1740 1741 intel_runtime_pm_get(dev_priv); 1742 1743 /* We don't use vmf->pgoff since that has the fake offset */ 1744 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1745 PAGE_SHIFT; 1746 1747 ret = i915_mutex_lock_interruptible(dev); 1748 if (ret) 1749 goto out; 1750 1751 trace_i915_gem_object_fault(obj, page_offset, true, write); 1752 1753 /* Try to flush the object off the GPU first without holding the lock. 1754 * Upon reacquiring the lock, we will perform our sanity checks and then 1755 * repeat the flush holding the lock in the normal manner to catch cases 1756 * where we are gazumped. 1757 */ 1758 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1759 if (ret) 1760 goto unlock; 1761 1762 /* Access to snoopable pages through the GTT is incoherent. */ 1763 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1764 ret = -EFAULT; 1765 goto unlock; 1766 } 1767 1768 /* Use a partial view if the object is bigger than the aperture. */ 1769 if (obj->base.size >= dev_priv->gtt.mappable_end && 1770 obj->tiling_mode == I915_TILING_NONE) { 1771 static const unsigned int chunk_size = 256; // 1 MiB 1772 1773 memset(&view, 0, sizeof(view)); 1774 view.type = I915_GGTT_VIEW_PARTIAL; 1775 view.params.partial.offset = rounddown(page_offset, chunk_size); 1776 view.params.partial.size = 1777 min_t(unsigned int, 1778 chunk_size, 1779 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1780 view.params.partial.offset); 1781 } 1782 1783 /* Now pin it into the GTT if needed */ 1784 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1785 if (ret) 1786 goto unlock; 1787 1788 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1789 if (ret) 1790 goto unpin; 1791 1792 ret = i915_gem_object_get_fence(obj); 1793 if (ret) 1794 goto unpin; 1795 1796 /* Finally, remap it using the new GTT offset */ 1797 pfn = dev_priv->gtt.mappable_base + 1798 i915_gem_obj_ggtt_offset_view(obj, &view); 1799 pfn >>= PAGE_SHIFT; 1800 1801 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1802 /* Overriding existing pages in partial view does not cause 1803 * us any trouble as TLBs are still valid because the fault 1804 * is due to userspace losing part of the mapping or never 1805 * having accessed it before (at this partials' range). 
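 *
 * With the default 4 KiB pages the 256-page chunk gives a 1 MiB window
 * around the faulting offset, so objects larger than the mappable
 * aperture can still be accessed through a CPU mapping, one chunk at a
 * time.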
1806 */ 1807 unsigned long base = vma->vm_start + 1808 (view.params.partial.offset << PAGE_SHIFT); 1809 unsigned int i; 1810 1811 for (i = 0; i < view.params.partial.size; i++) { 1812 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1813 if (ret) 1814 break; 1815 } 1816 1817 obj->fault_mappable = true; 1818 } else { 1819 if (!obj->fault_mappable) { 1820 unsigned long size = min_t(unsigned long, 1821 vma->vm_end - vma->vm_start, 1822 obj->base.size); 1823 int i; 1824 1825 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1826 ret = vm_insert_pfn(vma, 1827 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1828 pfn + i); 1829 if (ret) 1830 break; 1831 } 1832 1833 obj->fault_mappable = true; 1834 } else 1835 ret = vm_insert_pfn(vma, 1836 (unsigned long)vmf->virtual_address, 1837 pfn + page_offset); 1838 } 1839 unpin: 1840 i915_gem_object_ggtt_unpin_view(obj, &view); 1841 unlock: 1842 mutex_unlock(&dev->struct_mutex); 1843 out: 1844 switch (ret) { 1845 case -EIO: 1846 /* 1847 * We eat errors when the gpu is terminally wedged to avoid 1848 * userspace unduly crashing (gl has no provisions for mmaps to 1849 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1850 * and so needs to be reported. 1851 */ 1852 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1853 ret = VM_FAULT_SIGBUS; 1854 break; 1855 } 1856 case -EAGAIN: 1857 /* 1858 * EAGAIN means the gpu is hung and we'll wait for the error 1859 * handler to reset everything when re-faulting in 1860 * i915_mutex_lock_interruptible. 1861 */ 1862 case 0: 1863 case -ERESTARTSYS: 1864 case -EINTR: 1865 case -EBUSY: 1866 /* 1867 * EBUSY is ok: this just means that another thread 1868 * already did the job. 1869 */ 1870 ret = VM_FAULT_NOPAGE; 1871 break; 1872 case -ENOMEM: 1873 ret = VM_FAULT_OOM; 1874 break; 1875 case -ENOSPC: 1876 case -EFAULT: 1877 ret = VM_FAULT_SIGBUS; 1878 break; 1879 default: 1880 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1881 ret = VM_FAULT_SIGBUS; 1882 break; 1883 } 1884 1885 intel_runtime_pm_put(dev_priv); 1886 return ret; 1887 } 1888 1889 /** 1890 * i915_gem_release_mmap - remove physical page mappings 1891 * @obj: obj in question 1892 * 1893 * Preserve the reservation of the mmapping with the DRM core code, but 1894 * relinquish ownership of the pages back to the system. 1895 * 1896 * It is vital that we remove the page mapping if we have mapped a tiled 1897 * object through the GTT and then lose the fence register due to 1898 * resource pressure. Similarly if the object has been moved out of the 1899 * aperture, than pages mapped into userspace must be revoked. Removing the 1900 * mapping will then trigger a page fault on the next user access, allowing 1901 * fixup by i915_gem_fault(). 
1902 */ 1903 void 1904 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1905 { 1906 if (!obj->fault_mappable) 1907 return; 1908 1909 drm_vma_node_unmap(&obj->base.vma_node, 1910 obj->base.dev->anon_inode->i_mapping); 1911 obj->fault_mappable = false; 1912 } 1913 1914 void 1915 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 1916 { 1917 struct drm_i915_gem_object *obj; 1918 1919 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 1920 i915_gem_release_mmap(obj); 1921 } 1922 1923 uint32_t 1924 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1925 { 1926 uint32_t gtt_size; 1927 1928 if (INTEL_INFO(dev)->gen >= 4 || 1929 tiling_mode == I915_TILING_NONE) 1930 return size; 1931 1932 /* Previous chips need a power-of-two fence region when tiling */ 1933 if (INTEL_INFO(dev)->gen == 3) 1934 gtt_size = 1024*1024; 1935 else 1936 gtt_size = 512*1024; 1937 1938 while (gtt_size < size) 1939 gtt_size <<= 1; 1940 1941 return gtt_size; 1942 } 1943 1944 /** 1945 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1946 * @obj: object to check 1947 * 1948 * Return the required GTT alignment for an object, taking into account 1949 * potential fence register mapping. 1950 */ 1951 uint32_t 1952 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1953 int tiling_mode, bool fenced) 1954 { 1955 /* 1956 * Minimum alignment is 4k (GTT page size), but might be greater 1957 * if a fence register is needed for the object. 1958 */ 1959 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1960 tiling_mode == I915_TILING_NONE) 1961 return 4096; 1962 1963 /* 1964 * Previous chips need to be aligned to the size of the smallest 1965 * fence register that can contain the object. 1966 */ 1967 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1968 } 1969 1970 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1971 { 1972 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1973 int ret; 1974 1975 if (drm_vma_node_has_offset(&obj->base.vma_node)) 1976 return 0; 1977 1978 dev_priv->mm.shrinker_no_lock_stealing = true; 1979 1980 ret = drm_gem_create_mmap_offset(&obj->base); 1981 if (ret != -ENOSPC) 1982 goto out; 1983 1984 /* Badly fragmented mmap space? The only way we can recover 1985 * space is by destroying unwanted objects. We can't randomly release 1986 * mmap_offsets as userspace expects them to be persistent for the 1987 * lifetime of the objects. The closest we can is to release the 1988 * offsets on purgeable objects by truncating it and marking it purged, 1989 * which prevents userspace from ever using that object again. 
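 *
 * That is what the calls below do: first ask the shrinker to reap
 * purgeable objects (bound or unbound) for roughly the size we need,
 * and if the offset allocation still fails, shrink everything before
 * the final retry.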
1990 */ 1991 i915_gem_shrink(dev_priv, 1992 obj->base.size >> PAGE_SHIFT, 1993 I915_SHRINK_BOUND | 1994 I915_SHRINK_UNBOUND | 1995 I915_SHRINK_PURGEABLE); 1996 ret = drm_gem_create_mmap_offset(&obj->base); 1997 if (ret != -ENOSPC) 1998 goto out; 1999 2000 i915_gem_shrink_all(dev_priv); 2001 ret = drm_gem_create_mmap_offset(&obj->base); 2002 out: 2003 dev_priv->mm.shrinker_no_lock_stealing = false; 2004 2005 return ret; 2006 } 2007 2008 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2009 { 2010 drm_gem_free_mmap_offset(&obj->base); 2011 } 2012 2013 int 2014 i915_gem_mmap_gtt(struct drm_file *file, 2015 struct drm_device *dev, 2016 uint32_t handle, 2017 uint64_t *offset) 2018 { 2019 struct drm_i915_gem_object *obj; 2020 int ret; 2021 2022 ret = i915_mutex_lock_interruptible(dev); 2023 if (ret) 2024 return ret; 2025 2026 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2027 if (&obj->base == NULL) { 2028 ret = -ENOENT; 2029 goto unlock; 2030 } 2031 2032 if (obj->madv != I915_MADV_WILLNEED) { 2033 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2034 ret = -EFAULT; 2035 goto out; 2036 } 2037 2038 ret = i915_gem_object_create_mmap_offset(obj); 2039 if (ret) 2040 goto out; 2041 2042 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2043 2044 out: 2045 drm_gem_object_unreference(&obj->base); 2046 unlock: 2047 mutex_unlock(&dev->struct_mutex); 2048 return ret; 2049 } 2050 2051 /** 2052 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2053 * @dev: DRM device 2054 * @data: GTT mapping ioctl data 2055 * @file: GEM object info 2056 * 2057 * Simply returns the fake offset to userspace so it can mmap it. 2058 * The mmap call will end up in drm_gem_mmap(), which will set things 2059 * up so we can get faults in the handler above. 2060 * 2061 * The fault handler will take care of binding the object into the GTT 2062 * (since it may have been evicted to make room for something), allocating 2063 * a fence register, and mapping the appropriate aperture address into 2064 * userspace. 2065 */ 2066 int 2067 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2068 struct drm_file *file) 2069 { 2070 struct drm_i915_gem_mmap_gtt *args = data; 2071 2072 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2073 } 2074 2075 /* Immediately discard the backing storage */ 2076 static void 2077 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2078 { 2079 i915_gem_object_free_mmap_offset(obj); 2080 2081 if (obj->base.filp == NULL) 2082 return; 2083 2084 /* Our goal here is to return as much of the memory as 2085 * is possible back to the system as we are called from OOM. 2086 * To do this we must instruct the shmfs to drop all of its 2087 * backing pages, *now*. 
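 *
 * This differs from i915_gem_object_invalidate() below, which only
 * calls invalidate_mapping_pages() and so drops clean, unpinned pages
 * opportunistically; shmem_truncate_range() discards the backing store
 * unconditionally, which is why the object is then marked
 * __I915_MADV_PURGED and can never be paged back in.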
2088 */ 2089 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2090 obj->madv = __I915_MADV_PURGED; 2091 } 2092 2093 /* Try to discard unwanted pages */ 2094 static void 2095 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2096 { 2097 struct address_space *mapping; 2098 2099 switch (obj->madv) { 2100 case I915_MADV_DONTNEED: 2101 i915_gem_object_truncate(obj); 2102 case __I915_MADV_PURGED: 2103 return; 2104 } 2105 2106 if (obj->base.filp == NULL) 2107 return; 2108 2109 mapping = file_inode(obj->base.filp)->i_mapping, 2110 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2111 } 2112 2113 static void 2114 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2115 { 2116 struct sg_page_iter sg_iter; 2117 int ret; 2118 2119 BUG_ON(obj->madv == __I915_MADV_PURGED); 2120 2121 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2122 if (ret) { 2123 /* In the event of a disaster, abandon all caches and 2124 * hope for the best. 2125 */ 2126 WARN_ON(ret != -EIO); 2127 i915_gem_clflush_object(obj, true); 2128 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2129 } 2130 2131 i915_gem_gtt_finish_object(obj); 2132 2133 if (i915_gem_object_needs_bit17_swizzle(obj)) 2134 i915_gem_object_save_bit_17_swizzle(obj); 2135 2136 if (obj->madv == I915_MADV_DONTNEED) 2137 obj->dirty = 0; 2138 2139 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2140 struct page *page = sg_page_iter_page(&sg_iter); 2141 2142 if (obj->dirty) 2143 set_page_dirty(page); 2144 2145 if (obj->madv == I915_MADV_WILLNEED) 2146 mark_page_accessed(page); 2147 2148 page_cache_release(page); 2149 } 2150 obj->dirty = 0; 2151 2152 sg_free_table(obj->pages); 2153 kfree(obj->pages); 2154 } 2155 2156 int 2157 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2158 { 2159 const struct drm_i915_gem_object_ops *ops = obj->ops; 2160 2161 if (obj->pages == NULL) 2162 return 0; 2163 2164 if (obj->pages_pin_count) 2165 return -EBUSY; 2166 2167 BUG_ON(i915_gem_obj_bound_any(obj)); 2168 2169 /* ->put_pages might need to allocate memory for the bit17 swizzle 2170 * array, hence protect them from being reaped by removing them from gtt 2171 * lists early. */ 2172 list_del(&obj->global_list); 2173 2174 ops->put_pages(obj); 2175 obj->pages = NULL; 2176 2177 i915_gem_object_invalidate(obj); 2178 2179 return 0; 2180 } 2181 2182 static int 2183 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2184 { 2185 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2186 int page_count, i; 2187 struct address_space *mapping; 2188 struct sg_table *st; 2189 struct scatterlist *sg; 2190 struct sg_page_iter sg_iter; 2191 struct page *page; 2192 unsigned long last_pfn = 0; /* suppress gcc warning */ 2193 int ret; 2194 gfp_t gfp; 2195 2196 /* Assert that the object is not currently in any GPU domain. As it 2197 * wasn't in the GTT, there shouldn't be any way it could have been in 2198 * a GPU cache 2199 */ 2200 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2201 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2202 2203 st = kmalloc(sizeof(*st), GFP_KERNEL); 2204 if (st == NULL) 2205 return -ENOMEM; 2206 2207 page_count = obj->base.size / PAGE_SIZE; 2208 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2209 kfree(st); 2210 return -ENOMEM; 2211 } 2212 2213 /* Get the list of pages out of our struct file. They'll be pinned 2214 * at this point until we release them. 
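 *
 * The gfp mask is constrained below (no __GFP_IO, no __GFP_RECLAIM) so
 * that the first attempt at each page never enters direct reclaim;
 * failures are instead handled by escalating through our own shrinker.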
2215 * 2216 * Fail silently without starting the shrinker 2217 */ 2218 mapping = file_inode(obj->base.filp)->i_mapping; 2219 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); 2220 gfp |= __GFP_NORETRY | __GFP_NOWARN; 2221 sg = st->sgl; 2222 st->nents = 0; 2223 for (i = 0; i < page_count; i++) { 2224 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2225 if (IS_ERR(page)) { 2226 i915_gem_shrink(dev_priv, 2227 page_count, 2228 I915_SHRINK_BOUND | 2229 I915_SHRINK_UNBOUND | 2230 I915_SHRINK_PURGEABLE); 2231 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2232 } 2233 if (IS_ERR(page)) { 2234 /* We've tried hard to allocate the memory by reaping 2235 * our own buffer, now let the real VM do its job and 2236 * go down in flames if truly OOM. 2237 */ 2238 i915_gem_shrink_all(dev_priv); 2239 page = shmem_read_mapping_page(mapping, i); 2240 if (IS_ERR(page)) { 2241 ret = PTR_ERR(page); 2242 goto err_pages; 2243 } 2244 } 2245 #ifdef CONFIG_SWIOTLB 2246 if (swiotlb_nr_tbl()) { 2247 st->nents++; 2248 sg_set_page(sg, page, PAGE_SIZE, 0); 2249 sg = sg_next(sg); 2250 continue; 2251 } 2252 #endif 2253 if (!i || page_to_pfn(page) != last_pfn + 1) { 2254 if (i) 2255 sg = sg_next(sg); 2256 st->nents++; 2257 sg_set_page(sg, page, PAGE_SIZE, 0); 2258 } else { 2259 sg->length += PAGE_SIZE; 2260 } 2261 last_pfn = page_to_pfn(page); 2262 2263 /* Check that the i965g/gm workaround works. */ 2264 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2265 } 2266 #ifdef CONFIG_SWIOTLB 2267 if (!swiotlb_nr_tbl()) 2268 #endif 2269 sg_mark_end(sg); 2270 obj->pages = st; 2271 2272 ret = i915_gem_gtt_prepare_object(obj); 2273 if (ret) 2274 goto err_pages; 2275 2276 if (i915_gem_object_needs_bit17_swizzle(obj)) 2277 i915_gem_object_do_bit_17_swizzle(obj); 2278 2279 if (obj->tiling_mode != I915_TILING_NONE && 2280 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2281 i915_gem_object_pin_pages(obj); 2282 2283 return 0; 2284 2285 err_pages: 2286 sg_mark_end(sg); 2287 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2288 page_cache_release(sg_page_iter_page(&sg_iter)); 2289 sg_free_table(st); 2290 kfree(st); 2291 2292 /* shmemfs first checks if there is enough memory to allocate the page 2293 * and reports ENOSPC should there be insufficient, along with the usual 2294 * ENOMEM for a genuine allocation failure. 2295 * 2296 * We use ENOSPC in our driver to mean that we have run out of aperture 2297 * space and so want to translate the error from shmemfs back to our 2298 * usual understanding of ENOMEM. 2299 */ 2300 if (ret == -ENOSPC) 2301 ret = -ENOMEM; 2302 2303 return ret; 2304 } 2305 2306 /* Ensure that the associated pages are gathered from the backing storage 2307 * and pinned into our object. i915_gem_object_get_pages() may be called 2308 * multiple times before they are released by a single call to 2309 * i915_gem_object_put_pages() - once the pages are no longer referenced 2310 * either as a result of memory pressure (reaping pages under the shrinker) 2311 * or as the object is itself released. 
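 *
 * Callers normally pin the pages for as long as they are using them so
 * that the shrinker cannot reap them underneath, e.g. (illustrative
 * sketch only, not a real caller):
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);
 *	... access obj->pages ...
 *	i915_gem_object_unpin_pages(obj);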
2312 */ 2313 int 2314 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2315 { 2316 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2317 const struct drm_i915_gem_object_ops *ops = obj->ops; 2318 int ret; 2319 2320 if (obj->pages) 2321 return 0; 2322 2323 if (obj->madv != I915_MADV_WILLNEED) { 2324 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2325 return -EFAULT; 2326 } 2327 2328 BUG_ON(obj->pages_pin_count); 2329 2330 ret = ops->get_pages(obj); 2331 if (ret) 2332 return ret; 2333 2334 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2335 2336 obj->get_page.sg = obj->pages->sgl; 2337 obj->get_page.last = 0; 2338 2339 return 0; 2340 } 2341 2342 void i915_vma_move_to_active(struct i915_vma *vma, 2343 struct drm_i915_gem_request *req) 2344 { 2345 struct drm_i915_gem_object *obj = vma->obj; 2346 struct intel_engine_cs *ring; 2347 2348 ring = i915_gem_request_get_ring(req); 2349 2350 /* Add a reference if we're newly entering the active list. */ 2351 if (obj->active == 0) 2352 drm_gem_object_reference(&obj->base); 2353 obj->active |= intel_ring_flag(ring); 2354 2355 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2356 i915_gem_request_assign(&obj->last_read_req[ring->id], req); 2357 2358 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2359 } 2360 2361 static void 2362 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2363 { 2364 RQ_BUG_ON(obj->last_write_req == NULL); 2365 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2366 2367 i915_gem_request_assign(&obj->last_write_req, NULL); 2368 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2369 } 2370 2371 static void 2372 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2373 { 2374 struct i915_vma *vma; 2375 2376 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2377 RQ_BUG_ON(!(obj->active & (1 << ring))); 2378 2379 list_del_init(&obj->ring_list[ring]); 2380 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2381 2382 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2383 i915_gem_object_retire__write(obj); 2384 2385 obj->active &= ~(1 << ring); 2386 if (obj->active) 2387 return; 2388 2389 /* Bump our place on the bound list to keep it roughly in LRU order 2390 * so that we don't steal from recently used but inactive objects 2391 * (unless we are forced to ofc!) 
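 * (We only reach this point once the last outstanding read has been
 * retired on every ring, i.e. obj->active has just dropped to zero.)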
2392 */ 2393 list_move_tail(&obj->global_list, 2394 &to_i915(obj->base.dev)->mm.bound_list); 2395 2396 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2397 if (!list_empty(&vma->mm_list)) 2398 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2399 } 2400 2401 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2402 drm_gem_object_unreference(&obj->base); 2403 } 2404 2405 static int 2406 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2407 { 2408 struct drm_i915_private *dev_priv = dev->dev_private; 2409 struct intel_engine_cs *ring; 2410 int ret, i, j; 2411 2412 /* Carefully retire all requests without writing to the rings */ 2413 for_each_ring(ring, dev_priv, i) { 2414 ret = intel_ring_idle(ring); 2415 if (ret) 2416 return ret; 2417 } 2418 i915_gem_retire_requests(dev); 2419 2420 /* Finally reset hw state */ 2421 for_each_ring(ring, dev_priv, i) { 2422 intel_ring_init_seqno(ring, seqno); 2423 2424 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2425 ring->semaphore.sync_seqno[j] = 0; 2426 } 2427 2428 return 0; 2429 } 2430 2431 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2432 { 2433 struct drm_i915_private *dev_priv = dev->dev_private; 2434 int ret; 2435 2436 if (seqno == 0) 2437 return -EINVAL; 2438 2439 /* HWS page needs to be set less than what we 2440 * will inject to ring 2441 */ 2442 ret = i915_gem_init_seqno(dev, seqno - 1); 2443 if (ret) 2444 return ret; 2445 2446 /* Carefully set the last_seqno value so that wrap 2447 * detection still works 2448 */ 2449 dev_priv->next_seqno = seqno; 2450 dev_priv->last_seqno = seqno - 1; 2451 if (dev_priv->last_seqno == 0) 2452 dev_priv->last_seqno--; 2453 2454 return 0; 2455 } 2456 2457 int 2458 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2459 { 2460 struct drm_i915_private *dev_priv = dev->dev_private; 2461 2462 /* reserve 0 for non-seqno */ 2463 if (dev_priv->next_seqno == 0) { 2464 int ret = i915_gem_init_seqno(dev, 0); 2465 if (ret) 2466 return ret; 2467 2468 dev_priv->next_seqno = 1; 2469 } 2470 2471 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2472 return 0; 2473 } 2474 2475 /* 2476 * NB: This function is not allowed to fail. Doing so would mean the 2477 * request is not being tracked for completion but the work itself is 2478 * going to happen on the hardware. This would be a Bad Thing(tm). 2479 */ 2480 void __i915_add_request(struct drm_i915_gem_request *request, 2481 struct drm_i915_gem_object *obj, 2482 bool flush_caches) 2483 { 2484 struct intel_engine_cs *ring; 2485 struct drm_i915_private *dev_priv; 2486 struct intel_ringbuffer *ringbuf; 2487 u32 request_start; 2488 int ret; 2489 2490 if (WARN_ON(request == NULL)) 2491 return; 2492 2493 ring = request->ring; 2494 dev_priv = ring->dev->dev_private; 2495 ringbuf = request->ringbuf; 2496 2497 /* 2498 * To ensure that this call will not fail, space for its emissions 2499 * should already have been reserved in the ring buffer. Let the ring 2500 * know that it is time to use that space up. 2501 */ 2502 intel_ring_reserved_space_use(ringbuf); 2503 2504 request_start = intel_ring_get_tail(ringbuf); 2505 /* 2506 * Emit any outstanding flushes - execbuf can fail to emit the flush 2507 * after having emitted the batchbuffer command. Hence we need to fix 2508 * things up similar to emitting the lazy request. The difference here 2509 * is that the flush _must_ happen before the next request, no matter 2510 * what.
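 * (Ring space for these commands was reserved up front via
 * intel_ring_reserved_space_use() above, which is why failures below
 * are only WARNed about rather than propagated.)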
2511 */ 2512 if (flush_caches) { 2513 if (i915.enable_execlists) 2514 ret = logical_ring_flush_all_caches(request); 2515 else 2516 ret = intel_ring_flush_all_caches(request); 2517 /* Not allowed to fail! */ 2518 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2519 } 2520 2521 /* Record the position of the start of the request so that 2522 * should we detect the updated seqno part-way through the 2523 * GPU processing the request, we never over-estimate the 2524 * position of the head. 2525 */ 2526 request->postfix = intel_ring_get_tail(ringbuf); 2527 2528 if (i915.enable_execlists) 2529 ret = ring->emit_request(request); 2530 else { 2531 ret = ring->add_request(request); 2532 2533 request->tail = intel_ring_get_tail(ringbuf); 2534 } 2535 /* Not allowed to fail! */ 2536 WARN(ret, "emit|add_request failed: %d!\n", ret); 2537 2538 request->head = request_start; 2539 2540 /* Whilst this request exists, batch_obj will be on the 2541 * active_list, and so will hold the active reference. Only when this 2542 * request is retired will the batch_obj be moved onto the 2543 * inactive_list and lose its active reference. Hence we do not need 2544 * to explicitly hold another reference here. 2545 */ 2546 request->batch_obj = obj; 2547 2548 request->emitted_jiffies = jiffies; 2549 ring->last_submitted_seqno = request->seqno; 2550 list_add_tail(&request->list, &ring->request_list); 2551 2552 trace_i915_gem_request_add(request); 2553 2554 i915_queue_hangcheck(ring->dev); 2555 2556 queue_delayed_work(dev_priv->wq, 2557 &dev_priv->mm.retire_work, 2558 round_jiffies_up_relative(HZ)); 2559 intel_mark_busy(dev_priv->dev); 2560 2561 /* Sanity check that the reserved size was large enough. */ 2562 intel_ring_reserved_space_end(ringbuf); 2563 } 2564 2565 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2566 const struct intel_context *ctx) 2567 { 2568 unsigned long elapsed; 2569 2570 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2571 2572 if (ctx->hang_stats.banned) 2573 return true; 2574 2575 if (ctx->hang_stats.ban_period_seconds && 2576 elapsed <= ctx->hang_stats.ban_period_seconds) { 2577 if (!i915_gem_context_is_default(ctx)) { 2578 DRM_DEBUG("context hanging too fast, banning!\n"); 2579 return true; 2580 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2581 if (i915_stop_ring_allow_warn(dev_priv)) 2582 DRM_ERROR("gpu hanging too fast, banning!\n"); 2583 return true; 2584 } 2585 } 2586 2587 return false; 2588 } 2589 2590 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2591 struct intel_context *ctx, 2592 const bool guilty) 2593 { 2594 struct i915_ctx_hang_stats *hs; 2595 2596 if (WARN_ON(!ctx)) 2597 return; 2598 2599 hs = &ctx->hang_stats; 2600 2601 if (guilty) { 2602 hs->banned = i915_context_is_banned(dev_priv, ctx); 2603 hs->batch_active++; 2604 hs->guilty_ts = get_seconds(); 2605 } else { 2606 hs->batch_pending++; 2607 } 2608 } 2609 2610 void i915_gem_request_free(struct kref *req_ref) 2611 { 2612 struct drm_i915_gem_request *req = container_of(req_ref, 2613 typeof(*req), ref); 2614 struct intel_context *ctx = req->ctx; 2615 2616 if (req->file_priv) 2617 i915_gem_request_remove_from_client(req); 2618 2619 if (ctx) { 2620 if (i915.enable_execlists) { 2621 if (ctx != req->ring->default_context) 2622 intel_lr_context_unpin(req); 2623 } 2624 2625 i915_gem_context_unreference(ctx); 2626 } 2627 2628 kmem_cache_free(req->i915->requests, req); 2629 } 2630 2631 int i915_gem_request_alloc(struct intel_engine_cs *ring, 2632 struct intel_context *ctx, 2633
struct drm_i915_gem_request **req_out) 2634 { 2635 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2636 struct drm_i915_gem_request *req; 2637 int ret; 2638 2639 if (!req_out) 2640 return -EINVAL; 2641 2642 *req_out = NULL; 2643 2644 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 2645 if (req == NULL) 2646 return -ENOMEM; 2647 2648 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2649 if (ret) 2650 goto err; 2651 2652 kref_init(&req->ref); 2653 req->i915 = dev_priv; 2654 req->ring = ring; 2655 req->ctx = ctx; 2656 i915_gem_context_reference(req->ctx); 2657 2658 if (i915.enable_execlists) 2659 ret = intel_logical_ring_alloc_request_extras(req); 2660 else 2661 ret = intel_ring_alloc_request_extras(req); 2662 if (ret) { 2663 i915_gem_context_unreference(req->ctx); 2664 goto err; 2665 } 2666 2667 /* 2668 * Reserve space in the ring buffer for all the commands required to 2669 * eventually emit this request. This is to guarantee that the 2670 * i915_add_request() call can't fail. Note that the reserve may need 2671 * to be redone if the request is not actually submitted straight 2672 * away, e.g. because a GPU scheduler has deferred it. 2673 */ 2674 if (i915.enable_execlists) 2675 ret = intel_logical_ring_reserve_space(req); 2676 else 2677 ret = intel_ring_reserve_space(req); 2678 if (ret) { 2679 /* 2680 * At this point, the request is fully allocated even if not 2681 * fully prepared. Thus it can be cleaned up using the proper 2682 * free code. 2683 */ 2684 i915_gem_request_cancel(req); 2685 return ret; 2686 } 2687 2688 *req_out = req; 2689 return 0; 2690 2691 err: 2692 kmem_cache_free(dev_priv->requests, req); 2693 return ret; 2694 } 2695 2696 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 2697 { 2698 intel_ring_reserved_space_cancel(req->ringbuf); 2699 2700 i915_gem_request_unreference(req); 2701 } 2702 2703 struct drm_i915_gem_request * 2704 i915_gem_find_active_request(struct intel_engine_cs *ring) 2705 { 2706 struct drm_i915_gem_request *request; 2707 2708 list_for_each_entry(request, &ring->request_list, list) { 2709 if (i915_gem_request_completed(request, false)) 2710 continue; 2711 2712 return request; 2713 } 2714 2715 return NULL; 2716 } 2717 2718 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2719 struct intel_engine_cs *ring) 2720 { 2721 struct drm_i915_gem_request *request; 2722 bool ring_hung; 2723 2724 request = i915_gem_find_active_request(ring); 2725 2726 if (request == NULL) 2727 return; 2728 2729 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2730 2731 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2732 2733 list_for_each_entry_continue(request, &ring->request_list, list) 2734 i915_set_reset_status(dev_priv, request->ctx, false); 2735 } 2736 2737 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2738 struct intel_engine_cs *ring) 2739 { 2740 while (!list_empty(&ring->active_list)) { 2741 struct drm_i915_gem_object *obj; 2742 2743 obj = list_first_entry(&ring->active_list, 2744 struct drm_i915_gem_object, 2745 ring_list[ring->id]); 2746 2747 i915_gem_object_retire__read(obj, ring->id); 2748 } 2749 2750 /* 2751 * Clear the execlists queue up before freeing the requests, as those 2752 * are the ones that keep the context and ringbuffer backing objects 2753 * pinned in place. 
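 * (Requests still sitting on the execlist queue hold a pin on their
 * logical ring context (other than the default context); emptying the
 * queue here drops those pins before the remaining requests are freed
 * below.)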
2754 */ 2755 while (!list_empty(&ring->execlist_queue)) { 2756 struct drm_i915_gem_request *submit_req; 2757 2758 submit_req = list_first_entry(&ring->execlist_queue, 2759 struct drm_i915_gem_request, 2760 execlist_link); 2761 list_del(&submit_req->execlist_link); 2762 2763 if (submit_req->ctx != ring->default_context) 2764 intel_lr_context_unpin(submit_req); 2765 2766 i915_gem_request_unreference(submit_req); 2767 } 2768 2769 /* 2770 * We must free the requests after all the corresponding objects have 2771 * been moved off active lists. Which is the same order as the normal 2772 * retire_requests function does. This is important if object hold 2773 * implicit references on things like e.g. ppgtt address spaces through 2774 * the request. 2775 */ 2776 while (!list_empty(&ring->request_list)) { 2777 struct drm_i915_gem_request *request; 2778 2779 request = list_first_entry(&ring->request_list, 2780 struct drm_i915_gem_request, 2781 list); 2782 2783 i915_gem_request_retire(request); 2784 } 2785 } 2786 2787 void i915_gem_reset(struct drm_device *dev) 2788 { 2789 struct drm_i915_private *dev_priv = dev->dev_private; 2790 struct intel_engine_cs *ring; 2791 int i; 2792 2793 /* 2794 * Before we free the objects from the requests, we need to inspect 2795 * them for finding the guilty party. As the requests only borrow 2796 * their reference to the objects, the inspection must be done first. 2797 */ 2798 for_each_ring(ring, dev_priv, i) 2799 i915_gem_reset_ring_status(dev_priv, ring); 2800 2801 for_each_ring(ring, dev_priv, i) 2802 i915_gem_reset_ring_cleanup(dev_priv, ring); 2803 2804 i915_gem_context_reset(dev); 2805 2806 i915_gem_restore_fences(dev); 2807 2808 WARN_ON(i915_verify_lists(dev)); 2809 } 2810 2811 /** 2812 * This function clears the request list as sequence numbers are passed. 2813 */ 2814 void 2815 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2816 { 2817 WARN_ON(i915_verify_lists(ring->dev)); 2818 2819 /* Retire requests first as we use it above for the early return. 2820 * If we retire requests last, we may use a later seqno and so clear 2821 * the requests lists without clearing the active list, leading to 2822 * confusion. 2823 */ 2824 while (!list_empty(&ring->request_list)) { 2825 struct drm_i915_gem_request *request; 2826 2827 request = list_first_entry(&ring->request_list, 2828 struct drm_i915_gem_request, 2829 list); 2830 2831 if (!i915_gem_request_completed(request, true)) 2832 break; 2833 2834 i915_gem_request_retire(request); 2835 } 2836 2837 /* Move any buffers on the active list that are no longer referenced 2838 * by the ringbuffer to the flushing/inactive lists as appropriate, 2839 * before we free the context associated with the requests. 
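 * (An object is moved off the active list here only once its
 * last_read_req for this ring has itself been retired and removed from
 * the request list above.)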
2840 */ 2841 while (!list_empty(&ring->active_list)) { 2842 struct drm_i915_gem_object *obj; 2843 2844 obj = list_first_entry(&ring->active_list, 2845 struct drm_i915_gem_object, 2846 ring_list[ring->id]); 2847 2848 if (!list_empty(&obj->last_read_req[ring->id]->list)) 2849 break; 2850 2851 i915_gem_object_retire__read(obj, ring->id); 2852 } 2853 2854 if (unlikely(ring->trace_irq_req && 2855 i915_gem_request_completed(ring->trace_irq_req, true))) { 2856 ring->irq_put(ring); 2857 i915_gem_request_assign(&ring->trace_irq_req, NULL); 2858 } 2859 2860 WARN_ON(i915_verify_lists(ring->dev)); 2861 } 2862 2863 bool 2864 i915_gem_retire_requests(struct drm_device *dev) 2865 { 2866 struct drm_i915_private *dev_priv = dev->dev_private; 2867 struct intel_engine_cs *ring; 2868 bool idle = true; 2869 int i; 2870 2871 for_each_ring(ring, dev_priv, i) { 2872 i915_gem_retire_requests_ring(ring); 2873 idle &= list_empty(&ring->request_list); 2874 if (i915.enable_execlists) { 2875 unsigned long flags; 2876 2877 spin_lock_irqsave(&ring->execlist_lock, flags); 2878 idle &= list_empty(&ring->execlist_queue); 2879 spin_unlock_irqrestore(&ring->execlist_lock, flags); 2880 2881 intel_execlists_retire_requests(ring); 2882 } 2883 } 2884 2885 if (idle) 2886 mod_delayed_work(dev_priv->wq, 2887 &dev_priv->mm.idle_work, 2888 msecs_to_jiffies(100)); 2889 2890 return idle; 2891 } 2892 2893 static void 2894 i915_gem_retire_work_handler(struct work_struct *work) 2895 { 2896 struct drm_i915_private *dev_priv = 2897 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2898 struct drm_device *dev = dev_priv->dev; 2899 bool idle; 2900 2901 /* Come back later if the device is busy... */ 2902 idle = false; 2903 if (mutex_trylock(&dev->struct_mutex)) { 2904 idle = i915_gem_retire_requests(dev); 2905 mutex_unlock(&dev->struct_mutex); 2906 } 2907 if (!idle) 2908 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2909 round_jiffies_up_relative(HZ)); 2910 } 2911 2912 static void 2913 i915_gem_idle_work_handler(struct work_struct *work) 2914 { 2915 struct drm_i915_private *dev_priv = 2916 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2917 struct drm_device *dev = dev_priv->dev; 2918 struct intel_engine_cs *ring; 2919 int i; 2920 2921 for_each_ring(ring, dev_priv, i) 2922 if (!list_empty(&ring->request_list)) 2923 return; 2924 2925 intel_mark_idle(dev); 2926 2927 if (mutex_trylock(&dev->struct_mutex)) { 2928 struct intel_engine_cs *ring; 2929 int i; 2930 2931 for_each_ring(ring, dev_priv, i) 2932 i915_gem_batch_pool_fini(&ring->batch_pool); 2933 2934 mutex_unlock(&dev->struct_mutex); 2935 } 2936 } 2937 2938 /** 2939 * Ensures that an object will eventually get non-busy by flushing any required 2940 * write domains, emitting any outstanding lazy request and retiring and 2941 * completed requests. 
2942 */ 2943 static int 2944 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2945 { 2946 int i; 2947 2948 if (!obj->active) 2949 return 0; 2950 2951 for (i = 0; i < I915_NUM_RINGS; i++) { 2952 struct drm_i915_gem_request *req; 2953 2954 req = obj->last_read_req[i]; 2955 if (req == NULL) 2956 continue; 2957 2958 if (list_empty(&req->list)) 2959 goto retire; 2960 2961 if (i915_gem_request_completed(req, true)) { 2962 __i915_gem_request_retire__upto(req); 2963 retire: 2964 i915_gem_object_retire__read(obj, i); 2965 } 2966 } 2967 2968 return 0; 2969 } 2970 2971 /** 2972 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2973 * @DRM_IOCTL_ARGS: standard ioctl arguments 2974 * 2975 * Returns 0 if successful, else an error is returned with the remaining time in 2976 * the timeout parameter. 2977 * -ETIME: object is still busy after timeout 2978 * -ERESTARTSYS: signal interrupted the wait 2979 * -ENOENT: object doesn't exist 2980 * Also possible, but rare: 2981 * -EAGAIN: GPU wedged 2982 * -ENOMEM: damn 2983 * -ENODEV: Internal IRQ fail 2984 * -E?: The add request failed 2985 * 2986 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2987 * non-zero timeout parameter the wait ioctl will wait for the given number of 2988 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2989 * without holding struct_mutex the object may become re-busied before this 2990 * function completes. A similar but shorter race condition exists in the busy 2991 * ioctl. 2992 */ 2993 int 2994 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2995 { 2996 struct drm_i915_private *dev_priv = dev->dev_private; 2997 struct drm_i915_gem_wait *args = data; 2998 struct drm_i915_gem_object *obj; 2999 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3000 unsigned reset_counter; 3001 int i, n = 0; 3002 int ret; 3003 3004 if (args->flags != 0) 3005 return -EINVAL; 3006 3007 ret = i915_mutex_lock_interruptible(dev); 3008 if (ret) 3009 return ret; 3010 3011 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3012 if (&obj->base == NULL) { 3013 mutex_unlock(&dev->struct_mutex); 3014 return -ENOENT; 3015 } 3016 3017 /* Need to make sure the object gets inactive eventually. */ 3018 ret = i915_gem_object_flush_active(obj); 3019 if (ret) 3020 goto out; 3021 3022 if (!obj->active) 3023 goto out; 3024 3025 /* Do this after OLR check to make sure we make forward progress polling 3026 * on this IOCTL with a timeout == 0 (like busy ioctl) 3027 */ 3028 if (args->timeout_ns == 0) { 3029 ret = -ETIME; 3030 goto out; 3031 } 3032 3033 drm_gem_object_unreference(&obj->base); 3034 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3035 3036 for (i = 0; i < I915_NUM_RINGS; i++) { 3037 if (obj->last_read_req[i] == NULL) 3038 continue; 3039 3040 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3041 } 3042 3043 mutex_unlock(&dev->struct_mutex); 3044 3045 for (i = 0; i < n; i++) { 3046 if (ret == 0) 3047 ret = __i915_wait_request(req[i], reset_counter, true, 3048 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3049 file->driver_priv); 3050 i915_gem_request_unreference__unlocked(req[i]); 3051 } 3052 return ret; 3053 3054 out: 3055 drm_gem_object_unreference(&obj->base); 3056 mutex_unlock(&dev->struct_mutex); 3057 return ret; 3058 } 3059 3060 static int 3061 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3062 struct intel_engine_cs *to, 3063 struct drm_i915_gem_request *from_req, 3064 struct drm_i915_gem_request **to_req) 3065 { 3066 struct intel_engine_cs *from; 3067 int ret; 3068 3069 from = i915_gem_request_get_ring(from_req); 3070 if (to == from) 3071 return 0; 3072 3073 if (i915_gem_request_completed(from_req, true)) 3074 return 0; 3075 3076 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3077 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3078 ret = __i915_wait_request(from_req, 3079 atomic_read(&i915->gpu_error.reset_counter), 3080 i915->mm.interruptible, 3081 NULL, 3082 &i915->rps.semaphores); 3083 if (ret) 3084 return ret; 3085 3086 i915_gem_object_retire_request(obj, from_req); 3087 } else { 3088 int idx = intel_ring_sync_index(from, to); 3089 u32 seqno = i915_gem_request_get_seqno(from_req); 3090 3091 WARN_ON(!to_req); 3092 3093 if (seqno <= from->semaphore.sync_seqno[idx]) 3094 return 0; 3095 3096 if (*to_req == NULL) { 3097 ret = i915_gem_request_alloc(to, to->default_context, to_req); 3098 if (ret) 3099 return ret; 3100 } 3101 3102 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3103 ret = to->semaphore.sync_to(*to_req, from, seqno); 3104 if (ret) 3105 return ret; 3106 3107 /* We use last_read_req because sync_to() 3108 * might have just caused seqno wrap under 3109 * the radar. 3110 */ 3111 from->semaphore.sync_seqno[idx] = 3112 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3113 } 3114 3115 return 0; 3116 } 3117 3118 /** 3119 * i915_gem_object_sync - sync an object to a ring. 3120 * 3121 * @obj: object which may be in use on another ring. 3122 * @to: ring we wish to use the object on. May be NULL. 3123 * @to_req: request we wish to use the object for. See below. 3124 * This will be allocated and returned if a request is 3125 * required but not passed in. 3126 * 3127 * This code is meant to abstract object synchronization with the GPU. 3128 * Calling with NULL implies synchronizing the object with the CPU 3129 * rather than a particular GPU ring. Conceptually we serialise writes 3130 * between engines inside the GPU. We only allow one engine to write 3131 * into a buffer at any time, but multiple readers. To ensure each has 3132 * a coherent view of memory, we must: 3133 * 3134 * - If there is an outstanding write request to the object, the new 3135 * request must wait for it to complete (either CPU or in hw, requests 3136 * on the same ring will be naturally ordered). 3137 * 3138 * - If we are a write request (pending_write_domain is set), the new 3139 * request must wait for outstanding read requests to complete. 3140 * 3141 * For CPU synchronisation (NULL to) no request is required. For syncing with 3142 * rings to_req must be non-NULL. However, a request does not have to be 3143 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3144 * request will be allocated automatically and returned through *to_req. Note 3145 * that it is not guaranteed that commands will be emitted (because the system 3146 * might already be idle). Hence there is no need to create a request that 3147 * might never have any work submitted. 
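 *
 * For example (sketch only, not a real caller), code that needs @obj to
 * be ready for use on ring @to might do:
 *
 *	struct drm_i915_gem_request *rq = NULL;
 *
 *	ret = i915_gem_object_sync(obj, to, &rq);
 *	if (ret == 0 && rq)
 *		i915_add_request(rq);
 *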
Note further that if a request is 3148 * returned in *to_req, it is the responsibility of the caller to submit 3149 * that request (after potentially adding more work to it). 3150 * 3151 * Returns 0 if successful, else propagates up the lower layer error. 3152 */ 3153 int 3154 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3155 struct intel_engine_cs *to, 3156 struct drm_i915_gem_request **to_req) 3157 { 3158 const bool readonly = obj->base.pending_write_domain == 0; 3159 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3160 int ret, i, n; 3161 3162 if (!obj->active) 3163 return 0; 3164 3165 if (to == NULL) 3166 return i915_gem_object_wait_rendering(obj, readonly); 3167 3168 n = 0; 3169 if (readonly) { 3170 if (obj->last_write_req) 3171 req[n++] = obj->last_write_req; 3172 } else { 3173 for (i = 0; i < I915_NUM_RINGS; i++) 3174 if (obj->last_read_req[i]) 3175 req[n++] = obj->last_read_req[i]; 3176 } 3177 for (i = 0; i < n; i++) { 3178 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3179 if (ret) 3180 return ret; 3181 } 3182 3183 return 0; 3184 } 3185 3186 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3187 { 3188 u32 old_write_domain, old_read_domains; 3189 3190 /* Force a pagefault for domain tracking on next user access */ 3191 i915_gem_release_mmap(obj); 3192 3193 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3194 return; 3195 3196 /* Wait for any direct GTT access to complete */ 3197 mb(); 3198 3199 old_read_domains = obj->base.read_domains; 3200 old_write_domain = obj->base.write_domain; 3201 3202 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3203 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3204 3205 trace_i915_gem_object_change_domain(obj, 3206 old_read_domains, 3207 old_write_domain); 3208 } 3209 3210 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3211 { 3212 struct drm_i915_gem_object *obj = vma->obj; 3213 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3214 int ret; 3215 3216 if (list_empty(&vma->vma_link)) 3217 return 0; 3218 3219 if (!drm_mm_node_allocated(&vma->node)) { 3220 i915_gem_vma_destroy(vma); 3221 return 0; 3222 } 3223 3224 if (vma->pin_count) 3225 return -EBUSY; 3226 3227 BUG_ON(obj->pages == NULL); 3228 3229 if (wait) { 3230 ret = i915_gem_object_wait_rendering(obj, false); 3231 if (ret) 3232 return ret; 3233 } 3234 3235 if (i915_is_ggtt(vma->vm) && 3236 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3237 i915_gem_object_finish_gtt(obj); 3238 3239 /* release the fence reg _after_ flushing */ 3240 ret = i915_gem_object_put_fence(obj); 3241 if (ret) 3242 return ret; 3243 } 3244 3245 trace_i915_vma_unbind(vma); 3246 3247 vma->vm->unbind_vma(vma); 3248 vma->bound = 0; 3249 3250 list_del_init(&vma->mm_list); 3251 if (i915_is_ggtt(vma->vm)) { 3252 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3253 obj->map_and_fenceable = false; 3254 } else if (vma->ggtt_view.pages) { 3255 sg_free_table(vma->ggtt_view.pages); 3256 kfree(vma->ggtt_view.pages); 3257 } 3258 vma->ggtt_view.pages = NULL; 3259 } 3260 3261 drm_mm_remove_node(&vma->node); 3262 i915_gem_vma_destroy(vma); 3263 3264 /* Since the unbound list is global, only move to that list if 3265 * no more VMAs exist. */ 3266 if (list_empty(&obj->vma_list)) 3267 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3268 3269 /* And finally now the object is completely decoupled from this vma, 3270 * we can drop its hold on the backing storage and allow it to be 3271 * reaped by the shrinker. 
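 * (The unpin below balances the i915_gem_object_pin_pages() taken in
 * i915_gem_object_bind_to_vm() when the VMA was first bound.)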
3272 */ 3273 i915_gem_object_unpin_pages(obj); 3274 3275 return 0; 3276 } 3277 3278 int i915_vma_unbind(struct i915_vma *vma) 3279 { 3280 return __i915_vma_unbind(vma, true); 3281 } 3282 3283 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3284 { 3285 return __i915_vma_unbind(vma, false); 3286 } 3287 3288 int i915_gpu_idle(struct drm_device *dev) 3289 { 3290 struct drm_i915_private *dev_priv = dev->dev_private; 3291 struct intel_engine_cs *ring; 3292 int ret, i; 3293 3294 /* Flush everything onto the inactive list. */ 3295 for_each_ring(ring, dev_priv, i) { 3296 if (!i915.enable_execlists) { 3297 struct drm_i915_gem_request *req; 3298 3299 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 3300 if (ret) 3301 return ret; 3302 3303 ret = i915_switch_context(req); 3304 if (ret) { 3305 i915_gem_request_cancel(req); 3306 return ret; 3307 } 3308 3309 i915_add_request_no_flush(req); 3310 } 3311 3312 ret = intel_ring_idle(ring); 3313 if (ret) 3314 return ret; 3315 } 3316 3317 WARN_ON(i915_verify_lists(dev)); 3318 return 0; 3319 } 3320 3321 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3322 unsigned long cache_level) 3323 { 3324 struct drm_mm_node *gtt_space = &vma->node; 3325 struct drm_mm_node *other; 3326 3327 /* 3328 * On some machines we have to be careful when putting differing types 3329 * of snoopable memory together to avoid the prefetcher crossing memory 3330 * domains and dying. During vm initialisation, we decide whether or not 3331 * these constraints apply and set the drm_mm.color_adjust 3332 * appropriately. 3333 */ 3334 if (vma->vm->mm.color_adjust == NULL) 3335 return true; 3336 3337 if (!drm_mm_node_allocated(gtt_space)) 3338 return true; 3339 3340 if (list_empty(&gtt_space->node_list)) 3341 return true; 3342 3343 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3344 if (other->allocated && !other->hole_follows && other->color != cache_level) 3345 return false; 3346 3347 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3348 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3349 return false; 3350 3351 return true; 3352 } 3353 3354 /** 3355 * Finds free space in the GTT aperture and binds the object or a view of it 3356 * there. 3357 */ 3358 static struct i915_vma * 3359 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3360 struct i915_address_space *vm, 3361 const struct i915_ggtt_view *ggtt_view, 3362 unsigned alignment, 3363 uint64_t flags) 3364 { 3365 struct drm_device *dev = obj->base.dev; 3366 struct drm_i915_private *dev_priv = dev->dev_private; 3367 u32 fence_alignment, unfenced_alignment; 3368 u32 search_flag, alloc_flag; 3369 u64 start, end; 3370 u64 size, fence_size; 3371 struct i915_vma *vma; 3372 int ret; 3373 3374 if (i915_is_ggtt(vm)) { 3375 u32 view_size; 3376 3377 if (WARN_ON(!ggtt_view)) 3378 return ERR_PTR(-EINVAL); 3379 3380 view_size = i915_ggtt_view_size(obj, ggtt_view); 3381 3382 fence_size = i915_gem_get_gtt_size(dev, 3383 view_size, 3384 obj->tiling_mode); 3385 fence_alignment = i915_gem_get_gtt_alignment(dev, 3386 view_size, 3387 obj->tiling_mode, 3388 true); 3389 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3390 view_size, 3391 obj->tiling_mode, 3392 false); 3393 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3394 } else { 3395 fence_size = i915_gem_get_gtt_size(dev, 3396 obj->base.size, 3397 obj->tiling_mode); 3398 fence_alignment = i915_gem_get_gtt_alignment(dev, 3399 obj->base.size, 3400 obj->tiling_mode, 3401 true); 3402 unfenced_alignment = 3403 i915_gem_get_gtt_alignment(dev, 3404 obj->base.size, 3405 obj->tiling_mode, 3406 false); 3407 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3408 } 3409 3410 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3411 end = vm->total; 3412 if (flags & PIN_MAPPABLE) 3413 end = min_t(u64, end, dev_priv->gtt.mappable_end); 3414 if (flags & PIN_ZONE_4G) 3415 end = min_t(u64, end, (1ULL << 32)); 3416 3417 if (alignment == 0) 3418 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3419 unfenced_alignment; 3420 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3421 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3422 ggtt_view ? ggtt_view->type : 0, 3423 alignment); 3424 return ERR_PTR(-EINVAL); 3425 } 3426 3427 /* If binding the object/GGTT view requires more space than the entire 3428 * aperture has, reject it early before evicting everything in a vain 3429 * attempt to find space. 3430 */ 3431 if (size > end) { 3432 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3433 ggtt_view ? ggtt_view->type : 0, 3434 size, 3435 flags & PIN_MAPPABLE ? "mappable" : "total", 3436 end); 3437 return ERR_PTR(-E2BIG); 3438 } 3439 3440 ret = i915_gem_object_get_pages(obj); 3441 if (ret) 3442 return ERR_PTR(ret); 3443 3444 i915_gem_object_pin_pages(obj); 3445 3446 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3447 i915_gem_obj_lookup_or_create_vma(obj, vm); 3448 3449 if (IS_ERR(vma)) 3450 goto err_unpin; 3451 3452 if (flags & PIN_HIGH) { 3453 search_flag = DRM_MM_SEARCH_BELOW; 3454 alloc_flag = DRM_MM_CREATE_TOP; 3455 } else { 3456 search_flag = DRM_MM_SEARCH_DEFAULT; 3457 alloc_flag = DRM_MM_CREATE_DEFAULT; 3458 } 3459 3460 search_free: 3461 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3462 size, alignment, 3463 obj->cache_level, 3464 start, end, 3465 search_flag, 3466 alloc_flag); 3467 if (ret) { 3468 ret = i915_gem_evict_something(dev, vm, size, alignment, 3469 obj->cache_level, 3470 start, end, 3471 flags); 3472 if (ret == 0) 3473 goto search_free; 3474 3475 goto err_free_vma; 3476 } 3477 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3478 ret = -EINVAL; 3479 goto err_remove_node; 3480 } 3481 3482 trace_i915_vma_bind(vma, flags); 3483 ret = i915_vma_bind(vma, obj->cache_level, flags); 3484 if (ret) 3485 goto err_remove_node; 3486 3487 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3488 list_add_tail(&vma->mm_list, &vm->inactive_list); 3489 3490 return vma; 3491 3492 err_remove_node: 3493 drm_mm_remove_node(&vma->node); 3494 err_free_vma: 3495 i915_gem_vma_destroy(vma); 3496 vma = ERR_PTR(ret); 3497 err_unpin: 3498 i915_gem_object_unpin_pages(obj); 3499 return vma; 3500 } 3501 3502 bool 3503 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3504 bool force) 3505 { 3506 /* If we don't have a page list set up, then we're not pinned 3507 * to GPU, and we can ignore the cache flush because it'll happen 3508 * again at bind time. 3509 */ 3510 if (obj->pages == NULL) 3511 return false; 3512 3513 /* 3514 * Stolen memory is always coherent with the GPU as it is explicitly 3515 * marked as wc by the system, or the system is cache-coherent. 
3516 */ 3517 if (obj->stolen || obj->phys_handle) 3518 return false; 3519 3520 /* If the GPU is snooping the contents of the CPU cache, 3521 * we do not need to manually clear the CPU cache lines. However, 3522 * the caches are only snooped when the render cache is 3523 * flushed/invalidated. As we always have to emit invalidations 3524 * and flushes when moving into and out of the RENDER domain, correct 3525 * snooping behaviour occurs naturally as the result of our domain 3526 * tracking. 3527 */ 3528 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3529 obj->cache_dirty = true; 3530 return false; 3531 } 3532 3533 trace_i915_gem_object_clflush(obj); 3534 drm_clflush_sg(obj->pages); 3535 obj->cache_dirty = false; 3536 3537 return true; 3538 } 3539 3540 /** Flushes the GTT write domain for the object if it's dirty. */ 3541 static void 3542 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3543 { 3544 uint32_t old_write_domain; 3545 3546 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3547 return; 3548 3549 /* No actual flushing is required for the GTT write domain. Writes 3550 * to it immediately go to main memory as far as we know, so there's 3551 * no chipset flush. It also doesn't land in render cache. 3552 * 3553 * However, we do have to enforce the order so that all writes through 3554 * the GTT land before any writes to the device, such as updates to 3555 * the GATT itself. 3556 */ 3557 wmb(); 3558 3559 old_write_domain = obj->base.write_domain; 3560 obj->base.write_domain = 0; 3561 3562 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3563 3564 trace_i915_gem_object_change_domain(obj, 3565 obj->base.read_domains, 3566 old_write_domain); 3567 } 3568 3569 /** Flushes the CPU write domain for the object if it's dirty. */ 3570 static void 3571 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3572 { 3573 uint32_t old_write_domain; 3574 3575 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3576 return; 3577 3578 if (i915_gem_clflush_object(obj, obj->pin_display)) 3579 i915_gem_chipset_flush(obj->base.dev); 3580 3581 old_write_domain = obj->base.write_domain; 3582 obj->base.write_domain = 0; 3583 3584 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3585 3586 trace_i915_gem_object_change_domain(obj, 3587 obj->base.read_domains, 3588 old_write_domain); 3589 } 3590 3591 /** 3592 * Moves a single object to the GTT read, and possibly write domain. 3593 * 3594 * This function returns when the move is complete, including waiting on 3595 * flushes to occur. 3596 */ 3597 int 3598 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3599 { 3600 uint32_t old_write_domain, old_read_domains; 3601 struct i915_vma *vma; 3602 int ret; 3603 3604 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3605 return 0; 3606 3607 ret = i915_gem_object_wait_rendering(obj, !write); 3608 if (ret) 3609 return ret; 3610 3611 /* Flush and acquire obj->pages so that we are coherent through 3612 * direct access in memory with previous cached writes through 3613 * shmemfs and that our cache domain tracking remains valid. 3614 * For example, if the obj->filp was moved to swap without us 3615 * being notified and releasing the pages, we would mistakenly 3616 * continue to assume that the obj remained out of the CPU cached 3617 * domain. 
3618 */ 3619 ret = i915_gem_object_get_pages(obj); 3620 if (ret) 3621 return ret; 3622 3623 i915_gem_object_flush_cpu_write_domain(obj); 3624 3625 /* Serialise direct access to this object with the barriers for 3626 * coherent writes from the GPU, by effectively invalidating the 3627 * GTT domain upon first access. 3628 */ 3629 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3630 mb(); 3631 3632 old_write_domain = obj->base.write_domain; 3633 old_read_domains = obj->base.read_domains; 3634 3635 /* It should now be out of any other write domains, and we can update 3636 * the domain values for our changes. 3637 */ 3638 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3639 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3640 if (write) { 3641 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3642 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3643 obj->dirty = 1; 3644 } 3645 3646 trace_i915_gem_object_change_domain(obj, 3647 old_read_domains, 3648 old_write_domain); 3649 3650 /* And bump the LRU for this access */ 3651 vma = i915_gem_obj_to_ggtt(obj); 3652 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3653 list_move_tail(&vma->mm_list, 3654 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3655 3656 return 0; 3657 } 3658 3659 /** 3660 * Changes the cache-level of an object across all VMA. 3661 * 3662 * After this function returns, the object will be in the new cache-level 3663 * across all GTT and the contents of the backing storage will be coherent, 3664 * with respect to the new cache-level. In order to keep the backing storage 3665 * coherent for all users, we only allow a single cache level to be set 3666 * globally on the object and prevent it from being changed whilst the 3667 * hardware is reading from the object. That is if the object is currently 3668 * on the scanout it will be set to uncached (or equivalent display 3669 * cache coherency) and all non-MOCS GPU access will also be uncached so 3670 * that all direct access to the scanout remains coherent. 3671 */ 3672 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3673 enum i915_cache_level cache_level) 3674 { 3675 struct drm_device *dev = obj->base.dev; 3676 struct i915_vma *vma, *next; 3677 bool bound = false; 3678 int ret = 0; 3679 3680 if (obj->cache_level == cache_level) 3681 goto out; 3682 3683 /* Inspect the list of currently bound VMA and unbind any that would 3684 * be invalid given the new cache-level. This is principally to 3685 * catch the issue of the CS prefetch crossing page boundaries and 3686 * reading an invalid PTE on older architectures. 3687 */ 3688 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3689 if (!drm_mm_node_allocated(&vma->node)) 3690 continue; 3691 3692 if (vma->pin_count) { 3693 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3694 return -EBUSY; 3695 } 3696 3697 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3698 ret = i915_vma_unbind(vma); 3699 if (ret) 3700 return ret; 3701 } else 3702 bound = true; 3703 } 3704 3705 /* We can reuse the existing drm_mm nodes but need to change the 3706 * cache-level on the PTE. We could simply unbind them all and 3707 * rebind with the correct cache-level on next use. However since 3708 * we already have a valid slot, dma mapping, pages etc, we may as 3709 * rewrite the PTE in the belief that doing so tramples upon less 3710 * state and so involves less work. 3711 */ 3712 if (bound) { 3713 /* Before we change the PTE, the GPU must not be accessing it. 
3714 * If we wait upon the object, we know that all the bound 3715 * VMA are no longer active. 3716 */ 3717 ret = i915_gem_object_wait_rendering(obj, false); 3718 if (ret) 3719 return ret; 3720 3721 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3722 /* Access to snoopable pages through the GTT is 3723 * incoherent and on some machines causes a hard 3724 * lockup. Relinquish the CPU mmaping to force 3725 * userspace to refault in the pages and we can 3726 * then double check if the GTT mapping is still 3727 * valid for that pointer access. 3728 */ 3729 i915_gem_release_mmap(obj); 3730 3731 /* As we no longer need a fence for GTT access, 3732 * we can relinquish it now (and so prevent having 3733 * to steal a fence from someone else on the next 3734 * fence request). Note GPU activity would have 3735 * dropped the fence as all snoopable access is 3736 * supposed to be linear. 3737 */ 3738 ret = i915_gem_object_put_fence(obj); 3739 if (ret) 3740 return ret; 3741 } else { 3742 /* We either have incoherent backing store and 3743 * so no GTT access or the architecture is fully 3744 * coherent. In such cases, existing GTT mmaps 3745 * ignore the cache bit in the PTE and we can 3746 * rewrite it without confusing the GPU or having 3747 * to force userspace to fault back in its mmaps. 3748 */ 3749 } 3750 3751 list_for_each_entry(vma, &obj->vma_list, vma_link) { 3752 if (!drm_mm_node_allocated(&vma->node)) 3753 continue; 3754 3755 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3756 if (ret) 3757 return ret; 3758 } 3759 } 3760 3761 list_for_each_entry(vma, &obj->vma_list, vma_link) 3762 vma->node.color = cache_level; 3763 obj->cache_level = cache_level; 3764 3765 out: 3766 /* Flush the dirty CPU caches to the backing storage so that the 3767 * object is now coherent at its new cache level (with respect 3768 * to the access domain). 3769 */ 3770 if (obj->cache_dirty && 3771 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 3772 cpu_write_needs_clflush(obj)) { 3773 if (i915_gem_clflush_object(obj, true)) 3774 i915_gem_chipset_flush(obj->base.dev); 3775 } 3776 3777 return 0; 3778 } 3779 3780 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3781 struct drm_file *file) 3782 { 3783 struct drm_i915_gem_caching *args = data; 3784 struct drm_i915_gem_object *obj; 3785 3786 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3787 if (&obj->base == NULL) 3788 return -ENOENT; 3789 3790 switch (obj->cache_level) { 3791 case I915_CACHE_LLC: 3792 case I915_CACHE_L3_LLC: 3793 args->caching = I915_CACHING_CACHED; 3794 break; 3795 3796 case I915_CACHE_WT: 3797 args->caching = I915_CACHING_DISPLAY; 3798 break; 3799 3800 default: 3801 args->caching = I915_CACHING_NONE; 3802 break; 3803 } 3804 3805 drm_gem_object_unreference_unlocked(&obj->base); 3806 return 0; 3807 } 3808 3809 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3810 struct drm_file *file) 3811 { 3812 struct drm_i915_gem_caching *args = data; 3813 struct drm_i915_gem_object *obj; 3814 enum i915_cache_level level; 3815 int ret; 3816 3817 switch (args->caching) { 3818 case I915_CACHING_NONE: 3819 level = I915_CACHE_NONE; 3820 break; 3821 case I915_CACHING_CACHED: 3822 /* 3823 * Due to a HW issue on BXT A stepping, GPU stores via a 3824 * snooped mapping may leave stale data in a corresponding CPU 3825 * cacheline, whereas normally such cachelines would get 3826 * invalidated. 
3827 */ 3828 if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) 3829 return -ENODEV; 3830 3831 level = I915_CACHE_LLC; 3832 break; 3833 case I915_CACHING_DISPLAY: 3834 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3835 break; 3836 default: 3837 return -EINVAL; 3838 } 3839 3840 ret = i915_mutex_lock_interruptible(dev); 3841 if (ret) 3842 return ret; 3843 3844 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3845 if (&obj->base == NULL) { 3846 ret = -ENOENT; 3847 goto unlock; 3848 } 3849 3850 ret = i915_gem_object_set_cache_level(obj, level); 3851 3852 drm_gem_object_unreference(&obj->base); 3853 unlock: 3854 mutex_unlock(&dev->struct_mutex); 3855 return ret; 3856 } 3857 3858 /* 3859 * Prepare buffer for display plane (scanout, cursors, etc). 3860 * Can be called from an uninterruptible phase (modesetting) and allows 3861 * any flushes to be pipelined (for pageflips). 3862 */ 3863 int 3864 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3865 u32 alignment, 3866 struct intel_engine_cs *pipelined, 3867 struct drm_i915_gem_request **pipelined_request, 3868 const struct i915_ggtt_view *view) 3869 { 3870 u32 old_read_domains, old_write_domain; 3871 int ret; 3872 3873 ret = i915_gem_object_sync(obj, pipelined, pipelined_request); 3874 if (ret) 3875 return ret; 3876 3877 /* Mark the pin_display early so that we account for the 3878 * display coherency whilst setting up the cache domains. 3879 */ 3880 obj->pin_display++; 3881 3882 /* The display engine is not coherent with the LLC cache on gen6. As 3883 * a result, we make sure that the pinning that is about to occur is 3884 * done with uncached PTEs. This is lowest common denominator for all 3885 * chipsets. 3886 * 3887 * However for gen6+, we could do better by using the GFDT bit instead 3888 * of uncaching, which would allow us to flush all the LLC-cached data 3889 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3890 */ 3891 ret = i915_gem_object_set_cache_level(obj, 3892 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 3893 if (ret) 3894 goto err_unpin_display; 3895 3896 /* As the user may map the buffer once pinned in the display plane 3897 * (e.g. libkms for the bootup splash), we have to ensure that we 3898 * always use map_and_fenceable for all scanout buffers. 3899 */ 3900 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 3901 view->type == I915_GGTT_VIEW_NORMAL ? 3902 PIN_MAPPABLE : 0); 3903 if (ret) 3904 goto err_unpin_display; 3905 3906 i915_gem_object_flush_cpu_write_domain(obj); 3907 3908 old_write_domain = obj->base.write_domain; 3909 old_read_domains = obj->base.read_domains; 3910 3911 /* It should now be out of any other write domains, and we can update 3912 * the domain values for our changes. 3913 */ 3914 obj->base.write_domain = 0; 3915 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3916 3917 trace_i915_gem_object_change_domain(obj, 3918 old_read_domains, 3919 old_write_domain); 3920 3921 return 0; 3922 3923 err_unpin_display: 3924 obj->pin_display--; 3925 return ret; 3926 } 3927 3928 void 3929 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 3930 const struct i915_ggtt_view *view) 3931 { 3932 if (WARN_ON(obj->pin_display == 0)) 3933 return; 3934 3935 i915_gem_object_ggtt_unpin_view(obj, view); 3936 3937 obj->pin_display--; 3938 } 3939 3940 /** 3941 * Moves a single object to the CPU read, and possibly write domain. 
3942 * 3943 * This function returns when the move is complete, including waiting on 3944 * flushes to occur. 3945 */ 3946 int 3947 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3948 { 3949 uint32_t old_write_domain, old_read_domains; 3950 int ret; 3951 3952 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3953 return 0; 3954 3955 ret = i915_gem_object_wait_rendering(obj, !write); 3956 if (ret) 3957 return ret; 3958 3959 i915_gem_object_flush_gtt_write_domain(obj); 3960 3961 old_write_domain = obj->base.write_domain; 3962 old_read_domains = obj->base.read_domains; 3963 3964 /* Flush the CPU cache if it's still invalid. */ 3965 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3966 i915_gem_clflush_object(obj, false); 3967 3968 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3969 } 3970 3971 /* It should now be out of any other write domains, and we can update 3972 * the domain values for our changes. 3973 */ 3974 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3975 3976 /* If we're writing through the CPU, then the GPU read domains will 3977 * need to be invalidated at next use. 3978 */ 3979 if (write) { 3980 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3981 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3982 } 3983 3984 trace_i915_gem_object_change_domain(obj, 3985 old_read_domains, 3986 old_write_domain); 3987 3988 return 0; 3989 } 3990 3991 /* Throttle our rendering by waiting until the ring has completed our requests 3992 * emitted over 20 msec ago. 3993 * 3994 * Note that if we were to use the current jiffies each time around the loop, 3995 * we wouldn't escape the function with any frames outstanding if the time to 3996 * render a frame was over 20ms. 3997 * 3998 * This should get us reasonable parallelism between CPU and GPU but also 3999 * relatively low latency when blocking on a particular request to finish. 4000 */ 4001 static int 4002 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4003 { 4004 struct drm_i915_private *dev_priv = dev->dev_private; 4005 struct drm_i915_file_private *file_priv = file->driver_priv; 4006 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4007 struct drm_i915_gem_request *request, *target = NULL; 4008 unsigned reset_counter; 4009 int ret; 4010 4011 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4012 if (ret) 4013 return ret; 4014 4015 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4016 if (ret) 4017 return ret; 4018 4019 spin_lock(&file_priv->mm.lock); 4020 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4021 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4022 break; 4023 4024 /* 4025 * Note that the request might not have been submitted yet. 4026 * In which case emitted_jiffies will be zero. 
4027 */ 4028 if (!request->emitted_jiffies) 4029 continue; 4030 4031 target = request; 4032 } 4033 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4034 if (target) 4035 i915_gem_request_reference(target); 4036 spin_unlock(&file_priv->mm.lock); 4037 4038 if (target == NULL) 4039 return 0; 4040 4041 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4042 if (ret == 0) 4043 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4044 4045 i915_gem_request_unreference__unlocked(target); 4046 4047 return ret; 4048 } 4049 4050 static bool 4051 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4052 { 4053 struct drm_i915_gem_object *obj = vma->obj; 4054 4055 if (alignment && 4056 vma->node.start & (alignment - 1)) 4057 return true; 4058 4059 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4060 return true; 4061 4062 if (flags & PIN_OFFSET_BIAS && 4063 vma->node.start < (flags & PIN_OFFSET_MASK)) 4064 return true; 4065 4066 return false; 4067 } 4068 4069 static int 4070 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4071 struct i915_address_space *vm, 4072 const struct i915_ggtt_view *ggtt_view, 4073 uint32_t alignment, 4074 uint64_t flags) 4075 { 4076 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4077 struct i915_vma *vma; 4078 unsigned bound; 4079 int ret; 4080 4081 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4082 return -ENODEV; 4083 4084 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4085 return -EINVAL; 4086 4087 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4088 return -EINVAL; 4089 4090 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4091 return -EINVAL; 4092 4093 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4094 i915_gem_obj_to_vma(obj, vm); 4095 4096 if (IS_ERR(vma)) 4097 return PTR_ERR(vma); 4098 4099 if (vma) { 4100 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4101 return -EBUSY; 4102 4103 if (i915_vma_misplaced(vma, alignment, flags)) { 4104 WARN(vma->pin_count, 4105 "bo is already pinned in %s with incorrect alignment:" 4106 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4107 " obj->map_and_fenceable=%d\n", 4108 ggtt_view ? "ggtt" : "ppgtt", 4109 upper_32_bits(vma->node.start), 4110 lower_32_bits(vma->node.start), 4111 alignment, 4112 !!(flags & PIN_MAPPABLE), 4113 obj->map_and_fenceable); 4114 ret = i915_vma_unbind(vma); 4115 if (ret) 4116 return ret; 4117 4118 vma = NULL; 4119 } 4120 } 4121 4122 bound = vma ? 
vma->bound : 0; 4123 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4124 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4125 flags); 4126 if (IS_ERR(vma)) 4127 return PTR_ERR(vma); 4128 } else { 4129 ret = i915_vma_bind(vma, obj->cache_level, flags); 4130 if (ret) 4131 return ret; 4132 } 4133 4134 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4135 (bound ^ vma->bound) & GLOBAL_BIND) { 4136 bool mappable, fenceable; 4137 u32 fence_size, fence_alignment; 4138 4139 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4140 obj->base.size, 4141 obj->tiling_mode); 4142 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4143 obj->base.size, 4144 obj->tiling_mode, 4145 true); 4146 4147 fenceable = (vma->node.size == fence_size && 4148 (vma->node.start & (fence_alignment - 1)) == 0); 4149 4150 mappable = (vma->node.start + fence_size <= 4151 dev_priv->gtt.mappable_end); 4152 4153 obj->map_and_fenceable = mappable && fenceable; 4154 4155 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4156 } 4157 4158 vma->pin_count++; 4159 return 0; 4160 } 4161 4162 int 4163 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4164 struct i915_address_space *vm, 4165 uint32_t alignment, 4166 uint64_t flags) 4167 { 4168 return i915_gem_object_do_pin(obj, vm, 4169 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4170 alignment, flags); 4171 } 4172 4173 int 4174 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4175 const struct i915_ggtt_view *view, 4176 uint32_t alignment, 4177 uint64_t flags) 4178 { 4179 if (WARN_ONCE(!view, "no view specified")) 4180 return -EINVAL; 4181 4182 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4183 alignment, flags | PIN_GLOBAL); 4184 } 4185 4186 void 4187 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4188 const struct i915_ggtt_view *view) 4189 { 4190 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4191 4192 BUG_ON(!vma); 4193 WARN_ON(vma->pin_count == 0); 4194 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4195 4196 --vma->pin_count; 4197 } 4198 4199 int 4200 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4201 struct drm_file *file) 4202 { 4203 struct drm_i915_gem_busy *args = data; 4204 struct drm_i915_gem_object *obj; 4205 int ret; 4206 4207 ret = i915_mutex_lock_interruptible(dev); 4208 if (ret) 4209 return ret; 4210 4211 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4212 if (&obj->base == NULL) { 4213 ret = -ENOENT; 4214 goto unlock; 4215 } 4216 4217 /* Count all active objects as busy, even if they are currently not used 4218 * by the gpu. Users of this interface expect objects to eventually 4219 * become non-busy without any further actions, therefore emit any 4220 * necessary flushes here. 
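* The value reported back packs the mask of rings on which the object is still active into the high 16 bits and the id of the ring that last wrote it into the low bits.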
4221 */ 4222 ret = i915_gem_object_flush_active(obj); 4223 if (ret) 4224 goto unref; 4225 4226 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4227 args->busy = obj->active << 16; 4228 if (obj->last_write_req) 4229 args->busy |= obj->last_write_req->ring->id; 4230 4231 unref: 4232 drm_gem_object_unreference(&obj->base); 4233 unlock: 4234 mutex_unlock(&dev->struct_mutex); 4235 return ret; 4236 } 4237 4238 int 4239 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4240 struct drm_file *file_priv) 4241 { 4242 return i915_gem_ring_throttle(dev, file_priv); 4243 } 4244 4245 int 4246 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4247 struct drm_file *file_priv) 4248 { 4249 struct drm_i915_private *dev_priv = dev->dev_private; 4250 struct drm_i915_gem_madvise *args = data; 4251 struct drm_i915_gem_object *obj; 4252 int ret; 4253 4254 switch (args->madv) { 4255 case I915_MADV_DONTNEED: 4256 case I915_MADV_WILLNEED: 4257 break; 4258 default: 4259 return -EINVAL; 4260 } 4261 4262 ret = i915_mutex_lock_interruptible(dev); 4263 if (ret) 4264 return ret; 4265 4266 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4267 if (&obj->base == NULL) { 4268 ret = -ENOENT; 4269 goto unlock; 4270 } 4271 4272 if (i915_gem_obj_is_pinned(obj)) { 4273 ret = -EINVAL; 4274 goto out; 4275 } 4276 4277 if (obj->pages && 4278 obj->tiling_mode != I915_TILING_NONE && 4279 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4280 if (obj->madv == I915_MADV_WILLNEED) 4281 i915_gem_object_unpin_pages(obj); 4282 if (args->madv == I915_MADV_WILLNEED) 4283 i915_gem_object_pin_pages(obj); 4284 } 4285 4286 if (obj->madv != __I915_MADV_PURGED) 4287 obj->madv = args->madv; 4288 4289 /* if the object is no longer attached, discard its backing storage */ 4290 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4291 i915_gem_object_truncate(obj); 4292 4293 args->retained = obj->madv != __I915_MADV_PURGED; 4294 4295 out: 4296 drm_gem_object_unreference(&obj->base); 4297 unlock: 4298 mutex_unlock(&dev->struct_mutex); 4299 return ret; 4300 } 4301 4302 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4303 const struct drm_i915_gem_object_ops *ops) 4304 { 4305 int i; 4306 4307 INIT_LIST_HEAD(&obj->global_list); 4308 for (i = 0; i < I915_NUM_RINGS; i++) 4309 INIT_LIST_HEAD(&obj->ring_list[i]); 4310 INIT_LIST_HEAD(&obj->obj_exec_link); 4311 INIT_LIST_HEAD(&obj->vma_list); 4312 INIT_LIST_HEAD(&obj->batch_pool_link); 4313 4314 obj->ops = ops; 4315 4316 obj->fence_reg = I915_FENCE_REG_NONE; 4317 obj->madv = I915_MADV_WILLNEED; 4318 4319 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4320 } 4321 4322 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4323 .get_pages = i915_gem_object_get_pages_gtt, 4324 .put_pages = i915_gem_object_put_pages_gtt, 4325 }; 4326 4327 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4328 size_t size) 4329 { 4330 struct drm_i915_gem_object *obj; 4331 struct address_space *mapping; 4332 gfp_t mask; 4333 4334 obj = i915_gem_object_alloc(dev); 4335 if (obj == NULL) 4336 return NULL; 4337 4338 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4339 i915_gem_object_free(obj); 4340 return NULL; 4341 } 4342 4343 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4344 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4345 /* 965gm cannot relocate objects above 4GiB. 
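* Drop __GFP_HIGHMEM and force the backing pages into the 32-bit addressable DMA32 zone instead.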
*/ 4346 mask &= ~__GFP_HIGHMEM; 4347 mask |= __GFP_DMA32; 4348 } 4349 4350 mapping = file_inode(obj->base.filp)->i_mapping; 4351 mapping_set_gfp_mask(mapping, mask); 4352 4353 i915_gem_object_init(obj, &i915_gem_object_ops); 4354 4355 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4356 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4357 4358 if (HAS_LLC(dev)) { 4359 /* On some devices, we can have the GPU use the LLC (the CPU 4360 * cache) for about a 10% performance improvement 4361 * compared to uncached. Graphics requests other than 4362 * display scanout are coherent with the CPU in 4363 * accessing this cache. This means in this mode we 4364 * don't need to clflush on the CPU side, and on the 4365 * GPU side we only need to flush internal caches to 4366 * get data visible to the CPU. 4367 * 4368 * However, we maintain the display planes as UC, and so 4369 * need to rebind when first used as such. 4370 */ 4371 obj->cache_level = I915_CACHE_LLC; 4372 } else 4373 obj->cache_level = I915_CACHE_NONE; 4374 4375 trace_i915_gem_object_create(obj); 4376 4377 return obj; 4378 } 4379 4380 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4381 { 4382 /* If we are the last user of the backing storage (be it shmemfs 4383 * pages or stolen etc), we know that the pages are going to be 4384 * immediately released. In this case, we can then skip copying 4385 * back the contents from the GPU. 4386 */ 4387 4388 if (obj->madv != I915_MADV_WILLNEED) 4389 return false; 4390 4391 if (obj->base.filp == NULL) 4392 return true; 4393 4394 /* At first glance, this looks racy, but then again so would be 4395 * userspace racing mmap against close. However, the first external 4396 * reference to the filp can only be obtained through the 4397 * i915_gem_mmap_ioctl() which safeguards us against the user 4398 * acquiring such a reference whilst we are in the middle of 4399 * freeing the object. 4400 */ 4401 return atomic_long_read(&obj->base.filp->f_count) == 1; 4402 } 4403 4404 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4405 { 4406 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4407 struct drm_device *dev = obj->base.dev; 4408 struct drm_i915_private *dev_priv = dev->dev_private; 4409 struct i915_vma *vma, *next; 4410 4411 intel_runtime_pm_get(dev_priv); 4412 4413 trace_i915_gem_object_destroy(obj); 4414 4415 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4416 int ret; 4417 4418 vma->pin_count = 0; 4419 ret = i915_vma_unbind(vma); 4420 if (WARN_ON(ret == -ERESTARTSYS)) { 4421 bool was_interruptible; 4422 4423 was_interruptible = dev_priv->mm.interruptible; 4424 dev_priv->mm.interruptible = false; 4425 4426 WARN_ON(i915_vma_unbind(vma)); 4427 4428 dev_priv->mm.interruptible = was_interruptible; 4429 } 4430 } 4431 4432 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4433 * before progressing. 
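* (This is assumed to balance the pages pin taken when the stolen object was created; stolen memory has no shmemfs backing to release.)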
*/ 4434 if (obj->stolen) 4435 i915_gem_object_unpin_pages(obj); 4436 4437 WARN_ON(obj->frontbuffer_bits); 4438 4439 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4440 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4441 obj->tiling_mode != I915_TILING_NONE) 4442 i915_gem_object_unpin_pages(obj); 4443 4444 if (WARN_ON(obj->pages_pin_count)) 4445 obj->pages_pin_count = 0; 4446 if (discard_backing_storage(obj)) 4447 obj->madv = I915_MADV_DONTNEED; 4448 i915_gem_object_put_pages(obj); 4449 i915_gem_object_free_mmap_offset(obj); 4450 4451 BUG_ON(obj->pages); 4452 4453 if (obj->base.import_attach) 4454 drm_prime_gem_destroy(&obj->base, NULL); 4455 4456 if (obj->ops->release) 4457 obj->ops->release(obj); 4458 4459 drm_gem_object_release(&obj->base); 4460 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4461 4462 kfree(obj->bit_17); 4463 i915_gem_object_free(obj); 4464 4465 intel_runtime_pm_put(dev_priv); 4466 } 4467 4468 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4469 struct i915_address_space *vm) 4470 { 4471 struct i915_vma *vma; 4472 list_for_each_entry(vma, &obj->vma_list, vma_link) { 4473 if (i915_is_ggtt(vma->vm) && 4474 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4475 continue; 4476 if (vma->vm == vm) 4477 return vma; 4478 } 4479 return NULL; 4480 } 4481 4482 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4483 const struct i915_ggtt_view *view) 4484 { 4485 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 4486 struct i915_vma *vma; 4487 4488 if (WARN_ONCE(!view, "no view specified")) 4489 return ERR_PTR(-EINVAL); 4490 4491 list_for_each_entry(vma, &obj->vma_list, vma_link) 4492 if (vma->vm == ggtt && 4493 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4494 return vma; 4495 return NULL; 4496 } 4497 4498 void i915_gem_vma_destroy(struct i915_vma *vma) 4499 { 4500 struct i915_address_space *vm = NULL; 4501 WARN_ON(vma->node.allocated); 4502 4503 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4504 if (!list_empty(&vma->exec_list)) 4505 return; 4506 4507 vm = vma->vm; 4508 4509 if (!i915_is_ggtt(vm)) 4510 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4511 4512 list_del(&vma->vma_link); 4513 4514 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 4515 } 4516 4517 static void 4518 i915_gem_stop_ringbuffers(struct drm_device *dev) 4519 { 4520 struct drm_i915_private *dev_priv = dev->dev_private; 4521 struct intel_engine_cs *ring; 4522 int i; 4523 4524 for_each_ring(ring, dev_priv, i) 4525 dev_priv->gt.stop_ring(ring); 4526 } 4527 4528 int 4529 i915_gem_suspend(struct drm_device *dev) 4530 { 4531 struct drm_i915_private *dev_priv = dev->dev_private; 4532 int ret = 0; 4533 4534 mutex_lock(&dev->struct_mutex); 4535 ret = i915_gpu_idle(dev); 4536 if (ret) 4537 goto err; 4538 4539 i915_gem_retire_requests(dev); 4540 4541 i915_gem_stop_ringbuffers(dev); 4542 mutex_unlock(&dev->struct_mutex); 4543 4544 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4545 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4546 flush_delayed_work(&dev_priv->mm.idle_work); 4547 4548 /* Assert that we successfully flushed all the work and 4549 * reset the GPU back to its idle, low power state. 
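* The flush_delayed_work() call on the idle worker above guarantees it has finished running before this check.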
4550 */ 4551 WARN_ON(dev_priv->mm.busy); 4552 4553 return 0; 4554 4555 err: 4556 mutex_unlock(&dev->struct_mutex); 4557 return ret; 4558 } 4559 4560 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4561 { 4562 struct intel_engine_cs *ring = req->ring; 4563 struct drm_device *dev = ring->dev; 4564 struct drm_i915_private *dev_priv = dev->dev_private; 4565 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4566 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4567 int i, ret; 4568 4569 if (!HAS_L3_DPF(dev) || !remap_info) 4570 return 0; 4571 4572 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4573 if (ret) 4574 return ret; 4575 4576 /* 4577 * Note: We do not worry about the concurrent register cacheline hang 4578 * here because no other code should access these registers other than 4579 * at initialization time. 4580 */ 4581 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4582 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4583 intel_ring_emit(ring, reg_base + i); 4584 intel_ring_emit(ring, remap_info[i/4]); 4585 } 4586 4587 intel_ring_advance(ring); 4588 4589 return ret; 4590 } 4591 4592 void i915_gem_init_swizzling(struct drm_device *dev) 4593 { 4594 struct drm_i915_private *dev_priv = dev->dev_private; 4595 4596 if (INTEL_INFO(dev)->gen < 5 || 4597 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4598 return; 4599 4600 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4601 DISP_TILE_SURFACE_SWIZZLING); 4602 4603 if (IS_GEN5(dev)) 4604 return; 4605 4606 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4607 if (IS_GEN6(dev)) 4608 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4609 else if (IS_GEN7(dev)) 4610 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4611 else if (IS_GEN8(dev)) 4612 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4613 else 4614 BUG(); 4615 } 4616 4617 static void init_unused_ring(struct drm_device *dev, u32 base) 4618 { 4619 struct drm_i915_private *dev_priv = dev->dev_private; 4620 4621 I915_WRITE(RING_CTL(base), 0); 4622 I915_WRITE(RING_HEAD(base), 0); 4623 I915_WRITE(RING_TAIL(base), 0); 4624 I915_WRITE(RING_START(base), 0); 4625 } 4626 4627 static void init_unused_rings(struct drm_device *dev) 4628 { 4629 if (IS_I830(dev)) { 4630 init_unused_ring(dev, PRB1_BASE); 4631 init_unused_ring(dev, SRB0_BASE); 4632 init_unused_ring(dev, SRB1_BASE); 4633 init_unused_ring(dev, SRB2_BASE); 4634 init_unused_ring(dev, SRB3_BASE); 4635 } else if (IS_GEN2(dev)) { 4636 init_unused_ring(dev, SRB0_BASE); 4637 init_unused_ring(dev, SRB1_BASE); 4638 } else if (IS_GEN3(dev)) { 4639 init_unused_ring(dev, PRB1_BASE); 4640 init_unused_ring(dev, PRB2_BASE); 4641 } 4642 } 4643 4644 int i915_gem_init_rings(struct drm_device *dev) 4645 { 4646 struct drm_i915_private *dev_priv = dev->dev_private; 4647 int ret; 4648 4649 ret = intel_init_render_ring_buffer(dev); 4650 if (ret) 4651 return ret; 4652 4653 if (HAS_BSD(dev)) { 4654 ret = intel_init_bsd_ring_buffer(dev); 4655 if (ret) 4656 goto cleanup_render_ring; 4657 } 4658 4659 if (HAS_BLT(dev)) { 4660 ret = intel_init_blt_ring_buffer(dev); 4661 if (ret) 4662 goto cleanup_bsd_ring; 4663 } 4664 4665 if (HAS_VEBOX(dev)) { 4666 ret = intel_init_vebox_ring_buffer(dev); 4667 if (ret) 4668 goto cleanup_blt_ring; 4669 } 4670 4671 if (HAS_BSD2(dev)) { 4672 ret = intel_init_bsd2_ring_buffer(dev); 4673 if (ret) 4674 goto cleanup_vebox_ring; 4675 } 4676 4677 return 0; 4678 4679 cleanup_vebox_ring: 4680 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4681 
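/* Unwind in reverse order: each label below also tears down the rings that were brought up before the one that failed. */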
cleanup_blt_ring: 4682 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4683 cleanup_bsd_ring: 4684 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4685 cleanup_render_ring: 4686 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4687 4688 return ret; 4689 } 4690 4691 int 4692 i915_gem_init_hw(struct drm_device *dev) 4693 { 4694 struct drm_i915_private *dev_priv = dev->dev_private; 4695 struct intel_engine_cs *ring; 4696 int ret, i, j; 4697 4698 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4699 return -EIO; 4700 4701 /* Double layer security blanket, see i915_gem_init() */ 4702 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4703 4704 if (dev_priv->ellc_size) 4705 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4706 4707 if (IS_HASWELL(dev)) 4708 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 4709 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4710 4711 if (HAS_PCH_NOP(dev)) { 4712 if (IS_IVYBRIDGE(dev)) { 4713 u32 temp = I915_READ(GEN7_MSG_CTL); 4714 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4715 I915_WRITE(GEN7_MSG_CTL, temp); 4716 } else if (INTEL_INFO(dev)->gen >= 7) { 4717 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4718 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4719 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4720 } 4721 } 4722 4723 i915_gem_init_swizzling(dev); 4724 4725 /* 4726 * At least 830 can leave some of the unused rings 4727 * "active" (i.e. head != tail) after resume which 4728 * will prevent c3 entry. Make sure all unused rings 4729 * are totally idle. 4730 */ 4731 init_unused_rings(dev); 4732 4733 BUG_ON(!dev_priv->ring[RCS].default_context); 4734 4735 ret = i915_ppgtt_init_hw(dev); 4736 if (ret) { 4737 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 4738 goto out; 4739 } 4740 4741 /* Need to do basic initialisation of all rings first: */ 4742 for_each_ring(ring, dev_priv, i) { 4743 ret = ring->init_hw(ring); 4744 if (ret) 4745 goto out; 4746 } 4747 4748 /* We can't enable contexts until all firmware is loaded */ 4749 if (HAS_GUC_UCODE(dev)) { 4750 ret = intel_guc_ucode_load(dev); 4751 if (ret) { 4752 /* 4753 * If we got an error and GuC submission is enabled, map 4754 * the error to -EIO so the GPU will be declared wedged. 4755 * OTOH, if we didn't intend to use the GuC anyway, just 4756 * discard the error and carry on. 4757 */ 4758 DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret, 4759 i915.enable_guc_submission ? "" : 4760 " (ignored)"); 4761 ret = i915.enable_guc_submission ? 
-EIO : 0; 4762 if (ret) 4763 goto out; 4764 } 4765 } 4766 4767 /* 4768 * Increment the next seqno by 0x100 so we have a visible break 4769 * on re-initialisation 4770 */ 4771 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 4772 if (ret) 4773 goto out; 4774 4775 /* Now it is safe to go back round and do everything else: */ 4776 for_each_ring(ring, dev_priv, i) { 4777 struct drm_i915_gem_request *req; 4778 4779 WARN_ON(!ring->default_context); 4780 4781 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 4782 if (ret) { 4783 i915_gem_cleanup_ringbuffer(dev); 4784 goto out; 4785 } 4786 4787 if (ring->id == RCS) { 4788 for (j = 0; j < NUM_L3_SLICES(dev); j++) 4789 i915_gem_l3_remap(req, j); 4790 } 4791 4792 ret = i915_ppgtt_init_ring(req); 4793 if (ret && ret != -EIO) { 4794 DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); 4795 i915_gem_request_cancel(req); 4796 i915_gem_cleanup_ringbuffer(dev); 4797 goto out; 4798 } 4799 4800 ret = i915_gem_context_enable(req); 4801 if (ret && ret != -EIO) { 4802 DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); 4803 i915_gem_request_cancel(req); 4804 i915_gem_cleanup_ringbuffer(dev); 4805 goto out; 4806 } 4807 4808 i915_add_request_no_flush(req); 4809 } 4810 4811 out: 4812 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4813 return ret; 4814 } 4815 4816 int i915_gem_init(struct drm_device *dev) 4817 { 4818 struct drm_i915_private *dev_priv = dev->dev_private; 4819 int ret; 4820 4821 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 4822 i915.enable_execlists); 4823 4824 mutex_lock(&dev->struct_mutex); 4825 4826 if (IS_VALLEYVIEW(dev)) { 4827 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4828 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 4829 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 4830 VLV_GTLC_ALLOWWAKEACK), 10)) 4831 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4832 } 4833 4834 if (!i915.enable_execlists) { 4835 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 4836 dev_priv->gt.init_rings = i915_gem_init_rings; 4837 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 4838 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 4839 } else { 4840 dev_priv->gt.execbuf_submit = intel_execlists_submission; 4841 dev_priv->gt.init_rings = intel_logical_rings_init; 4842 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 4843 dev_priv->gt.stop_ring = intel_logical_ring_stop; 4844 } 4845 4846 /* This is just a security blanket to placate dragons. 4847 * On some systems, we very sporadically observe that the first TLBs 4848 * used by the CS may be stale, despite us poking the TLB reset. If 4849 * we hold the forcewake during initialisation these problems 4850 * just magically go away. 4851 */ 4852 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4853 4854 ret = i915_gem_init_userptr(dev); 4855 if (ret) 4856 goto out_unlock; 4857 4858 i915_gem_init_global_gtt(dev); 4859 4860 ret = i915_gem_context_init(dev); 4861 if (ret) 4862 goto out_unlock; 4863 4864 ret = dev_priv->gt.init_rings(dev); 4865 if (ret) 4866 goto out_unlock; 4867 4868 ret = i915_gem_init_hw(dev); 4869 if (ret == -EIO) { 4870 /* Allow ring initialisation to fail by marking the GPU as 4871 * wedged. But we only want to do this where the GPU is angry, 4872 * for all other failure, such as an allocation failure, bail. 
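* Clearing ret below lets driver load finish with the GPU wedged rather than failing the probe outright.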
4873 */ 4874 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4875 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4876 ret = 0; 4877 } 4878 4879 out_unlock: 4880 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4881 mutex_unlock(&dev->struct_mutex); 4882 4883 return ret; 4884 } 4885 4886 void 4887 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4888 { 4889 struct drm_i915_private *dev_priv = dev->dev_private; 4890 struct intel_engine_cs *ring; 4891 int i; 4892 4893 for_each_ring(ring, dev_priv, i) 4894 dev_priv->gt.cleanup_ring(ring); 4895 4896 if (i915.enable_execlists) 4897 /* 4898 * Neither the BIOS, ourselves nor any other kernel 4899 * expects the system to be in execlists mode on startup, 4900 * so we need to reset the GPU back to legacy mode. 4901 */ 4902 intel_gpu_reset(dev); 4903 } 4904 4905 static void 4906 init_ring_lists(struct intel_engine_cs *ring) 4907 { 4908 INIT_LIST_HEAD(&ring->active_list); 4909 INIT_LIST_HEAD(&ring->request_list); 4910 } 4911 4912 void 4913 i915_gem_load(struct drm_device *dev) 4914 { 4915 struct drm_i915_private *dev_priv = dev->dev_private; 4916 int i; 4917 4918 dev_priv->objects = 4919 kmem_cache_create("i915_gem_object", 4920 sizeof(struct drm_i915_gem_object), 0, 4921 SLAB_HWCACHE_ALIGN, 4922 NULL); 4923 dev_priv->vmas = 4924 kmem_cache_create("i915_gem_vma", 4925 sizeof(struct i915_vma), 0, 4926 SLAB_HWCACHE_ALIGN, 4927 NULL); 4928 dev_priv->requests = 4929 kmem_cache_create("i915_gem_request", 4930 sizeof(struct drm_i915_gem_request), 0, 4931 SLAB_HWCACHE_ALIGN, 4932 NULL); 4933 4934 INIT_LIST_HEAD(&dev_priv->vm_list); 4935 INIT_LIST_HEAD(&dev_priv->context_list); 4936 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4937 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4938 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4939 for (i = 0; i < I915_NUM_RINGS; i++) 4940 init_ring_lists(&dev_priv->ring[i]); 4941 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 4942 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4943 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4944 i915_gem_retire_work_handler); 4945 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 4946 i915_gem_idle_work_handler); 4947 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 4948 4949 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4950 4951 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 4952 dev_priv->num_fence_regs = 32; 4953 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4954 dev_priv->num_fence_regs = 16; 4955 else 4956 dev_priv->num_fence_regs = 8; 4957 4958 if (intel_vgpu_active(dev)) 4959 dev_priv->num_fence_regs = 4960 I915_READ(vgtif_reg(avail_rs.fence_num)); 4961 4962 /* 4963 * Set initial sequence number for requests. 4964 * Using this number allows the wraparound to happen early, 4965 * catching any obvious problems. 
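* (0x1100 below the wrap point, so the wrap is exercised after only a few thousand requests instead of ~4 billion.)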
4966 */ 4967 dev_priv->next_seqno = ((u32)~0 - 0x1100); 4968 dev_priv->last_seqno = ((u32)~0 - 0x1101); 4969 4970 /* Initialize fence registers to zero */ 4971 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4972 i915_gem_restore_fences(dev); 4973 4974 i915_gem_detect_bit_6_swizzle(dev); 4975 init_waitqueue_head(&dev_priv->pending_flip_queue); 4976 4977 dev_priv->mm.interruptible = true; 4978 4979 i915_gem_shrinker_init(dev_priv); 4980 4981 mutex_init(&dev_priv->fb_tracking.lock); 4982 } 4983 4984 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4985 { 4986 struct drm_i915_file_private *file_priv = file->driver_priv; 4987 4988 /* Clean up our request list when the client is going away, so that 4989 * later retire_requests won't dereference our soon-to-be-gone 4990 * file_priv. 4991 */ 4992 spin_lock(&file_priv->mm.lock); 4993 while (!list_empty(&file_priv->mm.request_list)) { 4994 struct drm_i915_gem_request *request; 4995 4996 request = list_first_entry(&file_priv->mm.request_list, 4997 struct drm_i915_gem_request, 4998 client_list); 4999 list_del(&request->client_list); 5000 request->file_priv = NULL; 5001 } 5002 spin_unlock(&file_priv->mm.lock); 5003 5004 if (!list_empty(&file_priv->rps.link)) { 5005 spin_lock(&to_i915(dev)->rps.client_lock); 5006 list_del(&file_priv->rps.link); 5007 spin_unlock(&to_i915(dev)->rps.client_lock); 5008 } 5009 } 5010 5011 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5012 { 5013 struct drm_i915_file_private *file_priv; 5014 int ret; 5015 5016 DRM_DEBUG_DRIVER("\n"); 5017 5018 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5019 if (!file_priv) 5020 return -ENOMEM; 5021 5022 file->driver_priv = file_priv; 5023 file_priv->dev_priv = dev->dev_private; 5024 file_priv->file = file; 5025 INIT_LIST_HEAD(&file_priv->rps.link); 5026 5027 spin_lock_init(&file_priv->mm.lock); 5028 INIT_LIST_HEAD(&file_priv->mm.request_list); 5029 5030 ret = i915_gem_context_open(dev, file); 5031 if (ret) 5032 kfree(file_priv); 5033 5034 return ret; 5035 } 5036 5037 /** 5038 * i915_gem_track_fb - update frontbuffer tracking 5039 * @old: current GEM buffer for the frontbuffer slots 5040 * @new: new GEM buffer for the frontbuffer slots 5041 * @frontbuffer_bits: bitmask of frontbuffer slots 5042 * 5043 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5044 * from @old and setting them in @new. Both @old and @new can be NULL. 
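* Callers must hold struct_mutex for any non-NULL object; the WARNs in the body below check for this.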
5045 */ 5046 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5047 struct drm_i915_gem_object *new, 5048 unsigned frontbuffer_bits) 5049 { 5050 if (old) { 5051 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5052 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5053 old->frontbuffer_bits &= ~frontbuffer_bits; 5054 } 5055 5056 if (new) { 5057 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5058 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5059 new->frontbuffer_bits |= frontbuffer_bits; 5060 } 5061 } 5062 5063 /* All the new VM stuff */ 5064 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5065 struct i915_address_space *vm) 5066 { 5067 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5068 struct i915_vma *vma; 5069 5070 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5071 5072 list_for_each_entry(vma, &o->vma_list, vma_link) { 5073 if (i915_is_ggtt(vma->vm) && 5074 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5075 continue; 5076 if (vma->vm == vm) 5077 return vma->node.start; 5078 } 5079 5080 WARN(1, "%s vma for this object not found.\n", 5081 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5082 return -1; 5083 } 5084 5085 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5086 const struct i915_ggtt_view *view) 5087 { 5088 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5089 struct i915_vma *vma; 5090 5091 list_for_each_entry(vma, &o->vma_list, vma_link) 5092 if (vma->vm == ggtt && 5093 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5094 return vma->node.start; 5095 5096 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5097 return -1; 5098 } 5099 5100 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5101 struct i915_address_space *vm) 5102 { 5103 struct i915_vma *vma; 5104 5105 list_for_each_entry(vma, &o->vma_list, vma_link) { 5106 if (i915_is_ggtt(vma->vm) && 5107 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5108 continue; 5109 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5110 return true; 5111 } 5112 5113 return false; 5114 } 5115 5116 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5117 const struct i915_ggtt_view *view) 5118 { 5119 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5120 struct i915_vma *vma; 5121 5122 list_for_each_entry(vma, &o->vma_list, vma_link) 5123 if (vma->vm == ggtt && 5124 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5125 drm_mm_node_allocated(&vma->node)) 5126 return true; 5127 5128 return false; 5129 } 5130 5131 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5132 { 5133 struct i915_vma *vma; 5134 5135 list_for_each_entry(vma, &o->vma_list, vma_link) 5136 if (drm_mm_node_allocated(&vma->node)) 5137 return true; 5138 5139 return false; 5140 } 5141 5142 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5143 struct i915_address_space *vm) 5144 { 5145 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5146 struct i915_vma *vma; 5147 5148 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5149 5150 BUG_ON(list_empty(&o->vma_list)); 5151 5152 list_for_each_entry(vma, &o->vma_list, vma_link) { 5153 if (i915_is_ggtt(vma->vm) && 5154 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5155 continue; 5156 if (vma->vm == vm) 5157 return vma->node.size; 5158 } 5159 return 0; 5160 } 5161 5162 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5163 { 5164 struct i915_vma *vma; 5165 list_for_each_entry(vma, &obj->vma_list, vma_link) 5166 if (vma->pin_count > 0) 5167 return true; 5168 5169 return 
false; 5170 } 5171 5172 /* Allocate a new GEM object and fill it with the supplied data */ 5173 struct drm_i915_gem_object * 5174 i915_gem_object_create_from_data(struct drm_device *dev, 5175 const void *data, size_t size) 5176 { 5177 struct drm_i915_gem_object *obj; 5178 struct sg_table *sg; 5179 size_t bytes; 5180 int ret; 5181 5182 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5183 if (IS_ERR_OR_NULL(obj)) 5184 return obj; 5185 5186 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5187 if (ret) 5188 goto fail; 5189 5190 ret = i915_gem_object_get_pages(obj); 5191 if (ret) 5192 goto fail; 5193 5194 i915_gem_object_pin_pages(obj); 5195 sg = obj->pages; 5196 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5197 i915_gem_object_unpin_pages(obj); 5198 5199 if (WARN_ON(bytes != size)) { 5200 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5201 ret = -EFAULT; 5202 goto fail; 5203 } 5204 5205 return obj; 5206 5207 fail: 5208 drm_gem_object_unreference(&obj->base); 5209 return ERR_PTR(ret); 5210 } 5211
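/*
 * Illustrative sketch only (not part of the driver): roughly how a caller
 * might wrap a blob of data in a GEM object using the helper above. The
 * "blob"/"len" names are placeholders and error handling is abbreviated;
 * the unreference must be done under struct_mutex.
 *
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, len);
 *	if (IS_ERR_OR_NULL(obj))
 *		return obj ? PTR_ERR(obj) : -ENOMEM;
 *
 *	... use obj (e.g. pin it and read the copied contents back) ...
 *
 *	drm_gem_object_unreference(&obj->base);
 */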