1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include "intel_mocs.h" 36 #include <linux/shmem_fs.h> 37 #include <linux/slab.h> 38 #include <linux/swap.h> 39 #include <linux/pci.h> 40 #include <linux/dma-buf.h> 41 42 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 43 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 44 static void 45 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 46 static void 47 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 48 49 static bool cpu_cache_is_coherent(struct drm_device *dev, 50 enum i915_cache_level level) 51 { 52 return HAS_LLC(dev) || level != I915_CACHE_NONE; 53 } 54 55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 56 { 57 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 58 return false; 59 60 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 61 return true; 62 63 return obj->pin_display; 64 } 65 66 static int 67 insert_mappable_node(struct drm_i915_private *i915, 68 struct drm_mm_node *node, u32 size) 69 { 70 memset(node, 0, sizeof(*node)); 71 return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, 72 size, 0, 0, 0, 73 i915->ggtt.mappable_end, 74 DRM_MM_SEARCH_DEFAULT, 75 DRM_MM_CREATE_DEFAULT); 76 } 77 78 static void 79 remove_mappable_node(struct drm_mm_node *node) 80 { 81 drm_mm_remove_node(node); 82 } 83 84 /* some bookkeeping */ 85 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 86 size_t size) 87 { 88 spin_lock(&dev_priv->mm.object_stat_lock); 89 dev_priv->mm.object_count++; 90 dev_priv->mm.object_memory += size; 91 spin_unlock(&dev_priv->mm.object_stat_lock); 92 } 93 94 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 95 size_t size) 96 { 97 spin_lock(&dev_priv->mm.object_stat_lock); 98 dev_priv->mm.object_count--; 99 dev_priv->mm.object_memory -= size; 100 spin_unlock(&dev_priv->mm.object_stat_lock); 101 } 102 103 static int 104 i915_gem_wait_for_error(struct i915_gpu_error *error) 105 { 106 int ret; 107 108 if (!i915_reset_in_progress(error)) 109 return 0; 110 
111 /* 112 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 113 * userspace. If it takes that long something really bad is going on and 114 * we should simply try to bail out and fail as gracefully as possible. 115 */ 116 ret = wait_event_interruptible_timeout(error->reset_queue, 117 !i915_reset_in_progress(error), 118 10*HZ); 119 if (ret == 0) { 120 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 121 return -EIO; 122 } else if (ret < 0) { 123 return ret; 124 } else { 125 return 0; 126 } 127 } 128 129 int i915_mutex_lock_interruptible(struct drm_device *dev) 130 { 131 struct drm_i915_private *dev_priv = to_i915(dev); 132 int ret; 133 134 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 135 if (ret) 136 return ret; 137 138 ret = mutex_lock_interruptible(&dev->struct_mutex); 139 if (ret) 140 return ret; 141 142 WARN_ON(i915_verify_lists(dev)); 143 return 0; 144 } 145 146 int 147 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 148 struct drm_file *file) 149 { 150 struct drm_i915_private *dev_priv = to_i915(dev); 151 struct i915_ggtt *ggtt = &dev_priv->ggtt; 152 struct drm_i915_gem_get_aperture *args = data; 153 struct i915_vma *vma; 154 size_t pinned; 155 156 pinned = 0; 157 mutex_lock(&dev->struct_mutex); 158 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 159 if (vma->pin_count) 160 pinned += vma->node.size; 161 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 162 if (vma->pin_count) 163 pinned += vma->node.size; 164 mutex_unlock(&dev->struct_mutex); 165 166 args->aper_size = ggtt->base.total; 167 args->aper_available_size = args->aper_size - pinned; 168 169 return 0; 170 } 171 172 static int 173 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 174 { 175 struct address_space *mapping = obj->base.filp->f_mapping; 176 char *vaddr = obj->phys_handle->vaddr; 177 struct sg_table *st; 178 struct scatterlist *sg; 179 int i; 180 181 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 182 return -EINVAL; 183 184 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 185 struct page *page; 186 char *src; 187 188 page = shmem_read_mapping_page(mapping, i); 189 if (IS_ERR(page)) 190 return PTR_ERR(page); 191 192 src = kmap_atomic(page); 193 memcpy(vaddr, src, PAGE_SIZE); 194 drm_clflush_virt_range(vaddr, PAGE_SIZE); 195 kunmap_atomic(src); 196 197 put_page(page); 198 vaddr += PAGE_SIZE; 199 } 200 201 i915_gem_chipset_flush(to_i915(obj->base.dev)); 202 203 st = kmalloc(sizeof(*st), GFP_KERNEL); 204 if (st == NULL) 205 return -ENOMEM; 206 207 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 208 kfree(st); 209 return -ENOMEM; 210 } 211 212 sg = st->sgl; 213 sg->offset = 0; 214 sg->length = obj->base.size; 215 216 sg_dma_address(sg) = obj->phys_handle->busaddr; 217 sg_dma_len(sg) = obj->base.size; 218 219 obj->pages = st; 220 return 0; 221 } 222 223 static void 224 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 225 { 226 int ret; 227 228 BUG_ON(obj->madv == __I915_MADV_PURGED); 229 230 ret = i915_gem_object_set_to_cpu_domain(obj, true); 231 if (WARN_ON(ret)) { 232 /* In the event of a disaster, abandon all caches and 233 * hope for the best. 
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = obj->base.filp->f_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
	obj->ops = &i915_gem_phys_ops;

	return i915_gem_object_get_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
360 */ 361 mutex_unlock(&dev->struct_mutex); 362 unwritten = copy_from_user(vaddr, user_data, args->size); 363 mutex_lock(&dev->struct_mutex); 364 if (unwritten) { 365 ret = -EFAULT; 366 goto out; 367 } 368 } 369 370 drm_clflush_virt_range(vaddr, args->size); 371 i915_gem_chipset_flush(to_i915(dev)); 372 373 out: 374 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 375 return ret; 376 } 377 378 void *i915_gem_object_alloc(struct drm_device *dev) 379 { 380 struct drm_i915_private *dev_priv = to_i915(dev); 381 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 382 } 383 384 void i915_gem_object_free(struct drm_i915_gem_object *obj) 385 { 386 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 387 kmem_cache_free(dev_priv->objects, obj); 388 } 389 390 static int 391 i915_gem_create(struct drm_file *file, 392 struct drm_device *dev, 393 uint64_t size, 394 uint32_t *handle_p) 395 { 396 struct drm_i915_gem_object *obj; 397 int ret; 398 u32 handle; 399 400 size = roundup(size, PAGE_SIZE); 401 if (size == 0) 402 return -EINVAL; 403 404 /* Allocate the new object */ 405 obj = i915_gem_object_create(dev, size); 406 if (IS_ERR(obj)) 407 return PTR_ERR(obj); 408 409 ret = drm_gem_handle_create(file, &obj->base, &handle); 410 /* drop reference from allocate - handle holds it now */ 411 drm_gem_object_unreference_unlocked(&obj->base); 412 if (ret) 413 return ret; 414 415 *handle_p = handle; 416 return 0; 417 } 418 419 int 420 i915_gem_dumb_create(struct drm_file *file, 421 struct drm_device *dev, 422 struct drm_mode_create_dumb *args) 423 { 424 /* have to work out size/pitch and return them */ 425 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 426 args->size = args->pitch * args->height; 427 return i915_gem_create(file, dev, 428 args->size, &args->handle); 429 } 430 431 /** 432 * Creates a new mm object and returns a handle to it. 
433 * @dev: drm device pointer 434 * @data: ioctl data blob 435 * @file: drm file pointer 436 */ 437 int 438 i915_gem_create_ioctl(struct drm_device *dev, void *data, 439 struct drm_file *file) 440 { 441 struct drm_i915_gem_create *args = data; 442 443 return i915_gem_create(file, dev, 444 args->size, &args->handle); 445 } 446 447 static inline int 448 __copy_to_user_swizzled(char __user *cpu_vaddr, 449 const char *gpu_vaddr, int gpu_offset, 450 int length) 451 { 452 int ret, cpu_offset = 0; 453 454 while (length > 0) { 455 int cacheline_end = ALIGN(gpu_offset + 1, 64); 456 int this_length = min(cacheline_end - gpu_offset, length); 457 int swizzled_gpu_offset = gpu_offset ^ 64; 458 459 ret = __copy_to_user(cpu_vaddr + cpu_offset, 460 gpu_vaddr + swizzled_gpu_offset, 461 this_length); 462 if (ret) 463 return ret + length; 464 465 cpu_offset += this_length; 466 gpu_offset += this_length; 467 length -= this_length; 468 } 469 470 return 0; 471 } 472 473 static inline int 474 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 475 const char __user *cpu_vaddr, 476 int length) 477 { 478 int ret, cpu_offset = 0; 479 480 while (length > 0) { 481 int cacheline_end = ALIGN(gpu_offset + 1, 64); 482 int this_length = min(cacheline_end - gpu_offset, length); 483 int swizzled_gpu_offset = gpu_offset ^ 64; 484 485 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 486 cpu_vaddr + cpu_offset, 487 this_length); 488 if (ret) 489 return ret + length; 490 491 cpu_offset += this_length; 492 gpu_offset += this_length; 493 length -= this_length; 494 } 495 496 return 0; 497 } 498 499 /* 500 * Pins the specified object's pages and synchronizes the object with 501 * GPU accesses. Sets needs_clflush to non-zero if the caller should 502 * flush the object from the CPU cache. 503 */ 504 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 505 int *needs_clflush) 506 { 507 int ret; 508 509 *needs_clflush = 0; 510 511 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 512 return -EINVAL; 513 514 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 515 /* If we're not in the cpu read domain, set ourself into the gtt 516 * read domain and manually flush cachelines (if required). This 517 * optimizes for the case when the gpu will dirty the data 518 * anyway again before the next pread happens. */ 519 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 520 obj->cache_level); 521 ret = i915_gem_object_wait_rendering(obj, true); 522 if (ret) 523 return ret; 524 } 525 526 ret = i915_gem_object_get_pages(obj); 527 if (ret) 528 return ret; 529 530 i915_gem_object_pin_pages(obj); 531 532 return ret; 533 } 534 535 /* Per-page copy function for the shmem pread fastpath. 536 * Flushes invalid cachelines before reading the target if 537 * needs_clflush is set. */ 538 static int 539 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 540 char __user *user_data, 541 bool page_do_bit17_swizzling, bool needs_clflush) 542 { 543 char *vaddr; 544 int ret; 545 546 if (unlikely(page_do_bit17_swizzling)) 547 return -EINVAL; 548 549 vaddr = kmap_atomic(page); 550 if (needs_clflush) 551 drm_clflush_virt_range(vaddr + shmem_page_offset, 552 page_length); 553 ret = __copy_to_user_inatomic(user_data, 554 vaddr + shmem_page_offset, 555 page_length); 556 kunmap_atomic(vaddr); 557 558 return ret ? 
		-EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static inline unsigned long
slow_user_access(struct io_mapping *mapping,
		 uint64_t page_base, int page_offset,
		 char __user *user_data,
		 unsigned long length, bool pwrite)
{
	void __iomem *ioaddr;
	void *vaddr;
	uint64_t unwritten;

	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
	/* We can use the cpu mem copy function because this is X86.
*/ 624 vaddr = (void __force *)ioaddr + page_offset; 625 if (pwrite) 626 unwritten = __copy_from_user(vaddr, user_data, length); 627 else 628 unwritten = __copy_to_user(user_data, vaddr, length); 629 630 io_mapping_unmap(ioaddr); 631 return unwritten; 632 } 633 634 static int 635 i915_gem_gtt_pread(struct drm_device *dev, 636 struct drm_i915_gem_object *obj, uint64_t size, 637 uint64_t data_offset, uint64_t data_ptr) 638 { 639 struct drm_i915_private *dev_priv = to_i915(dev); 640 struct i915_ggtt *ggtt = &dev_priv->ggtt; 641 struct drm_mm_node node; 642 char __user *user_data; 643 uint64_t remain; 644 uint64_t offset; 645 int ret; 646 647 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); 648 if (ret) { 649 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); 650 if (ret) 651 goto out; 652 653 ret = i915_gem_object_get_pages(obj); 654 if (ret) { 655 remove_mappable_node(&node); 656 goto out; 657 } 658 659 i915_gem_object_pin_pages(obj); 660 } else { 661 node.start = i915_gem_obj_ggtt_offset(obj); 662 node.allocated = false; 663 ret = i915_gem_object_put_fence(obj); 664 if (ret) 665 goto out_unpin; 666 } 667 668 ret = i915_gem_object_set_to_gtt_domain(obj, false); 669 if (ret) 670 goto out_unpin; 671 672 user_data = u64_to_user_ptr(data_ptr); 673 remain = size; 674 offset = data_offset; 675 676 mutex_unlock(&dev->struct_mutex); 677 if (likely(!i915.prefault_disable)) { 678 ret = fault_in_multipages_writeable(user_data, remain); 679 if (ret) { 680 mutex_lock(&dev->struct_mutex); 681 goto out_unpin; 682 } 683 } 684 685 while (remain > 0) { 686 /* Operation in this page 687 * 688 * page_base = page offset within aperture 689 * page_offset = offset within page 690 * page_length = bytes to copy for this page 691 */ 692 u32 page_base = node.start; 693 unsigned page_offset = offset_in_page(offset); 694 unsigned page_length = PAGE_SIZE - page_offset; 695 page_length = remain < page_length ? remain : page_length; 696 if (node.allocated) { 697 wmb(); 698 ggtt->base.insert_page(&ggtt->base, 699 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 700 node.start, 701 I915_CACHE_NONE, 0); 702 wmb(); 703 } else { 704 page_base += offset & PAGE_MASK; 705 } 706 /* This is a slow read/write as it tries to read from 707 * and write to user memory which may result into page 708 * faults, and so we cannot perform this under struct_mutex. 709 */ 710 if (slow_user_access(ggtt->mappable, page_base, 711 page_offset, user_data, 712 page_length, false)) { 713 ret = -EFAULT; 714 break; 715 } 716 717 remain -= page_length; 718 user_data += page_length; 719 offset += page_length; 720 } 721 722 mutex_lock(&dev->struct_mutex); 723 if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { 724 /* The user has modified the object whilst we tried 725 * reading from it, and we now have no idea what domain 726 * the pages should be in. As we have just been touching 727 * them directly, flush everything back to the GTT 728 * domain. 
729 */ 730 ret = i915_gem_object_set_to_gtt_domain(obj, false); 731 } 732 733 out_unpin: 734 if (node.allocated) { 735 wmb(); 736 ggtt->base.clear_range(&ggtt->base, 737 node.start, node.size, 738 true); 739 i915_gem_object_unpin_pages(obj); 740 remove_mappable_node(&node); 741 } else { 742 i915_gem_object_ggtt_unpin(obj); 743 } 744 out: 745 return ret; 746 } 747 748 static int 749 i915_gem_shmem_pread(struct drm_device *dev, 750 struct drm_i915_gem_object *obj, 751 struct drm_i915_gem_pread *args, 752 struct drm_file *file) 753 { 754 char __user *user_data; 755 ssize_t remain; 756 loff_t offset; 757 int shmem_page_offset, page_length, ret = 0; 758 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 759 int prefaulted = 0; 760 int needs_clflush = 0; 761 struct sg_page_iter sg_iter; 762 763 if (!i915_gem_object_has_struct_page(obj)) 764 return -ENODEV; 765 766 user_data = u64_to_user_ptr(args->data_ptr); 767 remain = args->size; 768 769 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 770 771 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 772 if (ret) 773 return ret; 774 775 offset = args->offset; 776 777 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 778 offset >> PAGE_SHIFT) { 779 struct page *page = sg_page_iter_page(&sg_iter); 780 781 if (remain <= 0) 782 break; 783 784 /* Operation in this page 785 * 786 * shmem_page_offset = offset within page in shmem file 787 * page_length = bytes to copy for this page 788 */ 789 shmem_page_offset = offset_in_page(offset); 790 page_length = remain; 791 if ((shmem_page_offset + page_length) > PAGE_SIZE) 792 page_length = PAGE_SIZE - shmem_page_offset; 793 794 page_do_bit17_swizzling = obj_do_bit17_swizzling && 795 (page_to_phys(page) & (1 << 17)) != 0; 796 797 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 798 user_data, page_do_bit17_swizzling, 799 needs_clflush); 800 if (ret == 0) 801 goto next_page; 802 803 mutex_unlock(&dev->struct_mutex); 804 805 if (likely(!i915.prefault_disable) && !prefaulted) { 806 ret = fault_in_multipages_writeable(user_data, remain); 807 /* Userspace is tricking us, but we've already clobbered 808 * its pages with the prefault and promised to write the 809 * data up to the first fault. Hence ignore any errors 810 * and just continue. */ 811 (void)ret; 812 prefaulted = 1; 813 } 814 815 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 816 user_data, page_do_bit17_swizzling, 817 needs_clflush); 818 819 mutex_lock(&dev->struct_mutex); 820 821 if (ret) 822 goto out; 823 824 next_page: 825 remain -= page_length; 826 user_data += page_length; 827 offset += page_length; 828 } 829 830 out: 831 i915_gem_object_unpin_pages(obj); 832 833 return ret; 834 } 835 836 /** 837 * Reads data from the object referenced by handle. 838 * @dev: drm device pointer 839 * @data: ioctl data blob 840 * @file: drm file pointer 841 * 842 * On error, the contents of *data are undefined. 
843 */ 844 int 845 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 846 struct drm_file *file) 847 { 848 struct drm_i915_gem_pread *args = data; 849 struct drm_i915_gem_object *obj; 850 int ret = 0; 851 852 if (args->size == 0) 853 return 0; 854 855 if (!access_ok(VERIFY_WRITE, 856 u64_to_user_ptr(args->data_ptr), 857 args->size)) 858 return -EFAULT; 859 860 ret = i915_mutex_lock_interruptible(dev); 861 if (ret) 862 return ret; 863 864 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 865 if (&obj->base == NULL) { 866 ret = -ENOENT; 867 goto unlock; 868 } 869 870 /* Bounds check source. */ 871 if (args->offset > obj->base.size || 872 args->size > obj->base.size - args->offset) { 873 ret = -EINVAL; 874 goto out; 875 } 876 877 trace_i915_gem_object_pread(obj, args->offset, args->size); 878 879 ret = i915_gem_shmem_pread(dev, obj, args, file); 880 881 /* pread for non shmem backed objects */ 882 if (ret == -EFAULT || ret == -ENODEV) 883 ret = i915_gem_gtt_pread(dev, obj, args->size, 884 args->offset, args->data_ptr); 885 886 out: 887 drm_gem_object_unreference(&obj->base); 888 unlock: 889 mutex_unlock(&dev->struct_mutex); 890 return ret; 891 } 892 893 /* This is the fast write path which cannot handle 894 * page faults in the source data 895 */ 896 897 static inline int 898 fast_user_write(struct io_mapping *mapping, 899 loff_t page_base, int page_offset, 900 char __user *user_data, 901 int length) 902 { 903 void __iomem *vaddr_atomic; 904 void *vaddr; 905 unsigned long unwritten; 906 907 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 908 /* We can use the cpu mem copy function because this is X86. */ 909 vaddr = (void __force*)vaddr_atomic + page_offset; 910 unwritten = __copy_from_user_inatomic_nocache(vaddr, 911 user_data, length); 912 io_mapping_unmap_atomic(vaddr_atomic); 913 return unwritten; 914 } 915 916 /** 917 * This is the fast pwrite path, where we copy the data directly from the 918 * user into the GTT, uncached. 
919 * @dev: drm device pointer 920 * @obj: i915 gem object 921 * @args: pwrite arguments structure 922 * @file: drm file pointer 923 */ 924 static int 925 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, 926 struct drm_i915_gem_object *obj, 927 struct drm_i915_gem_pwrite *args, 928 struct drm_file *file) 929 { 930 struct i915_ggtt *ggtt = &i915->ggtt; 931 struct drm_device *dev = obj->base.dev; 932 struct drm_mm_node node; 933 uint64_t remain, offset; 934 char __user *user_data; 935 int ret; 936 bool hit_slow_path = false; 937 938 if (obj->tiling_mode != I915_TILING_NONE) 939 return -EFAULT; 940 941 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 942 if (ret) { 943 ret = insert_mappable_node(i915, &node, PAGE_SIZE); 944 if (ret) 945 goto out; 946 947 ret = i915_gem_object_get_pages(obj); 948 if (ret) { 949 remove_mappable_node(&node); 950 goto out; 951 } 952 953 i915_gem_object_pin_pages(obj); 954 } else { 955 node.start = i915_gem_obj_ggtt_offset(obj); 956 node.allocated = false; 957 ret = i915_gem_object_put_fence(obj); 958 if (ret) 959 goto out_unpin; 960 } 961 962 ret = i915_gem_object_set_to_gtt_domain(obj, true); 963 if (ret) 964 goto out_unpin; 965 966 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 967 obj->dirty = true; 968 969 user_data = u64_to_user_ptr(args->data_ptr); 970 offset = args->offset; 971 remain = args->size; 972 while (remain) { 973 /* Operation in this page 974 * 975 * page_base = page offset within aperture 976 * page_offset = offset within page 977 * page_length = bytes to copy for this page 978 */ 979 u32 page_base = node.start; 980 unsigned page_offset = offset_in_page(offset); 981 unsigned page_length = PAGE_SIZE - page_offset; 982 page_length = remain < page_length ? remain : page_length; 983 if (node.allocated) { 984 wmb(); /* flush the write before we modify the GGTT */ 985 ggtt->base.insert_page(&ggtt->base, 986 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 987 node.start, I915_CACHE_NONE, 0); 988 wmb(); /* flush modifications to the GGTT (insert_page) */ 989 } else { 990 page_base += offset & PAGE_MASK; 991 } 992 /* If we get a fault while copying data, then (presumably) our 993 * source page isn't available. Return the error and we'll 994 * retry in the slow path. 995 * If the object is non-shmem backed, we retry again with the 996 * path that handles page fault. 997 */ 998 if (fast_user_write(ggtt->mappable, page_base, 999 page_offset, user_data, page_length)) { 1000 hit_slow_path = true; 1001 mutex_unlock(&dev->struct_mutex); 1002 if (slow_user_access(ggtt->mappable, 1003 page_base, 1004 page_offset, user_data, 1005 page_length, true)) { 1006 ret = -EFAULT; 1007 mutex_lock(&dev->struct_mutex); 1008 goto out_flush; 1009 } 1010 1011 mutex_lock(&dev->struct_mutex); 1012 } 1013 1014 remain -= page_length; 1015 user_data += page_length; 1016 offset += page_length; 1017 } 1018 1019 out_flush: 1020 if (hit_slow_path) { 1021 if (ret == 0 && 1022 (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { 1023 /* The user has modified the object whilst we tried 1024 * reading from it, and we now have no idea what domain 1025 * the pages should be in. As we have just been touching 1026 * them directly, flush everything back to the GTT 1027 * domain. 
1028 */ 1029 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1030 } 1031 } 1032 1033 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 1034 out_unpin: 1035 if (node.allocated) { 1036 wmb(); 1037 ggtt->base.clear_range(&ggtt->base, 1038 node.start, node.size, 1039 true); 1040 i915_gem_object_unpin_pages(obj); 1041 remove_mappable_node(&node); 1042 } else { 1043 i915_gem_object_ggtt_unpin(obj); 1044 } 1045 out: 1046 return ret; 1047 } 1048 1049 /* Per-page copy function for the shmem pwrite fastpath. 1050 * Flushes invalid cachelines before writing to the target if 1051 * needs_clflush_before is set and flushes out any written cachelines after 1052 * writing if needs_clflush is set. */ 1053 static int 1054 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 1055 char __user *user_data, 1056 bool page_do_bit17_swizzling, 1057 bool needs_clflush_before, 1058 bool needs_clflush_after) 1059 { 1060 char *vaddr; 1061 int ret; 1062 1063 if (unlikely(page_do_bit17_swizzling)) 1064 return -EINVAL; 1065 1066 vaddr = kmap_atomic(page); 1067 if (needs_clflush_before) 1068 drm_clflush_virt_range(vaddr + shmem_page_offset, 1069 page_length); 1070 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 1071 user_data, page_length); 1072 if (needs_clflush_after) 1073 drm_clflush_virt_range(vaddr + shmem_page_offset, 1074 page_length); 1075 kunmap_atomic(vaddr); 1076 1077 return ret ? -EFAULT : 0; 1078 } 1079 1080 /* Only difference to the fast-path function is that this can handle bit17 1081 * and uses non-atomic copy and kmap functions. */ 1082 static int 1083 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 1084 char __user *user_data, 1085 bool page_do_bit17_swizzling, 1086 bool needs_clflush_before, 1087 bool needs_clflush_after) 1088 { 1089 char *vaddr; 1090 int ret; 1091 1092 vaddr = kmap(page); 1093 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1094 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1095 page_length, 1096 page_do_bit17_swizzling); 1097 if (page_do_bit17_swizzling) 1098 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 1099 user_data, 1100 page_length); 1101 else 1102 ret = __copy_from_user(vaddr + shmem_page_offset, 1103 user_data, 1104 page_length); 1105 if (needs_clflush_after) 1106 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1107 page_length, 1108 page_do_bit17_swizzling); 1109 kunmap(page); 1110 1111 return ret ? -EFAULT : 0; 1112 } 1113 1114 static int 1115 i915_gem_shmem_pwrite(struct drm_device *dev, 1116 struct drm_i915_gem_object *obj, 1117 struct drm_i915_gem_pwrite *args, 1118 struct drm_file *file) 1119 { 1120 ssize_t remain; 1121 loff_t offset; 1122 char __user *user_data; 1123 int shmem_page_offset, page_length, ret = 0; 1124 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 1125 int hit_slowpath = 0; 1126 int needs_clflush_after = 0; 1127 int needs_clflush_before = 0; 1128 struct sg_page_iter sg_iter; 1129 1130 user_data = u64_to_user_ptr(args->data_ptr); 1131 remain = args->size; 1132 1133 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 1134 1135 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1136 /* If we're not in the cpu write domain, set ourself into the gtt 1137 * write domain and manually flush cachelines (if required). This 1138 * optimizes for the case when the gpu will use the data 1139 * right away and we therefore have to clflush anyway. 
*/ 1140 needs_clflush_after = cpu_write_needs_clflush(obj); 1141 ret = i915_gem_object_wait_rendering(obj, false); 1142 if (ret) 1143 return ret; 1144 } 1145 /* Same trick applies to invalidate partially written cachelines read 1146 * before writing. */ 1147 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 1148 needs_clflush_before = 1149 !cpu_cache_is_coherent(dev, obj->cache_level); 1150 1151 ret = i915_gem_object_get_pages(obj); 1152 if (ret) 1153 return ret; 1154 1155 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1156 1157 i915_gem_object_pin_pages(obj); 1158 1159 offset = args->offset; 1160 obj->dirty = 1; 1161 1162 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 1163 offset >> PAGE_SHIFT) { 1164 struct page *page = sg_page_iter_page(&sg_iter); 1165 int partial_cacheline_write; 1166 1167 if (remain <= 0) 1168 break; 1169 1170 /* Operation in this page 1171 * 1172 * shmem_page_offset = offset within page in shmem file 1173 * page_length = bytes to copy for this page 1174 */ 1175 shmem_page_offset = offset_in_page(offset); 1176 1177 page_length = remain; 1178 if ((shmem_page_offset + page_length) > PAGE_SIZE) 1179 page_length = PAGE_SIZE - shmem_page_offset; 1180 1181 /* If we don't overwrite a cacheline completely we need to be 1182 * careful to have up-to-date data by first clflushing. Don't 1183 * overcomplicate things and flush the entire patch. */ 1184 partial_cacheline_write = needs_clflush_before && 1185 ((shmem_page_offset | page_length) 1186 & (boot_cpu_data.x86_clflush_size - 1)); 1187 1188 page_do_bit17_swizzling = obj_do_bit17_swizzling && 1189 (page_to_phys(page) & (1 << 17)) != 0; 1190 1191 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1192 user_data, page_do_bit17_swizzling, 1193 partial_cacheline_write, 1194 needs_clflush_after); 1195 if (ret == 0) 1196 goto next_page; 1197 1198 hit_slowpath = 1; 1199 mutex_unlock(&dev->struct_mutex); 1200 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1201 user_data, page_do_bit17_swizzling, 1202 partial_cacheline_write, 1203 needs_clflush_after); 1204 1205 mutex_lock(&dev->struct_mutex); 1206 1207 if (ret) 1208 goto out; 1209 1210 next_page: 1211 remain -= page_length; 1212 user_data += page_length; 1213 offset += page_length; 1214 } 1215 1216 out: 1217 i915_gem_object_unpin_pages(obj); 1218 1219 if (hit_slowpath) { 1220 /* 1221 * Fixup: Flush cpu caches in case we didn't flush the dirty 1222 * cachelines in-line while writing and the object moved 1223 * out of the cpu write domain while we've dropped the lock. 1224 */ 1225 if (!needs_clflush_after && 1226 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1227 if (i915_gem_clflush_object(obj, obj->pin_display)) 1228 needs_clflush_after = true; 1229 } 1230 } 1231 1232 if (needs_clflush_after) 1233 i915_gem_chipset_flush(to_i915(dev)); 1234 else 1235 obj->cache_dirty = true; 1236 1237 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1238 return ret; 1239 } 1240 1241 /** 1242 * Writes data to the object referenced by handle. 1243 * @dev: drm device 1244 * @data: ioctl data blob 1245 * @file: drm file 1246 * 1247 * On error, the contents of the buffer that were to be modified are undefined. 
1248 */ 1249 int 1250 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1251 struct drm_file *file) 1252 { 1253 struct drm_i915_private *dev_priv = to_i915(dev); 1254 struct drm_i915_gem_pwrite *args = data; 1255 struct drm_i915_gem_object *obj; 1256 int ret; 1257 1258 if (args->size == 0) 1259 return 0; 1260 1261 if (!access_ok(VERIFY_READ, 1262 u64_to_user_ptr(args->data_ptr), 1263 args->size)) 1264 return -EFAULT; 1265 1266 if (likely(!i915.prefault_disable)) { 1267 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr), 1268 args->size); 1269 if (ret) 1270 return -EFAULT; 1271 } 1272 1273 intel_runtime_pm_get(dev_priv); 1274 1275 ret = i915_mutex_lock_interruptible(dev); 1276 if (ret) 1277 goto put_rpm; 1278 1279 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1280 if (&obj->base == NULL) { 1281 ret = -ENOENT; 1282 goto unlock; 1283 } 1284 1285 /* Bounds check destination. */ 1286 if (args->offset > obj->base.size || 1287 args->size > obj->base.size - args->offset) { 1288 ret = -EINVAL; 1289 goto out; 1290 } 1291 1292 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1293 1294 ret = -EFAULT; 1295 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1296 * it would end up going through the fenced access, and we'll get 1297 * different detiling behavior between reading and writing. 1298 * pread/pwrite currently are reading and writing from the CPU 1299 * perspective, requiring manual detiling by the client. 1300 */ 1301 if (!i915_gem_object_has_struct_page(obj) || 1302 cpu_write_needs_clflush(obj)) { 1303 ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); 1304 /* Note that the gtt paths might fail with non-page-backed user 1305 * pointers (e.g. gtt mappings when moving data between 1306 * textures). Fallback to the shmem path in that case. */ 1307 } 1308 1309 if (ret == -EFAULT) { 1310 if (obj->phys_handle) 1311 ret = i915_gem_phys_pwrite(obj, args, file); 1312 else if (i915_gem_object_has_struct_page(obj)) 1313 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1314 else 1315 ret = -ENODEV; 1316 } 1317 1318 out: 1319 drm_gem_object_unreference(&obj->base); 1320 unlock: 1321 mutex_unlock(&dev->struct_mutex); 1322 put_rpm: 1323 intel_runtime_pm_put(dev_priv); 1324 1325 return ret; 1326 } 1327 1328 static int 1329 i915_gem_check_wedge(unsigned reset_counter, bool interruptible) 1330 { 1331 if (__i915_terminally_wedged(reset_counter)) 1332 return -EIO; 1333 1334 if (__i915_reset_in_progress(reset_counter)) { 1335 /* Non-interruptible callers can't handle -EAGAIN, hence return 1336 * -EIO unconditionally for these. */ 1337 if (!interruptible) 1338 return -EIO; 1339 1340 return -EAGAIN; 1341 } 1342 1343 return 0; 1344 } 1345 1346 static unsigned long local_clock_us(unsigned *cpu) 1347 { 1348 unsigned long t; 1349 1350 /* Cheaply and approximately convert from nanoseconds to microseconds. 1351 * The result and subsequent calculations are also defined in the same 1352 * approximate microseconds units. The principal source of timing 1353 * error here is from the simple truncation. 1354 * 1355 * Note that local_clock() is only defined wrt to the current CPU; 1356 * the comparisons are no longer valid if we switch CPUs. Instead of 1357 * blocking preemption for the entire busywait, we can detect the CPU 1358 * switch and use that as indicator of system load and a reason to 1359 * stop busywaiting, see busywait_stop(). 
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned cpu)
{
	unsigned this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

bool __i915_spin_request(const struct drm_i915_gem_request *req,
			 int state, unsigned long timeout_us)
{
	unsigned cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_gem_request_completed(req))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax_lowlatency();
	} while (!need_resched());

	return false;
}

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: request to wait on
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: RPS client
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(reset);
	struct intel_wait wait;
	unsigned long timeout_remain;
	s64 before = 0; /* Only to silence a compiler warning. */
	int ret = 0;

	might_sleep();

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req))
		return 0;

	timeout_remain = MAX_SCHEDULE_TIMEOUT;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_remain = nsecs_to_jiffies_timeout(*timeout);

		/*
		 * Record current time in case interrupted by signal, or wedged.
		 */
		before = ktime_get_raw_ns();
	}

	trace_i915_gem_request_wait_begin(req);

	/* This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (INTEL_INFO(req->i915)->gen >= 6)
		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);

	/* Optimistic spin for the next ~jiffie before touching IRQs */
	if (i915_spin_request(req, state, 5))
		goto complete;

	set_current_state(state);
	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_wait_init(&wait, req->seqno);
	if (intel_engine_add_wait(req->engine, &wait))
		/* In order to check that we haven't missed the interrupt
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

	for (;;) {
		if (signal_pending_state(state, current)) {
			ret = -ERESTARTSYS;
			break;
		}

		timeout_remain = io_schedule_timeout(timeout_remain);
		if (timeout_remain == 0) {
			ret = -ETIME;
			break;
		}

		if (intel_wait_complete(&wait))
			break;

		set_current_state(state);

wakeup:
		/* Carefully check if the request is complete, giving time
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
		if (__i915_request_irq_complete(req))
			break;

		/* Only spin if we know the GPU is processing this request */
		if (i915_spin_request(req, state, 2))
			break;
	}
	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_engine_remove_wait(req->engine, &wait);
	__set_current_state(TASK_RUNNING);
complete:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (ktime_get_raw_ns() - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	if (rps && req->seqno == req->engine->last_submitted_seqno) {
		/* The GPU is now idle and this client has stalled.
		 * Since no other client has submitted a request in the
		 * meantime, assume that this client is the only one
		 * supplying work to the GPU but is unable to keep that
		 * work supplied because it is waiting. Since the GPU is
		 * then never kept fully busy, RPS autoclocking will
		 * keep the clocks relatively low, causing further delays.
		 * Compensate by giving the synchronous client credit for
		 * a waitboost next time.
1561 */ 1562 spin_lock(&req->i915->rps.client_lock); 1563 list_del_init(&rps->link); 1564 spin_unlock(&req->i915->rps.client_lock); 1565 } 1566 1567 return ret; 1568 } 1569 1570 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1571 struct drm_file *file) 1572 { 1573 struct drm_i915_file_private *file_priv; 1574 1575 WARN_ON(!req || !file || req->file_priv); 1576 1577 if (!req || !file) 1578 return -EINVAL; 1579 1580 if (req->file_priv) 1581 return -EINVAL; 1582 1583 file_priv = file->driver_priv; 1584 1585 spin_lock(&file_priv->mm.lock); 1586 req->file_priv = file_priv; 1587 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1588 spin_unlock(&file_priv->mm.lock); 1589 1590 req->pid = get_pid(task_pid(current)); 1591 1592 return 0; 1593 } 1594 1595 static inline void 1596 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1597 { 1598 struct drm_i915_file_private *file_priv = request->file_priv; 1599 1600 if (!file_priv) 1601 return; 1602 1603 spin_lock(&file_priv->mm.lock); 1604 list_del(&request->client_list); 1605 request->file_priv = NULL; 1606 spin_unlock(&file_priv->mm.lock); 1607 1608 put_pid(request->pid); 1609 request->pid = NULL; 1610 } 1611 1612 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1613 { 1614 trace_i915_gem_request_retire(request); 1615 1616 /* We know the GPU must have read the request to have 1617 * sent us the seqno + interrupt, so use the position 1618 * of tail of the request to update the last known position 1619 * of the GPU head. 1620 * 1621 * Note this requires that we are always called in request 1622 * completion order. 1623 */ 1624 request->ringbuf->last_retired_head = request->postfix; 1625 1626 list_del_init(&request->list); 1627 i915_gem_request_remove_from_client(request); 1628 1629 if (request->previous_context) { 1630 if (i915.enable_execlists) 1631 intel_lr_context_unpin(request->previous_context, 1632 request->engine); 1633 } 1634 1635 i915_gem_context_unreference(request->ctx); 1636 i915_gem_request_unreference(request); 1637 } 1638 1639 static void 1640 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1641 { 1642 struct intel_engine_cs *engine = req->engine; 1643 struct drm_i915_gem_request *tmp; 1644 1645 lockdep_assert_held(&engine->i915->drm.struct_mutex); 1646 1647 if (list_empty(&req->list)) 1648 return; 1649 1650 do { 1651 tmp = list_first_entry(&engine->request_list, 1652 typeof(*tmp), list); 1653 1654 i915_gem_request_retire(tmp); 1655 } while (tmp != req); 1656 1657 WARN_ON(i915_verify_lists(engine->dev)); 1658 } 1659 1660 /** 1661 * Waits for a request to be signaled, and cleans up the 1662 * request and object lists appropriately for that event. 1663 * @req: request to wait on 1664 */ 1665 int 1666 i915_wait_request(struct drm_i915_gem_request *req) 1667 { 1668 struct drm_i915_private *dev_priv = req->i915; 1669 bool interruptible; 1670 int ret; 1671 1672 interruptible = dev_priv->mm.interruptible; 1673 1674 BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); 1675 1676 ret = __i915_wait_request(req, interruptible, NULL, NULL); 1677 if (ret) 1678 return ret; 1679 1680 /* If the GPU hung, we want to keep the requests to find the guilty. */ 1681 if (!i915_reset_in_progress(&dev_priv->gpu_error)) 1682 __i915_gem_request_retire__upto(req); 1683 1684 return 0; 1685 } 1686 1687 /** 1688 * Ensures that all rendering to the object has completed and the object is 1689 * safe to unbind from the GTT or access from the CPU. 
1690 * @obj: i915 gem object 1691 * @readonly: waiting for read access or write 1692 */ 1693 int 1694 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1695 bool readonly) 1696 { 1697 int ret, i; 1698 1699 if (!obj->active) 1700 return 0; 1701 1702 if (readonly) { 1703 if (obj->last_write_req != NULL) { 1704 ret = i915_wait_request(obj->last_write_req); 1705 if (ret) 1706 return ret; 1707 1708 i = obj->last_write_req->engine->id; 1709 if (obj->last_read_req[i] == obj->last_write_req) 1710 i915_gem_object_retire__read(obj, i); 1711 else 1712 i915_gem_object_retire__write(obj); 1713 } 1714 } else { 1715 for (i = 0; i < I915_NUM_ENGINES; i++) { 1716 if (obj->last_read_req[i] == NULL) 1717 continue; 1718 1719 ret = i915_wait_request(obj->last_read_req[i]); 1720 if (ret) 1721 return ret; 1722 1723 i915_gem_object_retire__read(obj, i); 1724 } 1725 GEM_BUG_ON(obj->active); 1726 } 1727 1728 return 0; 1729 } 1730 1731 static void 1732 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1733 struct drm_i915_gem_request *req) 1734 { 1735 int ring = req->engine->id; 1736 1737 if (obj->last_read_req[ring] == req) 1738 i915_gem_object_retire__read(obj, ring); 1739 else if (obj->last_write_req == req) 1740 i915_gem_object_retire__write(obj); 1741 1742 if (!i915_reset_in_progress(&req->i915->gpu_error)) 1743 __i915_gem_request_retire__upto(req); 1744 } 1745 1746 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1747 * as the object state may change during this call. 1748 */ 1749 static __must_check int 1750 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1751 struct intel_rps_client *rps, 1752 bool readonly) 1753 { 1754 struct drm_device *dev = obj->base.dev; 1755 struct drm_i915_private *dev_priv = to_i915(dev); 1756 struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; 1757 int ret, i, n = 0; 1758 1759 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1760 BUG_ON(!dev_priv->mm.interruptible); 1761 1762 if (!obj->active) 1763 return 0; 1764 1765 if (readonly) { 1766 struct drm_i915_gem_request *req; 1767 1768 req = obj->last_write_req; 1769 if (req == NULL) 1770 return 0; 1771 1772 requests[n++] = i915_gem_request_reference(req); 1773 } else { 1774 for (i = 0; i < I915_NUM_ENGINES; i++) { 1775 struct drm_i915_gem_request *req; 1776 1777 req = obj->last_read_req[i]; 1778 if (req == NULL) 1779 continue; 1780 1781 requests[n++] = i915_gem_request_reference(req); 1782 } 1783 } 1784 1785 mutex_unlock(&dev->struct_mutex); 1786 ret = 0; 1787 for (i = 0; ret == 0 && i < n; i++) 1788 ret = __i915_wait_request(requests[i], true, NULL, rps); 1789 mutex_lock(&dev->struct_mutex); 1790 1791 for (i = 0; i < n; i++) { 1792 if (ret == 0) 1793 i915_gem_object_retire_request(obj, requests[i]); 1794 i915_gem_request_unreference(requests[i]); 1795 } 1796 1797 return ret; 1798 } 1799 1800 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1801 { 1802 struct drm_i915_file_private *fpriv = file->driver_priv; 1803 return &fpriv->rps; 1804 } 1805 1806 static enum fb_op_origin 1807 write_origin(struct drm_i915_gem_object *obj, unsigned domain) 1808 { 1809 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? 1810 ORIGIN_GTT : ORIGIN_CPU; 1811 } 1812 1813 /** 1814 * Called when user space prepares to use an object with the CPU, either 1815 * through the mmap ioctl's mapping or a GTT mapping. 
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 * it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on; hiding
 * the mmap call in a driver private ioctl will break that.
The i915 driver only 1930 * does cpu mmaps this way because we didn't know better. 1931 */ 1932 int 1933 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1934 struct drm_file *file) 1935 { 1936 struct drm_i915_gem_mmap *args = data; 1937 struct drm_gem_object *obj; 1938 unsigned long addr; 1939 1940 if (args->flags & ~(I915_MMAP_WC)) 1941 return -EINVAL; 1942 1943 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1944 return -ENODEV; 1945 1946 obj = drm_gem_object_lookup(file, args->handle); 1947 if (obj == NULL) 1948 return -ENOENT; 1949 1950 /* prime objects have no backing filp to GEM mmap 1951 * pages from. 1952 */ 1953 if (!obj->filp) { 1954 drm_gem_object_unreference_unlocked(obj); 1955 return -EINVAL; 1956 } 1957 1958 addr = vm_mmap(obj->filp, 0, args->size, 1959 PROT_READ | PROT_WRITE, MAP_SHARED, 1960 args->offset); 1961 if (args->flags & I915_MMAP_WC) { 1962 struct mm_struct *mm = current->mm; 1963 struct vm_area_struct *vma; 1964 1965 if (down_write_killable(&mm->mmap_sem)) { 1966 drm_gem_object_unreference_unlocked(obj); 1967 return -EINTR; 1968 } 1969 vma = find_vma(mm, addr); 1970 if (vma) 1971 vma->vm_page_prot = 1972 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1973 else 1974 addr = -ENOMEM; 1975 up_write(&mm->mmap_sem); 1976 1977 /* This may race, but that's ok, it only gets set */ 1978 WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true); 1979 } 1980 drm_gem_object_unreference_unlocked(obj); 1981 if (IS_ERR((void *)addr)) 1982 return addr; 1983 1984 args->addr_ptr = (uint64_t) addr; 1985 1986 return 0; 1987 } 1988 1989 /** 1990 * i915_gem_fault - fault a page into the GTT 1991 * @vma: VMA in question 1992 * @vmf: fault info 1993 * 1994 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1995 * from userspace. The fault handler takes care of binding the object to 1996 * the GTT (if needed), allocating and programming a fence register (again, 1997 * only if needed based on whether the old reg is still valid or the object 1998 * is tiled) and inserting a new PTE into the faulting process. 1999 * 2000 * Note that the faulting process may involve evicting existing objects 2001 * from the GTT and/or fence registers to make room. So performance may 2002 * suffer if the GTT working set is large or there are few fence registers 2003 * left. 2004 */ 2005 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2006 { 2007 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 2008 struct drm_device *dev = obj->base.dev; 2009 struct drm_i915_private *dev_priv = to_i915(dev); 2010 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2011 struct i915_ggtt_view view = i915_ggtt_view_normal; 2012 pgoff_t page_offset; 2013 unsigned long pfn; 2014 int ret = 0; 2015 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 2016 2017 intel_runtime_pm_get(dev_priv); 2018 2019 /* We don't use vmf->pgoff since that has the fake offset */ 2020 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 2021 PAGE_SHIFT; 2022 2023 ret = i915_mutex_lock_interruptible(dev); 2024 if (ret) 2025 goto out; 2026 2027 trace_i915_gem_object_fault(obj, page_offset, true, write); 2028 2029 /* Try to flush the object off the GPU first without holding the lock. 2030 * Upon reacquiring the lock, we will perform our sanity checks and then 2031 * repeat the flush holding the lock in the normal manner to catch cases 2032 * where we are gazumped. 
2033 */ 2034 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 2035 if (ret) 2036 goto unlock; 2037 2038 /* Access to snoopable pages through the GTT is incoherent. */ 2039 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 2040 ret = -EFAULT; 2041 goto unlock; 2042 } 2043 2044 /* Use a partial view if the object is bigger than the aperture. */ 2045 if (obj->base.size >= ggtt->mappable_end && 2046 obj->tiling_mode == I915_TILING_NONE) { 2047 static const unsigned int chunk_size = 256; // 1 MiB 2048 2049 memset(&view, 0, sizeof(view)); 2050 view.type = I915_GGTT_VIEW_PARTIAL; 2051 view.params.partial.offset = rounddown(page_offset, chunk_size); 2052 view.params.partial.size = 2053 min_t(unsigned int, 2054 chunk_size, 2055 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 2056 view.params.partial.offset); 2057 } 2058 2059 /* Now pin it into the GTT if needed */ 2060 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 2061 if (ret) 2062 goto unlock; 2063 2064 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2065 if (ret) 2066 goto unpin; 2067 2068 ret = i915_gem_object_get_fence(obj); 2069 if (ret) 2070 goto unpin; 2071 2072 /* Finally, remap it using the new GTT offset */ 2073 pfn = ggtt->mappable_base + 2074 i915_gem_obj_ggtt_offset_view(obj, &view); 2075 pfn >>= PAGE_SHIFT; 2076 2077 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 2078 /* Overriding existing pages in partial view does not cause 2079 * us any trouble as TLBs are still valid because the fault 2080 * is due to userspace losing part of the mapping or never 2081 * having accessed it before (at this partials' range). 2082 */ 2083 unsigned long base = vma->vm_start + 2084 (view.params.partial.offset << PAGE_SHIFT); 2085 unsigned int i; 2086 2087 for (i = 0; i < view.params.partial.size; i++) { 2088 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 2089 if (ret) 2090 break; 2091 } 2092 2093 obj->fault_mappable = true; 2094 } else { 2095 if (!obj->fault_mappable) { 2096 unsigned long size = min_t(unsigned long, 2097 vma->vm_end - vma->vm_start, 2098 obj->base.size); 2099 int i; 2100 2101 for (i = 0; i < size >> PAGE_SHIFT; i++) { 2102 ret = vm_insert_pfn(vma, 2103 (unsigned long)vma->vm_start + i * PAGE_SIZE, 2104 pfn + i); 2105 if (ret) 2106 break; 2107 } 2108 2109 obj->fault_mappable = true; 2110 } else 2111 ret = vm_insert_pfn(vma, 2112 (unsigned long)vmf->virtual_address, 2113 pfn + page_offset); 2114 } 2115 unpin: 2116 i915_gem_object_ggtt_unpin_view(obj, &view); 2117 unlock: 2118 mutex_unlock(&dev->struct_mutex); 2119 out: 2120 switch (ret) { 2121 case -EIO: 2122 /* 2123 * We eat errors when the gpu is terminally wedged to avoid 2124 * userspace unduly crashing (gl has no provisions for mmaps to 2125 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2126 * and so needs to be reported. 2127 */ 2128 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2129 ret = VM_FAULT_SIGBUS; 2130 break; 2131 } 2132 case -EAGAIN: 2133 /* 2134 * EAGAIN means the gpu is hung and we'll wait for the error 2135 * handler to reset everything when re-faulting in 2136 * i915_mutex_lock_interruptible. 2137 */ 2138 case 0: 2139 case -ERESTARTSYS: 2140 case -EINTR: 2141 case -EBUSY: 2142 /* 2143 * EBUSY is ok: this just means that another thread 2144 * already did the job. 
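* For all of these we return VM_FAULT_NOPAGE: the PTEs were either
* installed by vm_insert_pfn() above, or will be once the fault is
* retried, so the core MM must not try to install a page itself.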
2145 */ 2146 ret = VM_FAULT_NOPAGE; 2147 break; 2148 case -ENOMEM: 2149 ret = VM_FAULT_OOM; 2150 break; 2151 case -ENOSPC: 2152 case -EFAULT: 2153 ret = VM_FAULT_SIGBUS; 2154 break; 2155 default: 2156 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2157 ret = VM_FAULT_SIGBUS; 2158 break; 2159 } 2160 2161 intel_runtime_pm_put(dev_priv); 2162 return ret; 2163 } 2164 2165 /** 2166 * i915_gem_release_mmap - remove physical page mappings 2167 * @obj: obj in question 2168 * 2169 * Preserve the reservation of the mmapping with the DRM core code, but 2170 * relinquish ownership of the pages back to the system. 2171 * 2172 * It is vital that we remove the page mapping if we have mapped a tiled 2173 * object through the GTT and then lose the fence register due to 2174 * resource pressure. Similarly if the object has been moved out of the 2175 * aperture, than pages mapped into userspace must be revoked. Removing the 2176 * mapping will then trigger a page fault on the next user access, allowing 2177 * fixup by i915_gem_fault(). 2178 */ 2179 void 2180 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2181 { 2182 /* Serialisation between user GTT access and our code depends upon 2183 * revoking the CPU's PTE whilst the mutex is held. The next user 2184 * pagefault then has to wait until we release the mutex. 2185 */ 2186 lockdep_assert_held(&obj->base.dev->struct_mutex); 2187 2188 if (!obj->fault_mappable) 2189 return; 2190 2191 drm_vma_node_unmap(&obj->base.vma_node, 2192 obj->base.dev->anon_inode->i_mapping); 2193 2194 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2195 * memory transactions from userspace before we return. The TLB 2196 * flushing implied above by changing the PTE above *should* be 2197 * sufficient, an extra barrier here just provides us with a bit 2198 * of paranoid documentation about our requirement to serialise 2199 * memory writes before touching registers / GSM. 2200 */ 2201 wmb(); 2202 2203 obj->fault_mappable = false; 2204 } 2205 2206 void 2207 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2208 { 2209 struct drm_i915_gem_object *obj; 2210 2211 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2212 i915_gem_release_mmap(obj); 2213 } 2214 2215 uint32_t 2216 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2217 { 2218 uint32_t gtt_size; 2219 2220 if (INTEL_INFO(dev)->gen >= 4 || 2221 tiling_mode == I915_TILING_NONE) 2222 return size; 2223 2224 /* Previous chips need a power-of-two fence region when tiling */ 2225 if (IS_GEN3(dev)) 2226 gtt_size = 1024*1024; 2227 else 2228 gtt_size = 512*1024; 2229 2230 while (gtt_size < size) 2231 gtt_size <<= 1; 2232 2233 return gtt_size; 2234 } 2235 2236 /** 2237 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2238 * @dev: drm device 2239 * @size: object size 2240 * @tiling_mode: tiling mode 2241 * @fenced: is fenced alignemned required or not 2242 * 2243 * Return the required GTT alignment for an object, taking into account 2244 * potential fence register mapping. 2245 */ 2246 uint32_t 2247 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2248 int tiling_mode, bool fenced) 2249 { 2250 /* 2251 * Minimum alignment is 4k (GTT page size), but might be greater 2252 * if a fence register is needed for the object. 
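*
* The fenced case is the power-of-two region from i915_gem_get_gtt_size()
* above: for example a 600 KiB tiled object needs a 1 MiB fence region
* (512 KiB doubled once on gen2, while gen3 starts at 1 MiB outright),
* and so must also be bound with 1 MiB alignment.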
2253 */ 2254 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2255 tiling_mode == I915_TILING_NONE) 2256 return 4096; 2257 2258 /* 2259 * Previous chips need to be aligned to the size of the smallest 2260 * fence register that can contain the object. 2261 */ 2262 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2263 } 2264 2265 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2266 { 2267 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2268 int ret; 2269 2270 dev_priv->mm.shrinker_no_lock_stealing = true; 2271 2272 ret = drm_gem_create_mmap_offset(&obj->base); 2273 if (ret != -ENOSPC) 2274 goto out; 2275 2276 /* Badly fragmented mmap space? The only way we can recover 2277 * space is by destroying unwanted objects. We can't randomly release 2278 * mmap_offsets as userspace expects them to be persistent for the 2279 * lifetime of the objects. The closest we can is to release the 2280 * offsets on purgeable objects by truncating it and marking it purged, 2281 * which prevents userspace from ever using that object again. 2282 */ 2283 i915_gem_shrink(dev_priv, 2284 obj->base.size >> PAGE_SHIFT, 2285 I915_SHRINK_BOUND | 2286 I915_SHRINK_UNBOUND | 2287 I915_SHRINK_PURGEABLE); 2288 ret = drm_gem_create_mmap_offset(&obj->base); 2289 if (ret != -ENOSPC) 2290 goto out; 2291 2292 i915_gem_shrink_all(dev_priv); 2293 ret = drm_gem_create_mmap_offset(&obj->base); 2294 out: 2295 dev_priv->mm.shrinker_no_lock_stealing = false; 2296 2297 return ret; 2298 } 2299 2300 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2301 { 2302 drm_gem_free_mmap_offset(&obj->base); 2303 } 2304 2305 int 2306 i915_gem_mmap_gtt(struct drm_file *file, 2307 struct drm_device *dev, 2308 uint32_t handle, 2309 uint64_t *offset) 2310 { 2311 struct drm_i915_gem_object *obj; 2312 int ret; 2313 2314 ret = i915_mutex_lock_interruptible(dev); 2315 if (ret) 2316 return ret; 2317 2318 obj = to_intel_bo(drm_gem_object_lookup(file, handle)); 2319 if (&obj->base == NULL) { 2320 ret = -ENOENT; 2321 goto unlock; 2322 } 2323 2324 if (obj->madv != I915_MADV_WILLNEED) { 2325 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2326 ret = -EFAULT; 2327 goto out; 2328 } 2329 2330 ret = i915_gem_object_create_mmap_offset(obj); 2331 if (ret) 2332 goto out; 2333 2334 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2335 2336 out: 2337 drm_gem_object_unreference(&obj->base); 2338 unlock: 2339 mutex_unlock(&dev->struct_mutex); 2340 return ret; 2341 } 2342 2343 /** 2344 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2345 * @dev: DRM device 2346 * @data: GTT mapping ioctl data 2347 * @file: GEM object info 2348 * 2349 * Simply returns the fake offset to userspace so it can mmap it. 2350 * The mmap call will end up in drm_gem_mmap(), which will set things 2351 * up so we can get faults in the handler above. 2352 * 2353 * The fault handler will take care of binding the object into the GTT 2354 * (since it may have been evicted to make room for something), allocating 2355 * a fence register, and mapping the appropriate aperture address into 2356 * userspace. 
2357 */ 2358 int 2359 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2360 struct drm_file *file) 2361 { 2362 struct drm_i915_gem_mmap_gtt *args = data; 2363 2364 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2365 } 2366 2367 /* Immediately discard the backing storage */ 2368 static void 2369 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2370 { 2371 i915_gem_object_free_mmap_offset(obj); 2372 2373 if (obj->base.filp == NULL) 2374 return; 2375 2376 /* Our goal here is to return as much of the memory as 2377 * is possible back to the system as we are called from OOM. 2378 * To do this we must instruct the shmfs to drop all of its 2379 * backing pages, *now*. 2380 */ 2381 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2382 obj->madv = __I915_MADV_PURGED; 2383 } 2384 2385 /* Try to discard unwanted pages */ 2386 static void 2387 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2388 { 2389 struct address_space *mapping; 2390 2391 switch (obj->madv) { 2392 case I915_MADV_DONTNEED: 2393 i915_gem_object_truncate(obj); 2394 case __I915_MADV_PURGED: 2395 return; 2396 } 2397 2398 if (obj->base.filp == NULL) 2399 return; 2400 2401 mapping = obj->base.filp->f_mapping, 2402 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2403 } 2404 2405 static void 2406 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2407 { 2408 struct sgt_iter sgt_iter; 2409 struct page *page; 2410 int ret; 2411 2412 BUG_ON(obj->madv == __I915_MADV_PURGED); 2413 2414 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2415 if (WARN_ON(ret)) { 2416 /* In the event of a disaster, abandon all caches and 2417 * hope for the best. 2418 */ 2419 i915_gem_clflush_object(obj, true); 2420 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2421 } 2422 2423 i915_gem_gtt_finish_object(obj); 2424 2425 if (i915_gem_object_needs_bit17_swizzle(obj)) 2426 i915_gem_object_save_bit_17_swizzle(obj); 2427 2428 if (obj->madv == I915_MADV_DONTNEED) 2429 obj->dirty = 0; 2430 2431 for_each_sgt_page(page, sgt_iter, obj->pages) { 2432 if (obj->dirty) 2433 set_page_dirty(page); 2434 2435 if (obj->madv == I915_MADV_WILLNEED) 2436 mark_page_accessed(page); 2437 2438 put_page(page); 2439 } 2440 obj->dirty = 0; 2441 2442 sg_free_table(obj->pages); 2443 kfree(obj->pages); 2444 } 2445 2446 int 2447 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2448 { 2449 const struct drm_i915_gem_object_ops *ops = obj->ops; 2450 2451 if (obj->pages == NULL) 2452 return 0; 2453 2454 if (obj->pages_pin_count) 2455 return -EBUSY; 2456 2457 BUG_ON(i915_gem_obj_bound_any(obj)); 2458 2459 /* ->put_pages might need to allocate memory for the bit17 swizzle 2460 * array, hence protect them from being reaped by removing them from gtt 2461 * lists early. 
*/ 2462 list_del(&obj->global_list); 2463 2464 if (obj->mapping) { 2465 if (is_vmalloc_addr(obj->mapping)) 2466 vunmap(obj->mapping); 2467 else 2468 kunmap(kmap_to_page(obj->mapping)); 2469 obj->mapping = NULL; 2470 } 2471 2472 ops->put_pages(obj); 2473 obj->pages = NULL; 2474 2475 i915_gem_object_invalidate(obj); 2476 2477 return 0; 2478 } 2479 2480 static int 2481 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2482 { 2483 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2484 int page_count, i; 2485 struct address_space *mapping; 2486 struct sg_table *st; 2487 struct scatterlist *sg; 2488 struct sgt_iter sgt_iter; 2489 struct page *page; 2490 unsigned long last_pfn = 0; /* suppress gcc warning */ 2491 int ret; 2492 gfp_t gfp; 2493 2494 /* Assert that the object is not currently in any GPU domain. As it 2495 * wasn't in the GTT, there shouldn't be any way it could have been in 2496 * a GPU cache 2497 */ 2498 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2499 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2500 2501 st = kmalloc(sizeof(*st), GFP_KERNEL); 2502 if (st == NULL) 2503 return -ENOMEM; 2504 2505 page_count = obj->base.size / PAGE_SIZE; 2506 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2507 kfree(st); 2508 return -ENOMEM; 2509 } 2510 2511 /* Get the list of pages out of our struct file. They'll be pinned 2512 * at this point until we release them. 2513 * 2514 * Fail silently without starting the shrinker 2515 */ 2516 mapping = obj->base.filp->f_mapping; 2517 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); 2518 gfp |= __GFP_NORETRY | __GFP_NOWARN; 2519 sg = st->sgl; 2520 st->nents = 0; 2521 for (i = 0; i < page_count; i++) { 2522 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2523 if (IS_ERR(page)) { 2524 i915_gem_shrink(dev_priv, 2525 page_count, 2526 I915_SHRINK_BOUND | 2527 I915_SHRINK_UNBOUND | 2528 I915_SHRINK_PURGEABLE); 2529 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2530 } 2531 if (IS_ERR(page)) { 2532 /* We've tried hard to allocate the memory by reaping 2533 * our own buffer, now let the real VM do its job and 2534 * go down in flames if truly OOM. 2535 */ 2536 i915_gem_shrink_all(dev_priv); 2537 page = shmem_read_mapping_page(mapping, i); 2538 if (IS_ERR(page)) { 2539 ret = PTR_ERR(page); 2540 goto err_pages; 2541 } 2542 } 2543 #ifdef CONFIG_SWIOTLB 2544 if (swiotlb_nr_tbl()) { 2545 st->nents++; 2546 sg_set_page(sg, page, PAGE_SIZE, 0); 2547 sg = sg_next(sg); 2548 continue; 2549 } 2550 #endif 2551 if (!i || page_to_pfn(page) != last_pfn + 1) { 2552 if (i) 2553 sg = sg_next(sg); 2554 st->nents++; 2555 sg_set_page(sg, page, PAGE_SIZE, 0); 2556 } else { 2557 sg->length += PAGE_SIZE; 2558 } 2559 last_pfn = page_to_pfn(page); 2560 2561 /* Check that the i965g/gm workaround works. 
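* If the mapping was constrained with __GFP_DMA32, every page shmem
* handed back must lie below the 4 GiB boundary, i.e. have a pfn below
* 0x00100000.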
*/ 2562 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2563 } 2564 #ifdef CONFIG_SWIOTLB 2565 if (!swiotlb_nr_tbl()) 2566 #endif 2567 sg_mark_end(sg); 2568 obj->pages = st; 2569 2570 ret = i915_gem_gtt_prepare_object(obj); 2571 if (ret) 2572 goto err_pages; 2573 2574 if (i915_gem_object_needs_bit17_swizzle(obj)) 2575 i915_gem_object_do_bit_17_swizzle(obj); 2576 2577 if (obj->tiling_mode != I915_TILING_NONE && 2578 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2579 i915_gem_object_pin_pages(obj); 2580 2581 return 0; 2582 2583 err_pages: 2584 sg_mark_end(sg); 2585 for_each_sgt_page(page, sgt_iter, st) 2586 put_page(page); 2587 sg_free_table(st); 2588 kfree(st); 2589 2590 /* shmemfs first checks if there is enough memory to allocate the page 2591 * and reports ENOSPC should there be insufficient, along with the usual 2592 * ENOMEM for a genuine allocation failure. 2593 * 2594 * We use ENOSPC in our driver to mean that we have run out of aperture 2595 * space and so want to translate the error from shmemfs back to our 2596 * usual understanding of ENOMEM. 2597 */ 2598 if (ret == -ENOSPC) 2599 ret = -ENOMEM; 2600 2601 return ret; 2602 } 2603 2604 /* Ensure that the associated pages are gathered from the backing storage 2605 * and pinned into our object. i915_gem_object_get_pages() may be called 2606 * multiple times before they are released by a single call to 2607 * i915_gem_object_put_pages() - once the pages are no longer referenced 2608 * either as a result of memory pressure (reaping pages under the shrinker) 2609 * or as the object is itself released. 2610 */ 2611 int 2612 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2613 { 2614 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2615 const struct drm_i915_gem_object_ops *ops = obj->ops; 2616 int ret; 2617 2618 if (obj->pages) 2619 return 0; 2620 2621 if (obj->madv != I915_MADV_WILLNEED) { 2622 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2623 return -EFAULT; 2624 } 2625 2626 BUG_ON(obj->pages_pin_count); 2627 2628 ret = ops->get_pages(obj); 2629 if (ret) 2630 return ret; 2631 2632 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2633 2634 obj->get_page.sg = obj->pages->sgl; 2635 obj->get_page.last = 0; 2636 2637 return 0; 2638 } 2639 2640 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2641 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) 2642 { 2643 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2644 struct sg_table *sgt = obj->pages; 2645 struct sgt_iter sgt_iter; 2646 struct page *page; 2647 struct page *stack_pages[32]; 2648 struct page **pages = stack_pages; 2649 unsigned long i = 0; 2650 void *addr; 2651 2652 /* A single page can always be kmapped */ 2653 if (n_pages == 1) 2654 return kmap(sg_page(sgt->sgl)); 2655 2656 if (n_pages > ARRAY_SIZE(stack_pages)) { 2657 /* Too big for stack -- allocate temporary array instead */ 2658 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); 2659 if (!pages) 2660 return NULL; 2661 } 2662 2663 for_each_sgt_page(page, sgt_iter, sgt) 2664 pages[i++] = page; 2665 2666 /* Check that we have the expected number of pages */ 2667 GEM_BUG_ON(i != n_pages); 2668 2669 addr = vmap(pages, n_pages, 0, PAGE_KERNEL); 2670 2671 if (pages != stack_pages) 2672 drm_free_large(pages); 2673 2674 return addr; 2675 } 2676 2677 /* get, pin, and map the pages of the object into kernel space */ 2678 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2679 { 2680 int ret; 2681 2682 
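	/* The mapping is built once and cached in obj->mapping below; repeat
	 * callers only gain another pin on the backing pages.
	 */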
lockdep_assert_held(&obj->base.dev->struct_mutex); 2683 2684 ret = i915_gem_object_get_pages(obj); 2685 if (ret) 2686 return ERR_PTR(ret); 2687 2688 i915_gem_object_pin_pages(obj); 2689 2690 if (!obj->mapping) { 2691 obj->mapping = i915_gem_object_map(obj); 2692 if (!obj->mapping) { 2693 i915_gem_object_unpin_pages(obj); 2694 return ERR_PTR(-ENOMEM); 2695 } 2696 } 2697 2698 return obj->mapping; 2699 } 2700 2701 void i915_vma_move_to_active(struct i915_vma *vma, 2702 struct drm_i915_gem_request *req) 2703 { 2704 struct drm_i915_gem_object *obj = vma->obj; 2705 struct intel_engine_cs *engine; 2706 2707 engine = i915_gem_request_get_engine(req); 2708 2709 /* Add a reference if we're newly entering the active list. */ 2710 if (obj->active == 0) 2711 drm_gem_object_reference(&obj->base); 2712 obj->active |= intel_engine_flag(engine); 2713 2714 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2715 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2716 2717 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2718 } 2719 2720 static void 2721 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2722 { 2723 GEM_BUG_ON(obj->last_write_req == NULL); 2724 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2725 2726 i915_gem_request_assign(&obj->last_write_req, NULL); 2727 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2728 } 2729 2730 static void 2731 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2732 { 2733 struct i915_vma *vma; 2734 2735 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2736 GEM_BUG_ON(!(obj->active & (1 << ring))); 2737 2738 list_del_init(&obj->engine_list[ring]); 2739 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2740 2741 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2742 i915_gem_object_retire__write(obj); 2743 2744 obj->active &= ~(1 << ring); 2745 if (obj->active) 2746 return; 2747 2748 /* Bump our place on the bound list to keep it roughly in LRU order 2749 * so that we don't steal from recently used but inactive objects 2750 * (unless we are forced to ofc!) 
2751 */ 2752 list_move_tail(&obj->global_list, 2753 &to_i915(obj->base.dev)->mm.bound_list); 2754 2755 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2756 if (!list_empty(&vma->vm_link)) 2757 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2758 } 2759 2760 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2761 drm_gem_object_unreference(&obj->base); 2762 } 2763 2764 static int 2765 i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) 2766 { 2767 struct intel_engine_cs *engine; 2768 int ret; 2769 2770 /* Carefully retire all requests without writing to the rings */ 2771 for_each_engine(engine, dev_priv) { 2772 ret = intel_engine_idle(engine); 2773 if (ret) 2774 return ret; 2775 } 2776 i915_gem_retire_requests(dev_priv); 2777 2778 /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ 2779 if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { 2780 while (intel_kick_waiters(dev_priv) || 2781 intel_kick_signalers(dev_priv)) 2782 yield(); 2783 } 2784 2785 /* Finally reset hw state */ 2786 for_each_engine(engine, dev_priv) 2787 intel_ring_init_seqno(engine, seqno); 2788 2789 return 0; 2790 } 2791 2792 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2793 { 2794 struct drm_i915_private *dev_priv = to_i915(dev); 2795 int ret; 2796 2797 if (seqno == 0) 2798 return -EINVAL; 2799 2800 /* HWS page needs to be set less than what we 2801 * will inject to ring 2802 */ 2803 ret = i915_gem_init_seqno(dev_priv, seqno - 1); 2804 if (ret) 2805 return ret; 2806 2807 /* Carefully set the last_seqno value so that wrap 2808 * detection still works 2809 */ 2810 dev_priv->next_seqno = seqno; 2811 dev_priv->last_seqno = seqno - 1; 2812 if (dev_priv->last_seqno == 0) 2813 dev_priv->last_seqno--; 2814 2815 return 0; 2816 } 2817 2818 int 2819 i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) 2820 { 2821 /* reserve 0 for non-seqno */ 2822 if (dev_priv->next_seqno == 0) { 2823 int ret = i915_gem_init_seqno(dev_priv, 0); 2824 if (ret) 2825 return ret; 2826 2827 dev_priv->next_seqno = 1; 2828 } 2829 2830 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2831 return 0; 2832 } 2833 2834 static void i915_gem_mark_busy(const struct intel_engine_cs *engine) 2835 { 2836 struct drm_i915_private *dev_priv = engine->i915; 2837 2838 dev_priv->gt.active_engines |= intel_engine_flag(engine); 2839 if (dev_priv->gt.awake) 2840 return; 2841 2842 intel_runtime_pm_get_noresume(dev_priv); 2843 dev_priv->gt.awake = true; 2844 2845 i915_update_gfx_val(dev_priv); 2846 if (INTEL_GEN(dev_priv) >= 6) 2847 gen6_rps_busy(dev_priv); 2848 2849 queue_delayed_work(dev_priv->wq, 2850 &dev_priv->gt.retire_work, 2851 round_jiffies_up_relative(HZ)); 2852 } 2853 2854 /* 2855 * NB: This function is not allowed to fail. Doing so would mean the the 2856 * request is not being tracked for completion but the work itself is 2857 * going to happen on the hardware. This would be a Bad Thing(tm). 2858 */ 2859 void __i915_add_request(struct drm_i915_gem_request *request, 2860 struct drm_i915_gem_object *obj, 2861 bool flush_caches) 2862 { 2863 struct intel_engine_cs *engine; 2864 struct intel_ringbuffer *ringbuf; 2865 u32 request_start; 2866 u32 reserved_tail; 2867 int ret; 2868 2869 if (WARN_ON(request == NULL)) 2870 return; 2871 2872 engine = request->engine; 2873 ringbuf = request->ringbuf; 2874 2875 /* 2876 * To ensure that this call will not fail, space for its emissions 2877 * should already have been reserved in the ring buffer. 
Let the ring 2878 * know that it is time to use that space up. 2879 */ 2880 request_start = intel_ring_get_tail(ringbuf); 2881 reserved_tail = request->reserved_space; 2882 request->reserved_space = 0; 2883 2884 /* 2885 * Emit any outstanding flushes - execbuf can fail to emit the flush 2886 * after having emitted the batchbuffer command. Hence we need to fix 2887 * things up similar to emitting the lazy request. The difference here 2888 * is that the flush _must_ happen before the next request, no matter 2889 * what. 2890 */ 2891 if (flush_caches) { 2892 if (i915.enable_execlists) 2893 ret = logical_ring_flush_all_caches(request); 2894 else 2895 ret = intel_ring_flush_all_caches(request); 2896 /* Not allowed to fail! */ 2897 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2898 } 2899 2900 trace_i915_gem_request_add(request); 2901 2902 request->head = request_start; 2903 2904 /* Whilst this request exists, batch_obj will be on the 2905 * active_list, and so will hold the active reference. Only when this 2906 * request is retired will the the batch_obj be moved onto the 2907 * inactive_list and lose its active reference. Hence we do not need 2908 * to explicitly hold another reference here. 2909 */ 2910 request->batch_obj = obj; 2911 2912 /* Seal the request and mark it as pending execution. Note that 2913 * we may inspect this state, without holding any locks, during 2914 * hangcheck. Hence we apply the barrier to ensure that we do not 2915 * see a more recent value in the hws than we are tracking. 2916 */ 2917 request->emitted_jiffies = jiffies; 2918 request->previous_seqno = engine->last_submitted_seqno; 2919 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2920 list_add_tail(&request->list, &engine->request_list); 2921 2922 /* Record the position of the start of the request so that 2923 * should we detect the updated seqno part-way through the 2924 * GPU processing the request, we never over-estimate the 2925 * position of the head. 2926 */ 2927 request->postfix = intel_ring_get_tail(ringbuf); 2928 2929 if (i915.enable_execlists) 2930 ret = engine->emit_request(request); 2931 else { 2932 ret = engine->add_request(request); 2933 2934 request->tail = intel_ring_get_tail(ringbuf); 2935 } 2936 /* Not allowed to fail! */ 2937 WARN(ret, "emit|add_request failed: %d!\n", ret); 2938 /* Sanity check that the reserved size was large enough. 
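* The bytes consumed are tail - request_start, modulo the ring size:
* e.g. with a 4096 byte ring, a request_start of 4000 and a final tail
* of 100 means 100 - 4000 + 4096 = 196 bytes were emitted, which must
* not exceed the space reserved up front.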
*/ 2939 ret = intel_ring_get_tail(ringbuf) - request_start; 2940 if (ret < 0) 2941 ret += ringbuf->size; 2942 WARN_ONCE(ret > reserved_tail, 2943 "Not enough space reserved (%d bytes) " 2944 "for adding the request (%d bytes)\n", 2945 reserved_tail, ret); 2946 2947 i915_gem_mark_busy(engine); 2948 } 2949 2950 static bool i915_context_is_banned(const struct i915_gem_context *ctx) 2951 { 2952 unsigned long elapsed; 2953 2954 if (ctx->hang_stats.banned) 2955 return true; 2956 2957 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2958 if (ctx->hang_stats.ban_period_seconds && 2959 elapsed <= ctx->hang_stats.ban_period_seconds) { 2960 DRM_DEBUG("context hanging too fast, banning!\n"); 2961 return true; 2962 } 2963 2964 return false; 2965 } 2966 2967 static void i915_set_reset_status(struct i915_gem_context *ctx, 2968 const bool guilty) 2969 { 2970 struct i915_ctx_hang_stats *hs = &ctx->hang_stats; 2971 2972 if (guilty) { 2973 hs->banned = i915_context_is_banned(ctx); 2974 hs->batch_active++; 2975 hs->guilty_ts = get_seconds(); 2976 } else { 2977 hs->batch_pending++; 2978 } 2979 } 2980 2981 void i915_gem_request_free(struct kref *req_ref) 2982 { 2983 struct drm_i915_gem_request *req = container_of(req_ref, 2984 typeof(*req), ref); 2985 kmem_cache_free(req->i915->requests, req); 2986 } 2987 2988 static inline int 2989 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2990 struct i915_gem_context *ctx, 2991 struct drm_i915_gem_request **req_out) 2992 { 2993 struct drm_i915_private *dev_priv = engine->i915; 2994 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); 2995 struct drm_i915_gem_request *req; 2996 int ret; 2997 2998 if (!req_out) 2999 return -EINVAL; 3000 3001 *req_out = NULL; 3002 3003 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 3004 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex 3005 * and restart. 3006 */ 3007 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); 3008 if (ret) 3009 return ret; 3010 3011 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 3012 if (req == NULL) 3013 return -ENOMEM; 3014 3015 ret = i915_gem_get_seqno(engine->i915, &req->seqno); 3016 if (ret) 3017 goto err; 3018 3019 kref_init(&req->ref); 3020 req->i915 = dev_priv; 3021 req->engine = engine; 3022 req->ctx = ctx; 3023 i915_gem_context_reference(req->ctx); 3024 3025 /* 3026 * Reserve space in the ring buffer for all the commands required to 3027 * eventually emit this request. This is to guarantee that the 3028 * i915_add_request() call can't fail. Note that the reserve may need 3029 * to be redone if the request is not actually submitted straight 3030 * away, e.g. because a GPU scheduler has deferred it. 3031 */ 3032 req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; 3033 3034 if (i915.enable_execlists) 3035 ret = intel_logical_ring_alloc_request_extras(req); 3036 else 3037 ret = intel_ring_alloc_request_extras(req); 3038 if (ret) 3039 goto err_ctx; 3040 3041 *req_out = req; 3042 return 0; 3043 3044 err_ctx: 3045 i915_gem_context_unreference(ctx); 3046 err: 3047 kmem_cache_free(dev_priv->requests, req); 3048 return ret; 3049 } 3050 3051 /** 3052 * i915_gem_request_alloc - allocate a request structure 3053 * 3054 * @engine: engine that we wish to issue the request on. 3055 * @ctx: context that the request will be associated with. 3056 * This can be NULL if the request is not directly related to 3057 * any specific user context, in which case this function will 3058 * choose an appropriate context to use. 
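*
* A minimal kernel-internal usage sketch (illustrative only):
*
*	req = i915_gem_request_alloc(engine, NULL);
*	if (IS_ERR(req))
*		return PTR_ERR(req);
*	... emit commands for the request ...
*	__i915_add_request(req, NULL, true);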
3059 * 3060 * Returns a pointer to the allocated request if successful, 3061 * or an error code if not. 3062 */ 3063 struct drm_i915_gem_request * 3064 i915_gem_request_alloc(struct intel_engine_cs *engine, 3065 struct i915_gem_context *ctx) 3066 { 3067 struct drm_i915_gem_request *req; 3068 int err; 3069 3070 if (ctx == NULL) 3071 ctx = engine->i915->kernel_context; 3072 err = __i915_gem_request_alloc(engine, ctx, &req); 3073 return err ? ERR_PTR(err) : req; 3074 } 3075 3076 struct drm_i915_gem_request * 3077 i915_gem_find_active_request(struct intel_engine_cs *engine) 3078 { 3079 struct drm_i915_gem_request *request; 3080 3081 /* We are called by the error capture and reset at a random 3082 * point in time. In particular, note that neither is crucially 3083 * ordered with an interrupt. After a hang, the GPU is dead and we 3084 * assume that no more writes can happen (we waited long enough for 3085 * all writes that were in transaction to be flushed) - adding an 3086 * extra delay for a recent interrupt is pointless. Hence, we do 3087 * not need an engine->irq_seqno_barrier() before the seqno reads. 3088 */ 3089 list_for_each_entry(request, &engine->request_list, list) { 3090 if (i915_gem_request_completed(request)) 3091 continue; 3092 3093 return request; 3094 } 3095 3096 return NULL; 3097 } 3098 3099 static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) 3100 { 3101 struct drm_i915_gem_request *request; 3102 bool ring_hung; 3103 3104 request = i915_gem_find_active_request(engine); 3105 if (request == NULL) 3106 return; 3107 3108 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3109 3110 i915_set_reset_status(request->ctx, ring_hung); 3111 list_for_each_entry_continue(request, &engine->request_list, list) 3112 i915_set_reset_status(request->ctx, false); 3113 } 3114 3115 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) 3116 { 3117 struct intel_ringbuffer *buffer; 3118 3119 while (!list_empty(&engine->active_list)) { 3120 struct drm_i915_gem_object *obj; 3121 3122 obj = list_first_entry(&engine->active_list, 3123 struct drm_i915_gem_object, 3124 engine_list[engine->id]); 3125 3126 i915_gem_object_retire__read(obj, engine->id); 3127 } 3128 3129 /* 3130 * Clear the execlists queue up before freeing the requests, as those 3131 * are the ones that keep the context and ringbuffer backing objects 3132 * pinned in place. 3133 */ 3134 3135 if (i915.enable_execlists) { 3136 /* Ensure irq handler finishes or is cancelled. */ 3137 tasklet_kill(&engine->irq_tasklet); 3138 3139 intel_execlists_cancel_requests(engine); 3140 } 3141 3142 /* 3143 * We must free the requests after all the corresponding objects have 3144 * been moved off active lists. Which is the same order as the normal 3145 * retire_requests function does. This is important if object hold 3146 * implicit references on things like e.g. ppgtt address spaces through 3147 * the request. 3148 */ 3149 while (!list_empty(&engine->request_list)) { 3150 struct drm_i915_gem_request *request; 3151 3152 request = list_first_entry(&engine->request_list, 3153 struct drm_i915_gem_request, 3154 list); 3155 3156 i915_gem_request_retire(request); 3157 } 3158 3159 /* Having flushed all requests from all queues, we know that all 3160 * ringbuffers must now be empty. However, since we do not reclaim 3161 * all space when retiring the request (to prevent HEADs colliding 3162 * with rapid ringbuffer wraparound) the amount of available space 3163 * upon reset is less than when we start. 
Do one more pass over 3164 * all the ringbuffers to reset last_retired_head. 3165 */ 3166 list_for_each_entry(buffer, &engine->buffers, link) { 3167 buffer->last_retired_head = buffer->tail; 3168 intel_ring_update_space(buffer); 3169 } 3170 3171 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 3172 } 3173 3174 void i915_gem_reset(struct drm_device *dev) 3175 { 3176 struct drm_i915_private *dev_priv = to_i915(dev); 3177 struct intel_engine_cs *engine; 3178 3179 /* 3180 * Before we free the objects from the requests, we need to inspect 3181 * them for finding the guilty party. As the requests only borrow 3182 * their reference to the objects, the inspection must be done first. 3183 */ 3184 for_each_engine(engine, dev_priv) 3185 i915_gem_reset_engine_status(engine); 3186 3187 for_each_engine(engine, dev_priv) 3188 i915_gem_reset_engine_cleanup(engine); 3189 3190 i915_gem_context_reset(dev); 3191 3192 i915_gem_restore_fences(dev); 3193 3194 WARN_ON(i915_verify_lists(dev)); 3195 } 3196 3197 /** 3198 * This function clears the request list as sequence numbers are passed. 3199 * @engine: engine to retire requests on 3200 */ 3201 void 3202 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 3203 { 3204 WARN_ON(i915_verify_lists(engine->dev)); 3205 3206 /* Retire requests first as we use it above for the early return. 3207 * If we retire requests last, we may use a later seqno and so clear 3208 * the requests lists without clearing the active list, leading to 3209 * confusion. 3210 */ 3211 while (!list_empty(&engine->request_list)) { 3212 struct drm_i915_gem_request *request; 3213 3214 request = list_first_entry(&engine->request_list, 3215 struct drm_i915_gem_request, 3216 list); 3217 3218 if (!i915_gem_request_completed(request)) 3219 break; 3220 3221 i915_gem_request_retire(request); 3222 } 3223 3224 /* Move any buffers on the active list that are no longer referenced 3225 * by the ringbuffer to the flushing/inactive lists as appropriate, 3226 * before we free the context associated with the requests. 3227 */ 3228 while (!list_empty(&engine->active_list)) { 3229 struct drm_i915_gem_object *obj; 3230 3231 obj = list_first_entry(&engine->active_list, 3232 struct drm_i915_gem_object, 3233 engine_list[engine->id]); 3234 3235 if (!list_empty(&obj->last_read_req[engine->id]->list)) 3236 break; 3237 3238 i915_gem_object_retire__read(obj, engine->id); 3239 } 3240 3241 WARN_ON(i915_verify_lists(engine->dev)); 3242 } 3243 3244 void i915_gem_retire_requests(struct drm_i915_private *dev_priv) 3245 { 3246 struct intel_engine_cs *engine; 3247 3248 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3249 3250 if (dev_priv->gt.active_engines == 0) 3251 return; 3252 3253 GEM_BUG_ON(!dev_priv->gt.awake); 3254 3255 for_each_engine(engine, dev_priv) { 3256 i915_gem_retire_requests_ring(engine); 3257 if (list_empty(&engine->request_list)) 3258 dev_priv->gt.active_engines &= ~intel_engine_flag(engine); 3259 } 3260 3261 if (dev_priv->gt.active_engines == 0) 3262 queue_delayed_work(dev_priv->wq, 3263 &dev_priv->gt.idle_work, 3264 msecs_to_jiffies(100)); 3265 } 3266 3267 static void 3268 i915_gem_retire_work_handler(struct work_struct *work) 3269 { 3270 struct drm_i915_private *dev_priv = 3271 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3272 struct drm_device *dev = &dev_priv->drm; 3273 3274 /* Come back later if the device is busy... 
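* Note the trylock: if struct_mutex is contended we simply skip this
* pass and rely on the re-queue below to retire on a later run.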
*/ 3275 if (mutex_trylock(&dev->struct_mutex)) { 3276 i915_gem_retire_requests(dev_priv); 3277 mutex_unlock(&dev->struct_mutex); 3278 } 3279 3280 /* Keep the retire handler running until we are finally idle. 3281 * We do not need to do this test under locking as in the worst-case 3282 * we queue the retire worker once too often. 3283 */ 3284 if (READ_ONCE(dev_priv->gt.awake)) 3285 queue_delayed_work(dev_priv->wq, 3286 &dev_priv->gt.retire_work, 3287 round_jiffies_up_relative(HZ)); 3288 } 3289 3290 static void 3291 i915_gem_idle_work_handler(struct work_struct *work) 3292 { 3293 struct drm_i915_private *dev_priv = 3294 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3295 struct drm_device *dev = &dev_priv->drm; 3296 struct intel_engine_cs *engine; 3297 unsigned int stuck_engines; 3298 bool rearm_hangcheck; 3299 3300 if (!READ_ONCE(dev_priv->gt.awake)) 3301 return; 3302 3303 if (READ_ONCE(dev_priv->gt.active_engines)) 3304 return; 3305 3306 rearm_hangcheck = 3307 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3308 3309 if (!mutex_trylock(&dev->struct_mutex)) { 3310 /* Currently busy, come back later */ 3311 mod_delayed_work(dev_priv->wq, 3312 &dev_priv->gt.idle_work, 3313 msecs_to_jiffies(50)); 3314 goto out_rearm; 3315 } 3316 3317 if (dev_priv->gt.active_engines) 3318 goto out_unlock; 3319 3320 for_each_engine(engine, dev_priv) 3321 i915_gem_batch_pool_fini(&engine->batch_pool); 3322 3323 GEM_BUG_ON(!dev_priv->gt.awake); 3324 dev_priv->gt.awake = false; 3325 rearm_hangcheck = false; 3326 3327 stuck_engines = intel_kick_waiters(dev_priv); 3328 if (unlikely(stuck_engines)) { 3329 DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n"); 3330 dev_priv->gpu_error.missed_irq_rings |= stuck_engines; 3331 } 3332 3333 if (INTEL_GEN(dev_priv) >= 6) 3334 gen6_rps_idle(dev_priv); 3335 intel_runtime_pm_put(dev_priv); 3336 out_unlock: 3337 mutex_unlock(&dev->struct_mutex); 3338 3339 out_rearm: 3340 if (rearm_hangcheck) { 3341 GEM_BUG_ON(!dev_priv->gt.awake); 3342 i915_queue_hangcheck(dev_priv); 3343 } 3344 } 3345 3346 /** 3347 * Ensures that an object will eventually get non-busy by flushing any required 3348 * write domains, emitting any outstanding lazy request and retiring and 3349 * completed requests. 3350 * @obj: object to flush 3351 */ 3352 static int 3353 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3354 { 3355 int i; 3356 3357 if (!obj->active) 3358 return 0; 3359 3360 for (i = 0; i < I915_NUM_ENGINES; i++) { 3361 struct drm_i915_gem_request *req; 3362 3363 req = obj->last_read_req[i]; 3364 if (req == NULL) 3365 continue; 3366 3367 if (i915_gem_request_completed(req)) 3368 i915_gem_object_retire__read(obj, i); 3369 } 3370 3371 return 0; 3372 } 3373 3374 /** 3375 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3376 * @dev: drm device pointer 3377 * @data: ioctl data blob 3378 * @file: drm file pointer 3379 * 3380 * Returns 0 if successful, else an error is returned with the remaining time in 3381 * the timeout parameter. 3382 * -ETIME: object is still busy after timeout 3383 * -ERESTARTSYS: signal interrupted the wait 3384 * -ENONENT: object doesn't exist 3385 * Also possible, but rare: 3386 * -EAGAIN: GPU wedged 3387 * -ENOMEM: damn 3388 * -ENODEV: Internal IRQ fail 3389 * -E?: The add request failed 3390 * 3391 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3392 * non-zero timeout parameter the wait ioctl will wait for the given number of 3393 * nanoseconds on an object becoming unbusy. 
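*
* From userspace the call looks roughly like (illustrative sketch):
*
*	struct drm_i915_gem_wait wait = { .bo_handle = handle,
*					  .timeout_ns = 1000000000 };
*	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
*
* returning -1 with errno set to ETIME if the object was still busy
* when the one second budget expired.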
Since the wait itself does so 3394 * without holding struct_mutex the object may become re-busied before this 3395 * function completes. A similar but shorter * race condition exists in the busy 3396 * ioctl 3397 */ 3398 int 3399 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3400 { 3401 struct drm_i915_gem_wait *args = data; 3402 struct drm_i915_gem_object *obj; 3403 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3404 int i, n = 0; 3405 int ret; 3406 3407 if (args->flags != 0) 3408 return -EINVAL; 3409 3410 ret = i915_mutex_lock_interruptible(dev); 3411 if (ret) 3412 return ret; 3413 3414 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); 3415 if (&obj->base == NULL) { 3416 mutex_unlock(&dev->struct_mutex); 3417 return -ENOENT; 3418 } 3419 3420 /* Need to make sure the object gets inactive eventually. */ 3421 ret = i915_gem_object_flush_active(obj); 3422 if (ret) 3423 goto out; 3424 3425 if (!obj->active) 3426 goto out; 3427 3428 /* Do this after OLR check to make sure we make forward progress polling 3429 * on this IOCTL with a timeout == 0 (like busy ioctl) 3430 */ 3431 if (args->timeout_ns == 0) { 3432 ret = -ETIME; 3433 goto out; 3434 } 3435 3436 drm_gem_object_unreference(&obj->base); 3437 3438 for (i = 0; i < I915_NUM_ENGINES; i++) { 3439 if (obj->last_read_req[i] == NULL) 3440 continue; 3441 3442 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3443 } 3444 3445 mutex_unlock(&dev->struct_mutex); 3446 3447 for (i = 0; i < n; i++) { 3448 if (ret == 0) 3449 ret = __i915_wait_request(req[i], true, 3450 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3451 to_rps_client(file)); 3452 i915_gem_request_unreference(req[i]); 3453 } 3454 return ret; 3455 3456 out: 3457 drm_gem_object_unreference(&obj->base); 3458 mutex_unlock(&dev->struct_mutex); 3459 return ret; 3460 } 3461 3462 static int 3463 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3464 struct intel_engine_cs *to, 3465 struct drm_i915_gem_request *from_req, 3466 struct drm_i915_gem_request **to_req) 3467 { 3468 struct intel_engine_cs *from; 3469 int ret; 3470 3471 from = i915_gem_request_get_engine(from_req); 3472 if (to == from) 3473 return 0; 3474 3475 if (i915_gem_request_completed(from_req)) 3476 return 0; 3477 3478 if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { 3479 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3480 ret = __i915_wait_request(from_req, 3481 i915->mm.interruptible, 3482 NULL, 3483 &i915->rps.semaphores); 3484 if (ret) 3485 return ret; 3486 3487 i915_gem_object_retire_request(obj, from_req); 3488 } else { 3489 int idx = intel_ring_sync_index(from, to); 3490 u32 seqno = i915_gem_request_get_seqno(from_req); 3491 3492 WARN_ON(!to_req); 3493 3494 if (seqno <= from->semaphore.sync_seqno[idx]) 3495 return 0; 3496 3497 if (*to_req == NULL) { 3498 struct drm_i915_gem_request *req; 3499 3500 req = i915_gem_request_alloc(to, NULL); 3501 if (IS_ERR(req)) 3502 return PTR_ERR(req); 3503 3504 *to_req = req; 3505 } 3506 3507 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3508 ret = to->semaphore.sync_to(*to_req, from, seqno); 3509 if (ret) 3510 return ret; 3511 3512 /* We use last_read_req because sync_to() 3513 * might have just caused seqno wrap under 3514 * the radar. 3515 */ 3516 from->semaphore.sync_seqno[idx] = 3517 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3518 } 3519 3520 return 0; 3521 } 3522 3523 /** 3524 * i915_gem_object_sync - sync an object to a ring. 
3525 * 3526 * @obj: object which may be in use on another ring. 3527 * @to: ring we wish to use the object on. May be NULL. 3528 * @to_req: request we wish to use the object for. See below. 3529 * This will be allocated and returned if a request is 3530 * required but not passed in. 3531 * 3532 * This code is meant to abstract object synchronization with the GPU. 3533 * Calling with NULL implies synchronizing the object with the CPU 3534 * rather than a particular GPU ring. Conceptually we serialise writes 3535 * between engines inside the GPU. We only allow one engine to write 3536 * into a buffer at any time, but multiple readers. To ensure each has 3537 * a coherent view of memory, we must: 3538 * 3539 * - If there is an outstanding write request to the object, the new 3540 * request must wait for it to complete (either CPU or in hw, requests 3541 * on the same ring will be naturally ordered). 3542 * 3543 * - If we are a write request (pending_write_domain is set), the new 3544 * request must wait for outstanding read requests to complete. 3545 * 3546 * For CPU synchronisation (NULL to) no request is required. For syncing with 3547 * rings to_req must be non-NULL. However, a request does not have to be 3548 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3549 * request will be allocated automatically and returned through *to_req. Note 3550 * that it is not guaranteed that commands will be emitted (because the system 3551 * might already be idle). Hence there is no need to create a request that 3552 * might never have any work submitted. Note further that if a request is 3553 * returned in *to_req, it is the responsibility of the caller to submit 3554 * that request (after potentially adding more work to it). 3555 * 3556 * Returns 0 if successful, else propagates up the lower layer error. 
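*
* A typical call from a submission path looks roughly like (illustrative
* sketch, where req is the request being built for engine "to"):
*
*	ret = i915_gem_object_sync(obj, to, &req);
*	if (ret)
*		return ret;
*
* with any request allocated on the caller's behalf returned in req for
* the caller to submit.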
3557 */ 3558 int 3559 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3560 struct intel_engine_cs *to, 3561 struct drm_i915_gem_request **to_req) 3562 { 3563 const bool readonly = obj->base.pending_write_domain == 0; 3564 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3565 int ret, i, n; 3566 3567 if (!obj->active) 3568 return 0; 3569 3570 if (to == NULL) 3571 return i915_gem_object_wait_rendering(obj, readonly); 3572 3573 n = 0; 3574 if (readonly) { 3575 if (obj->last_write_req) 3576 req[n++] = obj->last_write_req; 3577 } else { 3578 for (i = 0; i < I915_NUM_ENGINES; i++) 3579 if (obj->last_read_req[i]) 3580 req[n++] = obj->last_read_req[i]; 3581 } 3582 for (i = 0; i < n; i++) { 3583 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3584 if (ret) 3585 return ret; 3586 } 3587 3588 return 0; 3589 } 3590 3591 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3592 { 3593 u32 old_write_domain, old_read_domains; 3594 3595 /* Force a pagefault for domain tracking on next user access */ 3596 i915_gem_release_mmap(obj); 3597 3598 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3599 return; 3600 3601 old_read_domains = obj->base.read_domains; 3602 old_write_domain = obj->base.write_domain; 3603 3604 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3605 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3606 3607 trace_i915_gem_object_change_domain(obj, 3608 old_read_domains, 3609 old_write_domain); 3610 } 3611 3612 static void __i915_vma_iounmap(struct i915_vma *vma) 3613 { 3614 GEM_BUG_ON(vma->pin_count); 3615 3616 if (vma->iomap == NULL) 3617 return; 3618 3619 io_mapping_unmap(vma->iomap); 3620 vma->iomap = NULL; 3621 } 3622 3623 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3624 { 3625 struct drm_i915_gem_object *obj = vma->obj; 3626 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3627 int ret; 3628 3629 if (list_empty(&vma->obj_link)) 3630 return 0; 3631 3632 if (!drm_mm_node_allocated(&vma->node)) { 3633 i915_gem_vma_destroy(vma); 3634 return 0; 3635 } 3636 3637 if (vma->pin_count) 3638 return -EBUSY; 3639 3640 BUG_ON(obj->pages == NULL); 3641 3642 if (wait) { 3643 ret = i915_gem_object_wait_rendering(obj, false); 3644 if (ret) 3645 return ret; 3646 } 3647 3648 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3649 i915_gem_object_finish_gtt(obj); 3650 3651 /* release the fence reg _after_ flushing */ 3652 ret = i915_gem_object_put_fence(obj); 3653 if (ret) 3654 return ret; 3655 3656 __i915_vma_iounmap(vma); 3657 } 3658 3659 trace_i915_vma_unbind(vma); 3660 3661 vma->vm->unbind_vma(vma); 3662 vma->bound = 0; 3663 3664 list_del_init(&vma->vm_link); 3665 if (vma->is_ggtt) { 3666 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3667 obj->map_and_fenceable = false; 3668 } else if (vma->ggtt_view.pages) { 3669 sg_free_table(vma->ggtt_view.pages); 3670 kfree(vma->ggtt_view.pages); 3671 } 3672 vma->ggtt_view.pages = NULL; 3673 } 3674 3675 drm_mm_remove_node(&vma->node); 3676 i915_gem_vma_destroy(vma); 3677 3678 /* Since the unbound list is global, only move to that list if 3679 * no more VMAs exist. */ 3680 if (list_empty(&obj->vma_list)) 3681 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3682 3683 /* And finally now the object is completely decoupled from this vma, 3684 * we can drop its hold on the backing storage and allow it to be 3685 * reaped by the shrinker. 
3686 */ 3687 i915_gem_object_unpin_pages(obj); 3688 3689 return 0; 3690 } 3691 3692 int i915_vma_unbind(struct i915_vma *vma) 3693 { 3694 return __i915_vma_unbind(vma, true); 3695 } 3696 3697 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3698 { 3699 return __i915_vma_unbind(vma, false); 3700 } 3701 3702 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) 3703 { 3704 struct intel_engine_cs *engine; 3705 int ret; 3706 3707 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3708 3709 for_each_engine(engine, dev_priv) { 3710 if (engine->last_context == NULL) 3711 continue; 3712 3713 ret = intel_engine_idle(engine); 3714 if (ret) 3715 return ret; 3716 } 3717 3718 WARN_ON(i915_verify_lists(dev)); 3719 return 0; 3720 } 3721 3722 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3723 unsigned long cache_level) 3724 { 3725 struct drm_mm_node *gtt_space = &vma->node; 3726 struct drm_mm_node *other; 3727 3728 /* 3729 * On some machines we have to be careful when putting differing types 3730 * of snoopable memory together to avoid the prefetcher crossing memory 3731 * domains and dying. During vm initialisation, we decide whether or not 3732 * these constraints apply and set the drm_mm.color_adjust 3733 * appropriately. 3734 */ 3735 if (vma->vm->mm.color_adjust == NULL) 3736 return true; 3737 3738 if (!drm_mm_node_allocated(gtt_space)) 3739 return true; 3740 3741 if (list_empty(>t_space->node_list)) 3742 return true; 3743 3744 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3745 if (other->allocated && !other->hole_follows && other->color != cache_level) 3746 return false; 3747 3748 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3749 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3750 return false; 3751 3752 return true; 3753 } 3754 3755 /** 3756 * Finds free space in the GTT aperture and binds the object or a view of it 3757 * there. 3758 * @obj: object to bind 3759 * @vm: address space to bind into 3760 * @ggtt_view: global gtt view if applicable 3761 * @alignment: requested alignment 3762 * @flags: mask of PIN_* flags to use 3763 */ 3764 static struct i915_vma * 3765 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3766 struct i915_address_space *vm, 3767 const struct i915_ggtt_view *ggtt_view, 3768 unsigned alignment, 3769 uint64_t flags) 3770 { 3771 struct drm_device *dev = obj->base.dev; 3772 struct drm_i915_private *dev_priv = to_i915(dev); 3773 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3774 u32 fence_alignment, unfenced_alignment; 3775 u32 search_flag, alloc_flag; 3776 u64 start, end; 3777 u64 size, fence_size; 3778 struct i915_vma *vma; 3779 int ret; 3780 3781 if (i915_is_ggtt(vm)) { 3782 u32 view_size; 3783 3784 if (WARN_ON(!ggtt_view)) 3785 return ERR_PTR(-EINVAL); 3786 3787 view_size = i915_ggtt_view_size(obj, ggtt_view); 3788 3789 fence_size = i915_gem_get_gtt_size(dev, 3790 view_size, 3791 obj->tiling_mode); 3792 fence_alignment = i915_gem_get_gtt_alignment(dev, 3793 view_size, 3794 obj->tiling_mode, 3795 true); 3796 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3797 view_size, 3798 obj->tiling_mode, 3799 false); 3800 size = flags & PIN_MAPPABLE ? 
fence_size : view_size; 3801 } else { 3802 fence_size = i915_gem_get_gtt_size(dev, 3803 obj->base.size, 3804 obj->tiling_mode); 3805 fence_alignment = i915_gem_get_gtt_alignment(dev, 3806 obj->base.size, 3807 obj->tiling_mode, 3808 true); 3809 unfenced_alignment = 3810 i915_gem_get_gtt_alignment(dev, 3811 obj->base.size, 3812 obj->tiling_mode, 3813 false); 3814 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3815 } 3816 3817 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3818 end = vm->total; 3819 if (flags & PIN_MAPPABLE) 3820 end = min_t(u64, end, ggtt->mappable_end); 3821 if (flags & PIN_ZONE_4G) 3822 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3823 3824 if (alignment == 0) 3825 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3826 unfenced_alignment; 3827 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3828 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3829 ggtt_view ? ggtt_view->type : 0, 3830 alignment); 3831 return ERR_PTR(-EINVAL); 3832 } 3833 3834 /* If binding the object/GGTT view requires more space than the entire 3835 * aperture has, reject it early before evicting everything in a vain 3836 * attempt to find space. 3837 */ 3838 if (size > end) { 3839 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3840 ggtt_view ? ggtt_view->type : 0, 3841 size, 3842 flags & PIN_MAPPABLE ? "mappable" : "total", 3843 end); 3844 return ERR_PTR(-E2BIG); 3845 } 3846 3847 ret = i915_gem_object_get_pages(obj); 3848 if (ret) 3849 return ERR_PTR(ret); 3850 3851 i915_gem_object_pin_pages(obj); 3852 3853 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3854 i915_gem_obj_lookup_or_create_vma(obj, vm); 3855 3856 if (IS_ERR(vma)) 3857 goto err_unpin; 3858 3859 if (flags & PIN_OFFSET_FIXED) { 3860 uint64_t offset = flags & PIN_OFFSET_MASK; 3861 3862 if (offset & (alignment - 1) || offset + size > end) { 3863 ret = -EINVAL; 3864 goto err_free_vma; 3865 } 3866 vma->node.start = offset; 3867 vma->node.size = size; 3868 vma->node.color = obj->cache_level; 3869 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3870 if (ret) { 3871 ret = i915_gem_evict_for_vma(vma); 3872 if (ret == 0) 3873 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3874 } 3875 if (ret) 3876 goto err_free_vma; 3877 } else { 3878 if (flags & PIN_HIGH) { 3879 search_flag = DRM_MM_SEARCH_BELOW; 3880 alloc_flag = DRM_MM_CREATE_TOP; 3881 } else { 3882 search_flag = DRM_MM_SEARCH_DEFAULT; 3883 alloc_flag = DRM_MM_CREATE_DEFAULT; 3884 } 3885 3886 search_free: 3887 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3888 size, alignment, 3889 obj->cache_level, 3890 start, end, 3891 search_flag, 3892 alloc_flag); 3893 if (ret) { 3894 ret = i915_gem_evict_something(dev, vm, size, alignment, 3895 obj->cache_level, 3896 start, end, 3897 flags); 3898 if (ret == 0) 3899 goto search_free; 3900 3901 goto err_free_vma; 3902 } 3903 } 3904 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3905 ret = -EINVAL; 3906 goto err_remove_node; 3907 } 3908 3909 trace_i915_vma_bind(vma, flags); 3910 ret = i915_vma_bind(vma, obj->cache_level, flags); 3911 if (ret) 3912 goto err_remove_node; 3913 3914 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3915 list_add_tail(&vma->vm_link, &vm->inactive_list); 3916 3917 return vma; 3918 3919 err_remove_node: 3920 drm_mm_remove_node(&vma->node); 3921 err_free_vma: 3922 i915_gem_vma_destroy(vma); 3923 vma = ERR_PTR(ret); 3924 
err_unpin: 3925 i915_gem_object_unpin_pages(obj); 3926 return vma; 3927 } 3928 3929 bool 3930 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3931 bool force) 3932 { 3933 /* If we don't have a page list set up, then we're not pinned 3934 * to GPU, and we can ignore the cache flush because it'll happen 3935 * again at bind time. 3936 */ 3937 if (obj->pages == NULL) 3938 return false; 3939 3940 /* 3941 * Stolen memory is always coherent with the GPU as it is explicitly 3942 * marked as wc by the system, or the system is cache-coherent. 3943 */ 3944 if (obj->stolen || obj->phys_handle) 3945 return false; 3946 3947 /* If the GPU is snooping the contents of the CPU cache, 3948 * we do not need to manually clear the CPU cache lines. However, 3949 * the caches are only snooped when the render cache is 3950 * flushed/invalidated. As we always have to emit invalidations 3951 * and flushes when moving into and out of the RENDER domain, correct 3952 * snooping behaviour occurs naturally as the result of our domain 3953 * tracking. 3954 */ 3955 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3956 obj->cache_dirty = true; 3957 return false; 3958 } 3959 3960 trace_i915_gem_object_clflush(obj); 3961 drm_clflush_sg(obj->pages); 3962 obj->cache_dirty = false; 3963 3964 return true; 3965 } 3966 3967 /** Flushes the GTT write domain for the object if it's dirty. */ 3968 static void 3969 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3970 { 3971 uint32_t old_write_domain; 3972 3973 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3974 return; 3975 3976 /* No actual flushing is required for the GTT write domain. Writes 3977 * to it immediately go to main memory as far as we know, so there's 3978 * no chipset flush. It also doesn't land in render cache. 3979 * 3980 * However, we do have to enforce the order so that all writes through 3981 * the GTT land before any writes to the device, such as updates to 3982 * the GATT itself. 3983 */ 3984 wmb(); 3985 3986 old_write_domain = obj->base.write_domain; 3987 obj->base.write_domain = 0; 3988 3989 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3990 3991 trace_i915_gem_object_change_domain(obj, 3992 obj->base.read_domains, 3993 old_write_domain); 3994 } 3995 3996 /** Flushes the CPU write domain for the object if it's dirty. */ 3997 static void 3998 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3999 { 4000 uint32_t old_write_domain; 4001 4002 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 4003 return; 4004 4005 if (i915_gem_clflush_object(obj, obj->pin_display)) 4006 i915_gem_chipset_flush(to_i915(obj->base.dev)); 4007 4008 old_write_domain = obj->base.write_domain; 4009 obj->base.write_domain = 0; 4010 4011 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 4012 4013 trace_i915_gem_object_change_domain(obj, 4014 obj->base.read_domains, 4015 old_write_domain); 4016 } 4017 4018 /** 4019 * Moves a single object to the GTT read, and possibly write domain. 4020 * @obj: object to act on 4021 * @write: ask for write access or read only 4022 * 4023 * This function returns when the move is complete, including waiting on 4024 * flushes to occur. 
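*
* Both the set_domain ioctl (when a GTT domain is requested) and the GTT
* pagefault handler above resolve to this function.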
4025 */ 4026 int 4027 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 4028 { 4029 struct drm_device *dev = obj->base.dev; 4030 struct drm_i915_private *dev_priv = to_i915(dev); 4031 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4032 uint32_t old_write_domain, old_read_domains; 4033 struct i915_vma *vma; 4034 int ret; 4035 4036 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 4037 return 0; 4038 4039 ret = i915_gem_object_wait_rendering(obj, !write); 4040 if (ret) 4041 return ret; 4042 4043 /* Flush and acquire obj->pages so that we are coherent through 4044 * direct access in memory with previous cached writes through 4045 * shmemfs and that our cache domain tracking remains valid. 4046 * For example, if the obj->filp was moved to swap without us 4047 * being notified and releasing the pages, we would mistakenly 4048 * continue to assume that the obj remained out of the CPU cached 4049 * domain. 4050 */ 4051 ret = i915_gem_object_get_pages(obj); 4052 if (ret) 4053 return ret; 4054 4055 i915_gem_object_flush_cpu_write_domain(obj); 4056 4057 /* Serialise direct access to this object with the barriers for 4058 * coherent writes from the GPU, by effectively invalidating the 4059 * GTT domain upon first access. 4060 */ 4061 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 4062 mb(); 4063 4064 old_write_domain = obj->base.write_domain; 4065 old_read_domains = obj->base.read_domains; 4066 4067 /* It should now be out of any other write domains, and we can update 4068 * the domain values for our changes. 4069 */ 4070 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4071 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4072 if (write) { 4073 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 4074 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 4075 obj->dirty = 1; 4076 } 4077 4078 trace_i915_gem_object_change_domain(obj, 4079 old_read_domains, 4080 old_write_domain); 4081 4082 /* And bump the LRU for this access */ 4083 vma = i915_gem_obj_to_ggtt(obj); 4084 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 4085 list_move_tail(&vma->vm_link, 4086 &ggtt->base.inactive_list); 4087 4088 return 0; 4089 } 4090 4091 /** 4092 * Changes the cache-level of an object across all VMA. 4093 * @obj: object to act on 4094 * @cache_level: new cache level to set for the object 4095 * 4096 * After this function returns, the object will be in the new cache-level 4097 * across all GTT and the contents of the backing storage will be coherent, 4098 * with respect to the new cache-level. In order to keep the backing storage 4099 * coherent for all users, we only allow a single cache level to be set 4100 * globally on the object and prevent it from being changed whilst the 4101 * hardware is reading from the object. That is if the object is currently 4102 * on the scanout it will be set to uncached (or equivalent display 4103 * cache coherency) and all non-MOCS GPU access will also be uncached so 4104 * that all direct access to the scanout remains coherent. 4105 */ 4106 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4107 enum i915_cache_level cache_level) 4108 { 4109 struct drm_device *dev = obj->base.dev; 4110 struct i915_vma *vma, *next; 4111 bool bound = false; 4112 int ret = 0; 4113 4114 if (obj->cache_level == cache_level) 4115 goto out; 4116 4117 /* Inspect the list of currently bound VMA and unbind any that would 4118 * be invalid given the new cache-level. 
This is principally to
4119 * catch the issue of the CS prefetch crossing page boundaries and
4120 * reading an invalid PTE on older architectures.
4121 */
4122 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
4123 if (!drm_mm_node_allocated(&vma->node))
4124 continue;
4125
4126 if (vma->pin_count) {
4127 DRM_DEBUG("cannot change the cache level of pinned objects\n");
4128 return -EBUSY;
4129 }
4130
4131 if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4132 ret = i915_vma_unbind(vma);
4133 if (ret)
4134 return ret;
4135 } else
4136 bound = true;
4137 }
4138
4139 /* We can reuse the existing drm_mm nodes but need to change the
4140 * cache-level on the PTE. We could simply unbind them all and
4141 * rebind with the correct cache-level on next use. However since
4142 * we already have a valid slot, dma mapping, pages etc, we may as well
4143 * rewrite the PTE in the belief that doing so tramples upon less
4144 * state and so involves less work.
4145 */
4146 if (bound) {
4147 /* Before we change the PTE, the GPU must not be accessing it.
4148 * If we wait upon the object, we know that all the bound
4149 * VMA are no longer active.
4150 */
4151 ret = i915_gem_object_wait_rendering(obj, false);
4152 if (ret)
4153 return ret;
4154
4155 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
4156 /* Access to snoopable pages through the GTT is
4157 * incoherent and on some machines causes a hard
4158 * lockup. Relinquish the CPU mmap to force
4159 * userspace to refault in the pages and we can
4160 * then double check if the GTT mapping is still
4161 * valid for that pointer access.
4162 */
4163 i915_gem_release_mmap(obj);
4164
4165 /* As we no longer need a fence for GTT access,
4166 * we can relinquish it now (and so prevent having
4167 * to steal a fence from someone else on the next
4168 * fence request). Note GPU activity would have
4169 * dropped the fence as all snoopable access is
4170 * supposed to be linear.
4171 */
4172 ret = i915_gem_object_put_fence(obj);
4173 if (ret)
4174 return ret;
4175 } else {
4176 /* We either have incoherent backing store and
4177 * so no GTT access or the architecture is fully
4178 * coherent. In such cases, existing GTT mmaps
4179 * ignore the cache bit in the PTE and we can
4180 * rewrite it without confusing the GPU or having
4181 * to force userspace to fault back in its mmaps.
4182 */
4183 }
4184
4185 list_for_each_entry(vma, &obj->vma_list, obj_link) {
4186 if (!drm_mm_node_allocated(&vma->node))
4187 continue;
4188
4189 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
4190 if (ret)
4191 return ret;
4192 }
4193 }
4194
4195 list_for_each_entry(vma, &obj->vma_list, obj_link)
4196 vma->node.color = cache_level;
4197 obj->cache_level = cache_level;
4198
4199 out:
4200 /* Flush the dirty CPU caches to the backing storage so that the
4201 * object is now coherent at its new cache level (with respect
4202 * to the access domain).
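 * (cache_dirty was set when an earlier clflush was skipped for a
 * CPU-coherent object in i915_gem_clflush_object(); forcing the flush
 * here brings the backing storage back in line with the new cache
 * level.)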
4203 */ 4204 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) { 4205 if (i915_gem_clflush_object(obj, true)) 4206 i915_gem_chipset_flush(to_i915(obj->base.dev)); 4207 } 4208 4209 return 0; 4210 } 4211 4212 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4213 struct drm_file *file) 4214 { 4215 struct drm_i915_gem_caching *args = data; 4216 struct drm_i915_gem_object *obj; 4217 4218 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4219 if (&obj->base == NULL) 4220 return -ENOENT; 4221 4222 switch (obj->cache_level) { 4223 case I915_CACHE_LLC: 4224 case I915_CACHE_L3_LLC: 4225 args->caching = I915_CACHING_CACHED; 4226 break; 4227 4228 case I915_CACHE_WT: 4229 args->caching = I915_CACHING_DISPLAY; 4230 break; 4231 4232 default: 4233 args->caching = I915_CACHING_NONE; 4234 break; 4235 } 4236 4237 drm_gem_object_unreference_unlocked(&obj->base); 4238 return 0; 4239 } 4240 4241 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4242 struct drm_file *file) 4243 { 4244 struct drm_i915_private *dev_priv = to_i915(dev); 4245 struct drm_i915_gem_caching *args = data; 4246 struct drm_i915_gem_object *obj; 4247 enum i915_cache_level level; 4248 int ret; 4249 4250 switch (args->caching) { 4251 case I915_CACHING_NONE: 4252 level = I915_CACHE_NONE; 4253 break; 4254 case I915_CACHING_CACHED: 4255 /* 4256 * Due to a HW issue on BXT A stepping, GPU stores via a 4257 * snooped mapping may leave stale data in a corresponding CPU 4258 * cacheline, whereas normally such cachelines would get 4259 * invalidated. 4260 */ 4261 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 4262 return -ENODEV; 4263 4264 level = I915_CACHE_LLC; 4265 break; 4266 case I915_CACHING_DISPLAY: 4267 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4268 break; 4269 default: 4270 return -EINVAL; 4271 } 4272 4273 intel_runtime_pm_get(dev_priv); 4274 4275 ret = i915_mutex_lock_interruptible(dev); 4276 if (ret) 4277 goto rpm_put; 4278 4279 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4280 if (&obj->base == NULL) { 4281 ret = -ENOENT; 4282 goto unlock; 4283 } 4284 4285 ret = i915_gem_object_set_cache_level(obj, level); 4286 4287 drm_gem_object_unreference(&obj->base); 4288 unlock: 4289 mutex_unlock(&dev->struct_mutex); 4290 rpm_put: 4291 intel_runtime_pm_put(dev_priv); 4292 4293 return ret; 4294 } 4295 4296 /* 4297 * Prepare buffer for display plane (scanout, cursors, etc). 4298 * Can be called from an uninterruptible phase (modesetting) and allows 4299 * any flushes to be pipelined (for pageflips). 4300 */ 4301 int 4302 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4303 u32 alignment, 4304 const struct i915_ggtt_view *view) 4305 { 4306 u32 old_read_domains, old_write_domain; 4307 int ret; 4308 4309 /* Mark the pin_display early so that we account for the 4310 * display coherency whilst setting up the cache domains. 4311 */ 4312 obj->pin_display++; 4313 4314 /* The display engine is not coherent with the LLC cache on gen6. As 4315 * a result, we make sure that the pinning that is about to occur is 4316 * done with uncached PTEs. This is lowest common denominator for all 4317 * chipsets. 4318 * 4319 * However for gen6+, we could do better by using the GFDT bit instead 4320 * of uncaching, which would allow us to flush all the LLC-cached data 4321 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4322 */ 4323 ret = i915_gem_object_set_cache_level(obj, 4324 HAS_WT(obj->base.dev) ? 
I915_CACHE_WT : I915_CACHE_NONE); 4325 if (ret) 4326 goto err_unpin_display; 4327 4328 /* As the user may map the buffer once pinned in the display plane 4329 * (e.g. libkms for the bootup splash), we have to ensure that we 4330 * always use map_and_fenceable for all scanout buffers. 4331 */ 4332 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4333 view->type == I915_GGTT_VIEW_NORMAL ? 4334 PIN_MAPPABLE : 0); 4335 if (ret) 4336 goto err_unpin_display; 4337 4338 i915_gem_object_flush_cpu_write_domain(obj); 4339 4340 old_write_domain = obj->base.write_domain; 4341 old_read_domains = obj->base.read_domains; 4342 4343 /* It should now be out of any other write domains, and we can update 4344 * the domain values for our changes. 4345 */ 4346 obj->base.write_domain = 0; 4347 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4348 4349 trace_i915_gem_object_change_domain(obj, 4350 old_read_domains, 4351 old_write_domain); 4352 4353 return 0; 4354 4355 err_unpin_display: 4356 obj->pin_display--; 4357 return ret; 4358 } 4359 4360 void 4361 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4362 const struct i915_ggtt_view *view) 4363 { 4364 if (WARN_ON(obj->pin_display == 0)) 4365 return; 4366 4367 i915_gem_object_ggtt_unpin_view(obj, view); 4368 4369 obj->pin_display--; 4370 } 4371 4372 /** 4373 * Moves a single object to the CPU read, and possibly write domain. 4374 * @obj: object to act on 4375 * @write: requesting write or read-only access 4376 * 4377 * This function returns when the move is complete, including waiting on 4378 * flushes to occur. 4379 */ 4380 int 4381 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4382 { 4383 uint32_t old_write_domain, old_read_domains; 4384 int ret; 4385 4386 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4387 return 0; 4388 4389 ret = i915_gem_object_wait_rendering(obj, !write); 4390 if (ret) 4391 return ret; 4392 4393 i915_gem_object_flush_gtt_write_domain(obj); 4394 4395 old_write_domain = obj->base.write_domain; 4396 old_read_domains = obj->base.read_domains; 4397 4398 /* Flush the CPU cache if it's still invalid. */ 4399 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4400 i915_gem_clflush_object(obj, false); 4401 4402 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4403 } 4404 4405 /* It should now be out of any other write domains, and we can update 4406 * the domain values for our changes. 4407 */ 4408 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4409 4410 /* If we're writing through the CPU, then the GPU read domains will 4411 * need to be invalidated at next use. 4412 */ 4413 if (write) { 4414 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4415 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4416 } 4417 4418 trace_i915_gem_object_change_domain(obj, 4419 old_read_domains, 4420 old_write_domain); 4421 4422 return 0; 4423 } 4424 4425 /* Throttle our rendering by waiting until the ring has completed our requests 4426 * emitted over 20 msec ago. 4427 * 4428 * Note that if we were to use the current jiffies each time around the loop, 4429 * we wouldn't escape the function with any frames outstanding if the time to 4430 * render a frame was over 20ms. 4431 * 4432 * This should get us reasonable parallelism between CPU and GPU but also 4433 * relatively low latency when blocking on a particular request to finish. 
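 *
 * In terms of the code below, the cutoff is
 *
 *	recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
 *
 * (DRM_I915_THROTTLE_JIFFIES corresponding to the 20ms window above),
 * and the newest request emitted before that cutoff is the one we
 * block on; requests newer than the cutoff are left to run.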
4434 */ 4435 static int 4436 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4437 { 4438 struct drm_i915_private *dev_priv = to_i915(dev); 4439 struct drm_i915_file_private *file_priv = file->driver_priv; 4440 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4441 struct drm_i915_gem_request *request, *target = NULL; 4442 int ret; 4443 4444 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4445 if (ret) 4446 return ret; 4447 4448 /* ABI: return -EIO if already wedged */ 4449 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4450 return -EIO; 4451 4452 spin_lock(&file_priv->mm.lock); 4453 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4454 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4455 break; 4456 4457 /* 4458 * Note that the request might not have been submitted yet. 4459 * In which case emitted_jiffies will be zero. 4460 */ 4461 if (!request->emitted_jiffies) 4462 continue; 4463 4464 target = request; 4465 } 4466 if (target) 4467 i915_gem_request_reference(target); 4468 spin_unlock(&file_priv->mm.lock); 4469 4470 if (target == NULL) 4471 return 0; 4472 4473 ret = __i915_wait_request(target, true, NULL, NULL); 4474 i915_gem_request_unreference(target); 4475 4476 return ret; 4477 } 4478 4479 static bool 4480 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4481 { 4482 struct drm_i915_gem_object *obj = vma->obj; 4483 4484 if (alignment && 4485 vma->node.start & (alignment - 1)) 4486 return true; 4487 4488 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4489 return true; 4490 4491 if (flags & PIN_OFFSET_BIAS && 4492 vma->node.start < (flags & PIN_OFFSET_MASK)) 4493 return true; 4494 4495 if (flags & PIN_OFFSET_FIXED && 4496 vma->node.start != (flags & PIN_OFFSET_MASK)) 4497 return true; 4498 4499 return false; 4500 } 4501 4502 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4503 { 4504 struct drm_i915_gem_object *obj = vma->obj; 4505 bool mappable, fenceable; 4506 u32 fence_size, fence_alignment; 4507 4508 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4509 obj->base.size, 4510 obj->tiling_mode); 4511 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4512 obj->base.size, 4513 obj->tiling_mode, 4514 true); 4515 4516 fenceable = (vma->node.size == fence_size && 4517 (vma->node.start & (fence_alignment - 1)) == 0); 4518 4519 mappable = (vma->node.start + fence_size <= 4520 to_i915(obj->base.dev)->ggtt.mappable_end); 4521 4522 obj->map_and_fenceable = mappable && fenceable; 4523 } 4524 4525 static int 4526 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4527 struct i915_address_space *vm, 4528 const struct i915_ggtt_view *ggtt_view, 4529 uint32_t alignment, 4530 uint64_t flags) 4531 { 4532 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4533 struct i915_vma *vma; 4534 unsigned bound; 4535 int ret; 4536 4537 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4538 return -ENODEV; 4539 4540 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4541 return -EINVAL; 4542 4543 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4544 return -EINVAL; 4545 4546 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4547 return -EINVAL; 4548 4549 vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4550 i915_gem_obj_to_vma(obj, vm); 4551 4552 if (vma) { 4553 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4554 return -EBUSY; 4555 4556 if (i915_vma_misplaced(vma, alignment, flags)) { 4557 WARN(vma->pin_count, 4558 "bo is already pinned in %s with incorrect alignment:" 4559 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4560 " obj->map_and_fenceable=%d\n", 4561 ggtt_view ? "ggtt" : "ppgtt", 4562 upper_32_bits(vma->node.start), 4563 lower_32_bits(vma->node.start), 4564 alignment, 4565 !!(flags & PIN_MAPPABLE), 4566 obj->map_and_fenceable); 4567 ret = i915_vma_unbind(vma); 4568 if (ret) 4569 return ret; 4570 4571 vma = NULL; 4572 } 4573 } 4574 4575 bound = vma ? vma->bound : 0; 4576 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4577 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4578 flags); 4579 if (IS_ERR(vma)) 4580 return PTR_ERR(vma); 4581 } else { 4582 ret = i915_vma_bind(vma, obj->cache_level, flags); 4583 if (ret) 4584 return ret; 4585 } 4586 4587 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4588 (bound ^ vma->bound) & GLOBAL_BIND) { 4589 __i915_vma_set_map_and_fenceable(vma); 4590 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4591 } 4592 4593 vma->pin_count++; 4594 return 0; 4595 } 4596 4597 int 4598 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4599 struct i915_address_space *vm, 4600 uint32_t alignment, 4601 uint64_t flags) 4602 { 4603 return i915_gem_object_do_pin(obj, vm, 4604 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4605 alignment, flags); 4606 } 4607 4608 int 4609 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4610 const struct i915_ggtt_view *view, 4611 uint32_t alignment, 4612 uint64_t flags) 4613 { 4614 struct drm_device *dev = obj->base.dev; 4615 struct drm_i915_private *dev_priv = to_i915(dev); 4616 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4617 4618 BUG_ON(!view); 4619 4620 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4621 alignment, flags | PIN_GLOBAL); 4622 } 4623 4624 void 4625 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4626 const struct i915_ggtt_view *view) 4627 { 4628 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4629 4630 WARN_ON(vma->pin_count == 0); 4631 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4632 4633 --vma->pin_count; 4634 } 4635 4636 int 4637 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4638 struct drm_file *file) 4639 { 4640 struct drm_i915_gem_busy *args = data; 4641 struct drm_i915_gem_object *obj; 4642 int ret; 4643 4644 ret = i915_mutex_lock_interruptible(dev); 4645 if (ret) 4646 return ret; 4647 4648 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4649 if (&obj->base == NULL) { 4650 ret = -ENOENT; 4651 goto unlock; 4652 } 4653 4654 /* Count all active objects as busy, even if they are currently not used 4655 * by the gpu. Users of this interface expect objects to eventually 4656 * become non-busy without any further actions, therefore emit any 4657 * necessary flushes here. 
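 *
 * The returned value is a bitmask: bits 16 and up identify the engines
 * still reading from the object (1 << (16 + exec_id)), while the low
 * bits carry the exec_id of the engine with the outstanding write, if
 * any.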
4658 */ 4659 ret = i915_gem_object_flush_active(obj); 4660 if (ret) 4661 goto unref; 4662 4663 args->busy = 0; 4664 if (obj->active) { 4665 int i; 4666 4667 for (i = 0; i < I915_NUM_ENGINES; i++) { 4668 struct drm_i915_gem_request *req; 4669 4670 req = obj->last_read_req[i]; 4671 if (req) 4672 args->busy |= 1 << (16 + req->engine->exec_id); 4673 } 4674 if (obj->last_write_req) 4675 args->busy |= obj->last_write_req->engine->exec_id; 4676 } 4677 4678 unref: 4679 drm_gem_object_unreference(&obj->base); 4680 unlock: 4681 mutex_unlock(&dev->struct_mutex); 4682 return ret; 4683 } 4684 4685 int 4686 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4687 struct drm_file *file_priv) 4688 { 4689 return i915_gem_ring_throttle(dev, file_priv); 4690 } 4691 4692 int 4693 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4694 struct drm_file *file_priv) 4695 { 4696 struct drm_i915_private *dev_priv = to_i915(dev); 4697 struct drm_i915_gem_madvise *args = data; 4698 struct drm_i915_gem_object *obj; 4699 int ret; 4700 4701 switch (args->madv) { 4702 case I915_MADV_DONTNEED: 4703 case I915_MADV_WILLNEED: 4704 break; 4705 default: 4706 return -EINVAL; 4707 } 4708 4709 ret = i915_mutex_lock_interruptible(dev); 4710 if (ret) 4711 return ret; 4712 4713 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); 4714 if (&obj->base == NULL) { 4715 ret = -ENOENT; 4716 goto unlock; 4717 } 4718 4719 if (i915_gem_obj_is_pinned(obj)) { 4720 ret = -EINVAL; 4721 goto out; 4722 } 4723 4724 if (obj->pages && 4725 obj->tiling_mode != I915_TILING_NONE && 4726 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4727 if (obj->madv == I915_MADV_WILLNEED) 4728 i915_gem_object_unpin_pages(obj); 4729 if (args->madv == I915_MADV_WILLNEED) 4730 i915_gem_object_pin_pages(obj); 4731 } 4732 4733 if (obj->madv != __I915_MADV_PURGED) 4734 obj->madv = args->madv; 4735 4736 /* if the object is no longer attached, discard its backing storage */ 4737 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4738 i915_gem_object_truncate(obj); 4739 4740 args->retained = obj->madv != __I915_MADV_PURGED; 4741 4742 out: 4743 drm_gem_object_unreference(&obj->base); 4744 unlock: 4745 mutex_unlock(&dev->struct_mutex); 4746 return ret; 4747 } 4748 4749 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4750 const struct drm_i915_gem_object_ops *ops) 4751 { 4752 int i; 4753 4754 INIT_LIST_HEAD(&obj->global_list); 4755 for (i = 0; i < I915_NUM_ENGINES; i++) 4756 INIT_LIST_HEAD(&obj->engine_list[i]); 4757 INIT_LIST_HEAD(&obj->obj_exec_link); 4758 INIT_LIST_HEAD(&obj->vma_list); 4759 INIT_LIST_HEAD(&obj->batch_pool_link); 4760 4761 obj->ops = ops; 4762 4763 obj->fence_reg = I915_FENCE_REG_NONE; 4764 obj->madv = I915_MADV_WILLNEED; 4765 4766 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4767 } 4768 4769 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4770 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4771 .get_pages = i915_gem_object_get_pages_gtt, 4772 .put_pages = i915_gem_object_put_pages_gtt, 4773 }; 4774 4775 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, 4776 size_t size) 4777 { 4778 struct drm_i915_gem_object *obj; 4779 struct address_space *mapping; 4780 gfp_t mask; 4781 int ret; 4782 4783 obj = i915_gem_object_alloc(dev); 4784 if (obj == NULL) 4785 return ERR_PTR(-ENOMEM); 4786 4787 ret = drm_gem_object_init(dev, &obj->base, size); 4788 if (ret) 4789 goto fail; 4790 4791 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4792 if (IS_CRESTLINE(dev) || 
IS_BROADWATER(dev)) { 4793 /* 965gm cannot relocate objects above 4GiB. */ 4794 mask &= ~__GFP_HIGHMEM; 4795 mask |= __GFP_DMA32; 4796 } 4797 4798 mapping = obj->base.filp->f_mapping; 4799 mapping_set_gfp_mask(mapping, mask); 4800 4801 i915_gem_object_init(obj, &i915_gem_object_ops); 4802 4803 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4804 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4805 4806 if (HAS_LLC(dev)) { 4807 /* On some devices, we can have the GPU use the LLC (the CPU 4808 * cache) for about a 10% performance improvement 4809 * compared to uncached. Graphics requests other than 4810 * display scanout are coherent with the CPU in 4811 * accessing this cache. This means in this mode we 4812 * don't need to clflush on the CPU side, and on the 4813 * GPU side we only need to flush internal caches to 4814 * get data visible to the CPU. 4815 * 4816 * However, we maintain the display planes as UC, and so 4817 * need to rebind when first used as such. 4818 */ 4819 obj->cache_level = I915_CACHE_LLC; 4820 } else 4821 obj->cache_level = I915_CACHE_NONE; 4822 4823 trace_i915_gem_object_create(obj); 4824 4825 return obj; 4826 4827 fail: 4828 i915_gem_object_free(obj); 4829 4830 return ERR_PTR(ret); 4831 } 4832 4833 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4834 { 4835 /* If we are the last user of the backing storage (be it shmemfs 4836 * pages or stolen etc), we know that the pages are going to be 4837 * immediately released. In this case, we can then skip copying 4838 * back the contents from the GPU. 4839 */ 4840 4841 if (obj->madv != I915_MADV_WILLNEED) 4842 return false; 4843 4844 if (obj->base.filp == NULL) 4845 return true; 4846 4847 /* At first glance, this looks racy, but then again so would be 4848 * userspace racing mmap against close. However, the first external 4849 * reference to the filp can only be obtained through the 4850 * i915_gem_mmap_ioctl() which safeguards us against the user 4851 * acquiring such a reference whilst we are in the middle of 4852 * freeing the object. 4853 */ 4854 return atomic_long_read(&obj->base.filp->f_count) == 1; 4855 } 4856 4857 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4858 { 4859 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4860 struct drm_device *dev = obj->base.dev; 4861 struct drm_i915_private *dev_priv = to_i915(dev); 4862 struct i915_vma *vma, *next; 4863 4864 intel_runtime_pm_get(dev_priv); 4865 4866 trace_i915_gem_object_destroy(obj); 4867 4868 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4869 int ret; 4870 4871 vma->pin_count = 0; 4872 ret = i915_vma_unbind(vma); 4873 if (WARN_ON(ret == -ERESTARTSYS)) { 4874 bool was_interruptible; 4875 4876 was_interruptible = dev_priv->mm.interruptible; 4877 dev_priv->mm.interruptible = false; 4878 4879 WARN_ON(i915_vma_unbind(vma)); 4880 4881 dev_priv->mm.interruptible = was_interruptible; 4882 } 4883 } 4884 4885 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4886 * before progressing. 
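 * (Stolen objects keep their pages pinned for their whole lifetime, so
 * the pin taken at creation is dropped here before the pages are
 * released.)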
*/ 4887 if (obj->stolen) 4888 i915_gem_object_unpin_pages(obj); 4889 4890 WARN_ON(obj->frontbuffer_bits); 4891 4892 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4893 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4894 obj->tiling_mode != I915_TILING_NONE) 4895 i915_gem_object_unpin_pages(obj); 4896 4897 if (WARN_ON(obj->pages_pin_count)) 4898 obj->pages_pin_count = 0; 4899 if (discard_backing_storage(obj)) 4900 obj->madv = I915_MADV_DONTNEED; 4901 i915_gem_object_put_pages(obj); 4902 i915_gem_object_free_mmap_offset(obj); 4903 4904 BUG_ON(obj->pages); 4905 4906 if (obj->base.import_attach) 4907 drm_prime_gem_destroy(&obj->base, NULL); 4908 4909 if (obj->ops->release) 4910 obj->ops->release(obj); 4911 4912 drm_gem_object_release(&obj->base); 4913 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4914 4915 kfree(obj->bit_17); 4916 i915_gem_object_free(obj); 4917 4918 intel_runtime_pm_put(dev_priv); 4919 } 4920 4921 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4922 struct i915_address_space *vm) 4923 { 4924 struct i915_vma *vma; 4925 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4926 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && 4927 vma->vm == vm) 4928 return vma; 4929 } 4930 return NULL; 4931 } 4932 4933 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4934 const struct i915_ggtt_view *view) 4935 { 4936 struct i915_vma *vma; 4937 4938 GEM_BUG_ON(!view); 4939 4940 list_for_each_entry(vma, &obj->vma_list, obj_link) 4941 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 4942 return vma; 4943 return NULL; 4944 } 4945 4946 void i915_gem_vma_destroy(struct i915_vma *vma) 4947 { 4948 WARN_ON(vma->node.allocated); 4949 4950 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4951 if (!list_empty(&vma->exec_list)) 4952 return; 4953 4954 if (!vma->is_ggtt) 4955 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 4956 4957 list_del(&vma->obj_link); 4958 4959 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 4960 } 4961 4962 static void 4963 i915_gem_stop_engines(struct drm_device *dev) 4964 { 4965 struct drm_i915_private *dev_priv = to_i915(dev); 4966 struct intel_engine_cs *engine; 4967 4968 for_each_engine(engine, dev_priv) 4969 dev_priv->gt.stop_engine(engine); 4970 } 4971 4972 int 4973 i915_gem_suspend(struct drm_device *dev) 4974 { 4975 struct drm_i915_private *dev_priv = to_i915(dev); 4976 int ret = 0; 4977 4978 mutex_lock(&dev->struct_mutex); 4979 ret = i915_gem_wait_for_idle(dev_priv); 4980 if (ret) 4981 goto err; 4982 4983 i915_gem_retire_requests(dev_priv); 4984 4985 i915_gem_stop_engines(dev); 4986 i915_gem_context_lost(dev_priv); 4987 mutex_unlock(&dev->struct_mutex); 4988 4989 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4990 cancel_delayed_work_sync(&dev_priv->gt.retire_work); 4991 flush_delayed_work(&dev_priv->gt.idle_work); 4992 4993 /* Assert that we sucessfully flushed all the work and 4994 * reset the GPU back to its idle, low power state. 
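 * (gt.awake is cleared by the idle worker flushed just above, so it
 * must be false by the time we get here.)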
4995 */ 4996 WARN_ON(dev_priv->gt.awake); 4997 4998 return 0; 4999 5000 err: 5001 mutex_unlock(&dev->struct_mutex); 5002 return ret; 5003 } 5004 5005 void i915_gem_init_swizzling(struct drm_device *dev) 5006 { 5007 struct drm_i915_private *dev_priv = to_i915(dev); 5008 5009 if (INTEL_INFO(dev)->gen < 5 || 5010 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5011 return; 5012 5013 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5014 DISP_TILE_SURFACE_SWIZZLING); 5015 5016 if (IS_GEN5(dev)) 5017 return; 5018 5019 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5020 if (IS_GEN6(dev)) 5021 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5022 else if (IS_GEN7(dev)) 5023 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5024 else if (IS_GEN8(dev)) 5025 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5026 else 5027 BUG(); 5028 } 5029 5030 static void init_unused_ring(struct drm_device *dev, u32 base) 5031 { 5032 struct drm_i915_private *dev_priv = to_i915(dev); 5033 5034 I915_WRITE(RING_CTL(base), 0); 5035 I915_WRITE(RING_HEAD(base), 0); 5036 I915_WRITE(RING_TAIL(base), 0); 5037 I915_WRITE(RING_START(base), 0); 5038 } 5039 5040 static void init_unused_rings(struct drm_device *dev) 5041 { 5042 if (IS_I830(dev)) { 5043 init_unused_ring(dev, PRB1_BASE); 5044 init_unused_ring(dev, SRB0_BASE); 5045 init_unused_ring(dev, SRB1_BASE); 5046 init_unused_ring(dev, SRB2_BASE); 5047 init_unused_ring(dev, SRB3_BASE); 5048 } else if (IS_GEN2(dev)) { 5049 init_unused_ring(dev, SRB0_BASE); 5050 init_unused_ring(dev, SRB1_BASE); 5051 } else if (IS_GEN3(dev)) { 5052 init_unused_ring(dev, PRB1_BASE); 5053 init_unused_ring(dev, PRB2_BASE); 5054 } 5055 } 5056 5057 int i915_gem_init_engines(struct drm_device *dev) 5058 { 5059 struct drm_i915_private *dev_priv = to_i915(dev); 5060 int ret; 5061 5062 ret = intel_init_render_ring_buffer(dev); 5063 if (ret) 5064 return ret; 5065 5066 if (HAS_BSD(dev)) { 5067 ret = intel_init_bsd_ring_buffer(dev); 5068 if (ret) 5069 goto cleanup_render_ring; 5070 } 5071 5072 if (HAS_BLT(dev)) { 5073 ret = intel_init_blt_ring_buffer(dev); 5074 if (ret) 5075 goto cleanup_bsd_ring; 5076 } 5077 5078 if (HAS_VEBOX(dev)) { 5079 ret = intel_init_vebox_ring_buffer(dev); 5080 if (ret) 5081 goto cleanup_blt_ring; 5082 } 5083 5084 if (HAS_BSD2(dev)) { 5085 ret = intel_init_bsd2_ring_buffer(dev); 5086 if (ret) 5087 goto cleanup_vebox_ring; 5088 } 5089 5090 return 0; 5091 5092 cleanup_vebox_ring: 5093 intel_cleanup_engine(&dev_priv->engine[VECS]); 5094 cleanup_blt_ring: 5095 intel_cleanup_engine(&dev_priv->engine[BCS]); 5096 cleanup_bsd_ring: 5097 intel_cleanup_engine(&dev_priv->engine[VCS]); 5098 cleanup_render_ring: 5099 intel_cleanup_engine(&dev_priv->engine[RCS]); 5100 5101 return ret; 5102 } 5103 5104 int 5105 i915_gem_init_hw(struct drm_device *dev) 5106 { 5107 struct drm_i915_private *dev_priv = to_i915(dev); 5108 struct intel_engine_cs *engine; 5109 int ret; 5110 5111 /* Double layer security blanket, see i915_gem_init() */ 5112 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5113 5114 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) 5115 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5116 5117 if (IS_HASWELL(dev)) 5118 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 
5119 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5120 5121 if (HAS_PCH_NOP(dev)) { 5122 if (IS_IVYBRIDGE(dev)) { 5123 u32 temp = I915_READ(GEN7_MSG_CTL); 5124 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5125 I915_WRITE(GEN7_MSG_CTL, temp); 5126 } else if (INTEL_INFO(dev)->gen >= 7) { 5127 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5128 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5129 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5130 } 5131 } 5132 5133 i915_gem_init_swizzling(dev); 5134 5135 /* 5136 * At least 830 can leave some of the unused rings 5137 * "active" (ie. head != tail) after resume which 5138 * will prevent c3 entry. Makes sure all unused rings 5139 * are totally idle. 5140 */ 5141 init_unused_rings(dev); 5142 5143 BUG_ON(!dev_priv->kernel_context); 5144 5145 ret = i915_ppgtt_init_hw(dev); 5146 if (ret) { 5147 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5148 goto out; 5149 } 5150 5151 /* Need to do basic initialisation of all rings first: */ 5152 for_each_engine(engine, dev_priv) { 5153 ret = engine->init_hw(engine); 5154 if (ret) 5155 goto out; 5156 } 5157 5158 intel_mocs_init_l3cc_table(dev); 5159 5160 /* We can't enable contexts until all firmware is loaded */ 5161 ret = intel_guc_setup(dev); 5162 if (ret) 5163 goto out; 5164 5165 out: 5166 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5167 return ret; 5168 } 5169 5170 int i915_gem_init(struct drm_device *dev) 5171 { 5172 struct drm_i915_private *dev_priv = to_i915(dev); 5173 int ret; 5174 5175 mutex_lock(&dev->struct_mutex); 5176 5177 if (!i915.enable_execlists) { 5178 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5179 dev_priv->gt.init_engines = i915_gem_init_engines; 5180 dev_priv->gt.cleanup_engine = intel_cleanup_engine; 5181 dev_priv->gt.stop_engine = intel_stop_engine; 5182 } else { 5183 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5184 dev_priv->gt.init_engines = intel_logical_rings_init; 5185 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5186 dev_priv->gt.stop_engine = intel_logical_ring_stop; 5187 } 5188 5189 /* This is just a security blanket to placate dragons. 5190 * On some systems, we very sporadically observe that the first TLBs 5191 * used by the CS may be stale, despite us poking the TLB reset. If 5192 * we hold the forcewake during initialisation these problems 5193 * just magically go away. 5194 */ 5195 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5196 5197 i915_gem_init_userptr(dev_priv); 5198 i915_gem_init_ggtt(dev); 5199 5200 ret = i915_gem_context_init(dev); 5201 if (ret) 5202 goto out_unlock; 5203 5204 ret = dev_priv->gt.init_engines(dev); 5205 if (ret) 5206 goto out_unlock; 5207 5208 ret = i915_gem_init_hw(dev); 5209 if (ret == -EIO) { 5210 /* Allow ring initialisation to fail by marking the GPU as 5211 * wedged. But we only want to do this where the GPU is angry, 5212 * for all other failure, such as an allocation failure, bail. 
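 * Setting I915_WEDGED in the reset counter makes paths such as
 * i915_gem_ring_throttle() above report -EIO to userspace (they check
 * i915_terminally_wedged()) rather than waiting on a dead GPU.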
5213 */ 5214 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5215 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5216 ret = 0; 5217 } 5218 5219 out_unlock: 5220 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5221 mutex_unlock(&dev->struct_mutex); 5222 5223 return ret; 5224 } 5225 5226 void 5227 i915_gem_cleanup_engines(struct drm_device *dev) 5228 { 5229 struct drm_i915_private *dev_priv = to_i915(dev); 5230 struct intel_engine_cs *engine; 5231 5232 for_each_engine(engine, dev_priv) 5233 dev_priv->gt.cleanup_engine(engine); 5234 } 5235 5236 static void 5237 init_engine_lists(struct intel_engine_cs *engine) 5238 { 5239 INIT_LIST_HEAD(&engine->active_list); 5240 INIT_LIST_HEAD(&engine->request_list); 5241 } 5242 5243 void 5244 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5245 { 5246 struct drm_device *dev = &dev_priv->drm; 5247 5248 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5249 !IS_CHERRYVIEW(dev_priv)) 5250 dev_priv->num_fence_regs = 32; 5251 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 5252 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 5253 dev_priv->num_fence_regs = 16; 5254 else 5255 dev_priv->num_fence_regs = 8; 5256 5257 if (intel_vgpu_active(dev_priv)) 5258 dev_priv->num_fence_regs = 5259 I915_READ(vgtif_reg(avail_rs.fence_num)); 5260 5261 /* Initialize fence registers to zero */ 5262 i915_gem_restore_fences(dev); 5263 5264 i915_gem_detect_bit_6_swizzle(dev); 5265 } 5266 5267 void 5268 i915_gem_load_init(struct drm_device *dev) 5269 { 5270 struct drm_i915_private *dev_priv = to_i915(dev); 5271 int i; 5272 5273 dev_priv->objects = 5274 kmem_cache_create("i915_gem_object", 5275 sizeof(struct drm_i915_gem_object), 0, 5276 SLAB_HWCACHE_ALIGN, 5277 NULL); 5278 dev_priv->vmas = 5279 kmem_cache_create("i915_gem_vma", 5280 sizeof(struct i915_vma), 0, 5281 SLAB_HWCACHE_ALIGN, 5282 NULL); 5283 dev_priv->requests = 5284 kmem_cache_create("i915_gem_request", 5285 sizeof(struct drm_i915_gem_request), 0, 5286 SLAB_HWCACHE_ALIGN, 5287 NULL); 5288 5289 INIT_LIST_HEAD(&dev_priv->vm_list); 5290 INIT_LIST_HEAD(&dev_priv->context_list); 5291 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5292 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5293 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5294 for (i = 0; i < I915_NUM_ENGINES; i++) 5295 init_engine_lists(&dev_priv->engine[i]); 5296 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5297 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5298 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5299 i915_gem_retire_work_handler); 5300 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5301 i915_gem_idle_work_handler); 5302 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5303 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5304 5305 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5306 5307 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5308 5309 init_waitqueue_head(&dev_priv->pending_flip_queue); 5310 5311 dev_priv->mm.interruptible = true; 5312 5313 mutex_init(&dev_priv->fb_tracking.lock); 5314 } 5315 5316 void i915_gem_load_cleanup(struct drm_device *dev) 5317 { 5318 struct drm_i915_private *dev_priv = to_i915(dev); 5319 5320 kmem_cache_destroy(dev_priv->requests); 5321 kmem_cache_destroy(dev_priv->vmas); 5322 kmem_cache_destroy(dev_priv->objects); 5323 } 5324 5325 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 5326 { 5327 struct drm_i915_gem_object *obj; 5328 5329 /* Called just before we write the hibernation image. 
5330 * 5331 * We need to update the domain tracking to reflect that the CPU 5332 * will be accessing all the pages to create and restore from the 5333 * hibernation, and so upon restoration those pages will be in the 5334 * CPU domain. 5335 * 5336 * To make sure the hibernation image contains the latest state, 5337 * we update that state just before writing out the image. 5338 */ 5339 5340 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { 5341 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 5342 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 5343 } 5344 5345 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5346 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 5347 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 5348 } 5349 5350 return 0; 5351 } 5352 5353 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5354 { 5355 struct drm_i915_file_private *file_priv = file->driver_priv; 5356 5357 /* Clean up our request list when the client is going away, so that 5358 * later retire_requests won't dereference our soon-to-be-gone 5359 * file_priv. 5360 */ 5361 spin_lock(&file_priv->mm.lock); 5362 while (!list_empty(&file_priv->mm.request_list)) { 5363 struct drm_i915_gem_request *request; 5364 5365 request = list_first_entry(&file_priv->mm.request_list, 5366 struct drm_i915_gem_request, 5367 client_list); 5368 list_del(&request->client_list); 5369 request->file_priv = NULL; 5370 } 5371 spin_unlock(&file_priv->mm.lock); 5372 5373 if (!list_empty(&file_priv->rps.link)) { 5374 spin_lock(&to_i915(dev)->rps.client_lock); 5375 list_del(&file_priv->rps.link); 5376 spin_unlock(&to_i915(dev)->rps.client_lock); 5377 } 5378 } 5379 5380 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5381 { 5382 struct drm_i915_file_private *file_priv; 5383 int ret; 5384 5385 DRM_DEBUG_DRIVER("\n"); 5386 5387 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5388 if (!file_priv) 5389 return -ENOMEM; 5390 5391 file->driver_priv = file_priv; 5392 file_priv->dev_priv = to_i915(dev); 5393 file_priv->file = file; 5394 INIT_LIST_HEAD(&file_priv->rps.link); 5395 5396 spin_lock_init(&file_priv->mm.lock); 5397 INIT_LIST_HEAD(&file_priv->mm.request_list); 5398 5399 file_priv->bsd_ring = -1; 5400 5401 ret = i915_gem_context_open(dev, file); 5402 if (ret) 5403 kfree(file_priv); 5404 5405 return ret; 5406 } 5407 5408 /** 5409 * i915_gem_track_fb - update frontbuffer tracking 5410 * @old: current GEM buffer for the frontbuffer slots 5411 * @new: new GEM buffer for the frontbuffer slots 5412 * @frontbuffer_bits: bitmask of frontbuffer slots 5413 * 5414 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5415 * from @old and setting them in @new. Both @old and @new can be NULL. 
5416 */ 5417 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5418 struct drm_i915_gem_object *new, 5419 unsigned frontbuffer_bits) 5420 { 5421 if (old) { 5422 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5423 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5424 old->frontbuffer_bits &= ~frontbuffer_bits; 5425 } 5426 5427 if (new) { 5428 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5429 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5430 new->frontbuffer_bits |= frontbuffer_bits; 5431 } 5432 } 5433 5434 /* All the new VM stuff */ 5435 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5436 struct i915_address_space *vm) 5437 { 5438 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5439 struct i915_vma *vma; 5440 5441 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5442 5443 list_for_each_entry(vma, &o->vma_list, obj_link) { 5444 if (vma->is_ggtt && 5445 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5446 continue; 5447 if (vma->vm == vm) 5448 return vma->node.start; 5449 } 5450 5451 WARN(1, "%s vma for this object not found.\n", 5452 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5453 return -1; 5454 } 5455 5456 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5457 const struct i915_ggtt_view *view) 5458 { 5459 struct i915_vma *vma; 5460 5461 list_for_each_entry(vma, &o->vma_list, obj_link) 5462 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 5463 return vma->node.start; 5464 5465 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5466 return -1; 5467 } 5468 5469 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5470 struct i915_address_space *vm) 5471 { 5472 struct i915_vma *vma; 5473 5474 list_for_each_entry(vma, &o->vma_list, obj_link) { 5475 if (vma->is_ggtt && 5476 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5477 continue; 5478 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5479 return true; 5480 } 5481 5482 return false; 5483 } 5484 5485 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5486 const struct i915_ggtt_view *view) 5487 { 5488 struct i915_vma *vma; 5489 5490 list_for_each_entry(vma, &o->vma_list, obj_link) 5491 if (vma->is_ggtt && 5492 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5493 drm_mm_node_allocated(&vma->node)) 5494 return true; 5495 5496 return false; 5497 } 5498 5499 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5500 { 5501 struct i915_vma *vma; 5502 5503 list_for_each_entry(vma, &o->vma_list, obj_link) 5504 if (drm_mm_node_allocated(&vma->node)) 5505 return true; 5506 5507 return false; 5508 } 5509 5510 unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) 5511 { 5512 struct i915_vma *vma; 5513 5514 GEM_BUG_ON(list_empty(&o->vma_list)); 5515 5516 list_for_each_entry(vma, &o->vma_list, obj_link) { 5517 if (vma->is_ggtt && 5518 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 5519 return vma->node.size; 5520 } 5521 5522 return 0; 5523 } 5524 5525 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5526 { 5527 struct i915_vma *vma; 5528 list_for_each_entry(vma, &obj->vma_list, obj_link) 5529 if (vma->pin_count > 0) 5530 return true; 5531 5532 return false; 5533 } 5534 5535 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5536 struct page * 5537 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5538 { 5539 struct page *page; 5540 5541 /* Only default objects have per-page dirty tracking */ 5542 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 5543 
return NULL; 5544 5545 page = i915_gem_object_get_page(obj, n); 5546 set_page_dirty(page); 5547 return page; 5548 } 5549 5550 /* Allocate a new GEM object and fill it with the supplied data */ 5551 struct drm_i915_gem_object * 5552 i915_gem_object_create_from_data(struct drm_device *dev, 5553 const void *data, size_t size) 5554 { 5555 struct drm_i915_gem_object *obj; 5556 struct sg_table *sg; 5557 size_t bytes; 5558 int ret; 5559 5560 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE)); 5561 if (IS_ERR(obj)) 5562 return obj; 5563 5564 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5565 if (ret) 5566 goto fail; 5567 5568 ret = i915_gem_object_get_pages(obj); 5569 if (ret) 5570 goto fail; 5571 5572 i915_gem_object_pin_pages(obj); 5573 sg = obj->pages; 5574 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5575 obj->dirty = 1; /* Backing store is now out of date */ 5576 i915_gem_object_unpin_pages(obj); 5577 5578 if (WARN_ON(bytes != size)) { 5579 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5580 ret = -EFAULT; 5581 goto fail; 5582 } 5583 5584 return obj; 5585 5586 fail: 5587 drm_gem_object_unreference(&obj->base); 5588 return ERR_PTR(ret); 5589 } 5590
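/*
 * Illustrative usage sketch (not part of the driver): creating a GEM
 * object pre-filled with a caller-supplied blob via
 * i915_gem_object_create_from_data(). The "blob"/"blob_size" names are
 * hypothetical.
 *
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * On success the object holds its own copy of the data (the backing
 * store is marked dirty above), and the caller drops its reference
 * with drm_gem_object_unreference() when done.
 */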