/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_pm.h"
#include "gem/i915_gemfs.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_mocs.h"
#include "gt/intel_reset.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_drv.h"
#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	mutex_lock(&ggtt->vm.mutex);

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret = 0;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		spin_unlock(&obj->vma.lock);

		ret = i915_vma_unbind(vma);

		spin_lock(&obj->vma.lock);
	}
	list_splice(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	size = round_up(*size_p, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create_shmem(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       &args->size, &args->handle);
}

void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
	intel_wakeref_t wakeref;

	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	wmb();

	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
		return;

	i915_gem_chipset_flush(dev_priv);

	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
		struct intel_uncore *uncore = &dev_priv->uncore;

		spin_lock_irq(&uncore->lock);
		intel_uncore_posting_read_fw(uncore,
					     RING_HEAD(RENDER_RING_BASE));
		spin_unlock_irq(&uncore->lock);
	}
}

static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	mutex_unlock(&i915->drm.struct_mutex);

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	mutex_lock(&i915->drm.struct_mutex);
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
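 * Returns 0 on success, or a negative error code on failure.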
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
585 */ 586 wakeref = intel_runtime_pm_get_if_in_use(rpm); 587 if (!wakeref) { 588 ret = -EFAULT; 589 goto out_unlock; 590 } 591 } else { 592 /* No backing pages, no fallback, we must force GGTT access */ 593 wakeref = intel_runtime_pm_get(rpm); 594 } 595 596 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 597 PIN_MAPPABLE | 598 PIN_NONFAULT | 599 PIN_NONBLOCK); 600 if (!IS_ERR(vma)) { 601 node.start = i915_ggtt_offset(vma); 602 node.allocated = false; 603 ret = i915_vma_put_fence(vma); 604 if (ret) { 605 i915_vma_unpin(vma); 606 vma = ERR_PTR(ret); 607 } 608 } 609 if (IS_ERR(vma)) { 610 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 611 if (ret) 612 goto out_rpm; 613 GEM_BUG_ON(!node.allocated); 614 } 615 616 mutex_unlock(&i915->drm.struct_mutex); 617 618 ret = i915_gem_object_lock_interruptible(obj); 619 if (ret) 620 goto out_unpin; 621 622 ret = i915_gem_object_set_to_gtt_domain(obj, true); 623 if (ret) { 624 i915_gem_object_unlock(obj); 625 goto out_unpin; 626 } 627 628 fence = i915_gem_object_lock_fence(obj); 629 i915_gem_object_unlock(obj); 630 if (!fence) { 631 ret = -ENOMEM; 632 goto out_unpin; 633 } 634 635 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 636 637 user_data = u64_to_user_ptr(args->data_ptr); 638 offset = args->offset; 639 remain = args->size; 640 while (remain) { 641 /* Operation in this page 642 * 643 * page_base = page offset within aperture 644 * page_offset = offset within page 645 * page_length = bytes to copy for this page 646 */ 647 u32 page_base = node.start; 648 unsigned int page_offset = offset_in_page(offset); 649 unsigned int page_length = PAGE_SIZE - page_offset; 650 page_length = remain < page_length ? remain : page_length; 651 if (node.allocated) { 652 wmb(); /* flush the write before we modify the GGTT */ 653 ggtt->vm.insert_page(&ggtt->vm, 654 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 655 node.start, I915_CACHE_NONE, 0); 656 wmb(); /* flush modifications to the GGTT (insert_page) */ 657 } else { 658 page_base += offset & PAGE_MASK; 659 } 660 /* If we get a fault while copying data, then (presumably) our 661 * source page isn't available. Return the error and we'll 662 * retry in the slow path. 663 * If the object is non-shmem backed, we retry again with the 664 * path that handles page fault. 665 */ 666 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 667 user_data, page_length)) { 668 ret = -EFAULT; 669 break; 670 } 671 672 remain -= page_length; 673 user_data += page_length; 674 offset += page_length; 675 } 676 intel_fb_obj_flush(obj, ORIGIN_CPU); 677 678 i915_gem_object_unlock_fence(obj, fence); 679 out_unpin: 680 mutex_lock(&i915->drm.struct_mutex); 681 if (node.allocated) { 682 wmb(); 683 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 684 remove_mappable_node(&node); 685 } else { 686 i915_vma_unpin(vma); 687 } 688 out_rpm: 689 intel_runtime_pm_put(rpm, wakeref); 690 out_unlock: 691 mutex_unlock(&i915->drm.struct_mutex); 692 return ret; 693 } 694 695 /* Per-page copy function for the shmem pwrite fastpath. 696 * Flushes invalid cachelines before writing to the target if 697 * needs_clflush_before is set and flushes out any written cachelines after 698 * writing if needs_clflush is set. 
699 */ 700 static int 701 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 702 bool needs_clflush_before, 703 bool needs_clflush_after) 704 { 705 char *vaddr; 706 int ret; 707 708 vaddr = kmap(page); 709 710 if (needs_clflush_before) 711 drm_clflush_virt_range(vaddr + offset, len); 712 713 ret = __copy_from_user(vaddr + offset, user_data, len); 714 if (!ret && needs_clflush_after) 715 drm_clflush_virt_range(vaddr + offset, len); 716 717 kunmap(page); 718 719 return ret ? -EFAULT : 0; 720 } 721 722 static int 723 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 724 const struct drm_i915_gem_pwrite *args) 725 { 726 unsigned int partial_cacheline_write; 727 unsigned int needs_clflush; 728 unsigned int offset, idx; 729 struct dma_fence *fence; 730 void __user *user_data; 731 u64 remain; 732 int ret; 733 734 ret = i915_gem_object_prepare_write(obj, &needs_clflush); 735 if (ret) 736 return ret; 737 738 fence = i915_gem_object_lock_fence(obj); 739 i915_gem_object_finish_access(obj); 740 if (!fence) 741 return -ENOMEM; 742 743 /* If we don't overwrite a cacheline completely we need to be 744 * careful to have up-to-date data by first clflushing. Don't 745 * overcomplicate things and flush the entire patch. 746 */ 747 partial_cacheline_write = 0; 748 if (needs_clflush & CLFLUSH_BEFORE) 749 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 750 751 user_data = u64_to_user_ptr(args->data_ptr); 752 remain = args->size; 753 offset = offset_in_page(args->offset); 754 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 755 struct page *page = i915_gem_object_get_page(obj, idx); 756 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 757 758 ret = shmem_pwrite(page, offset, length, user_data, 759 (offset | length) & partial_cacheline_write, 760 needs_clflush & CLFLUSH_AFTER); 761 if (ret) 762 break; 763 764 remain -= length; 765 user_data += length; 766 offset = 0; 767 } 768 769 intel_fb_obj_flush(obj, ORIGIN_CPU); 770 i915_gem_object_unlock_fence(obj, fence); 771 772 return ret; 773 } 774 775 /** 776 * Writes data to the object referenced by handle. 777 * @dev: drm device 778 * @data: ioctl data blob 779 * @file: drm file 780 * 781 * On error, the contents of the buffer that were to be modified are undefined. 782 */ 783 int 784 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 785 struct drm_file *file) 786 { 787 struct drm_i915_gem_pwrite *args = data; 788 struct drm_i915_gem_object *obj; 789 int ret; 790 791 if (args->size == 0) 792 return 0; 793 794 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) 795 return -EFAULT; 796 797 obj = i915_gem_object_lookup(file, args->handle); 798 if (!obj) 799 return -ENOENT; 800 801 /* Bounds check destination. 
	 */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap(obj);

	/*
	 * The fence will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
919 * 920 * Previously we used reg->pin_count as a "liveness" indicator. 921 * That is not sufficient, and we need a more fine-grained 922 * tool if we want to have a sanity check here. 923 */ 924 925 if (!reg->vma) 926 continue; 927 928 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 929 reg->dirty = true; 930 } 931 } 932 933 static int wait_for_engines(struct drm_i915_private *i915) 934 { 935 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 936 dev_err(i915->drm.dev, 937 "Failed to idle engines, declaring wedged!\n"); 938 GEM_TRACE_DUMP(); 939 i915_gem_set_wedged(i915); 940 return -EIO; 941 } 942 943 return 0; 944 } 945 946 static long 947 wait_for_timelines(struct drm_i915_private *i915, 948 unsigned int flags, long timeout) 949 { 950 struct i915_gt_timelines *gt = &i915->gt.timelines; 951 struct i915_timeline *tl; 952 953 mutex_lock(>->mutex); 954 list_for_each_entry(tl, >->active_list, link) { 955 struct i915_request *rq; 956 957 rq = i915_active_request_get_unlocked(&tl->last_request); 958 if (!rq) 959 continue; 960 961 mutex_unlock(>->mutex); 962 963 /* 964 * "Race-to-idle". 965 * 966 * Switching to the kernel context is often used a synchronous 967 * step prior to idling, e.g. in suspend for flushing all 968 * current operations to memory before sleeping. These we 969 * want to complete as quickly as possible to avoid prolonged 970 * stalls, so allow the gpu to boost to maximum clocks. 971 */ 972 if (flags & I915_WAIT_FOR_IDLE_BOOST) 973 gen6_rps_boost(rq); 974 975 timeout = i915_request_wait(rq, flags, timeout); 976 i915_request_put(rq); 977 if (timeout < 0) 978 return timeout; 979 980 /* restart after reacquiring the lock */ 981 mutex_lock(>->mutex); 982 tl = list_entry(>->active_list, typeof(*tl), link); 983 } 984 mutex_unlock(>->mutex); 985 986 return timeout; 987 } 988 989 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 990 unsigned int flags, long timeout) 991 { 992 GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n", 993 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 994 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "", 995 yesno(i915->gt.awake)); 996 997 /* If the device is asleep, we have no requests outstanding */ 998 if (!READ_ONCE(i915->gt.awake)) 999 return 0; 1000 1001 timeout = wait_for_timelines(i915, flags, timeout); 1002 if (timeout < 0) 1003 return timeout; 1004 1005 if (flags & I915_WAIT_LOCKED) { 1006 int err; 1007 1008 lockdep_assert_held(&i915->drm.struct_mutex); 1009 1010 err = wait_for_engines(i915); 1011 if (err) 1012 return err; 1013 1014 i915_retire_requests(i915); 1015 } 1016 1017 return 0; 1018 } 1019 1020 struct i915_vma * 1021 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 1022 const struct i915_ggtt_view *view, 1023 u64 size, 1024 u64 alignment, 1025 u64 flags) 1026 { 1027 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 1028 struct i915_address_space *vm = &dev_priv->ggtt.vm; 1029 struct i915_vma *vma; 1030 int ret; 1031 1032 lockdep_assert_held(&obj->base.dev->struct_mutex); 1033 1034 if (flags & PIN_MAPPABLE && 1035 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 1036 /* If the required space is larger than the available 1037 * aperture, we will not able to find a slot for the 1038 * object and unbinding the object now will be in 1039 * vain. Worse, doing so may cause us to ping-pong 1040 * the object in and out of the Global GTT and 1041 * waste a lot of cycles under the mutex. 
		 */
		if (obj->base.size > dev_priv->ggtt.mappable_end)
			return ERR_PTR(-E2BIG);

		/* If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		WARN(i915_vma_is_pinned(vma),
		     "bo is already pinned in ggtt with incorrect alignment:"
		     " offset=%08x, req.alignment=%llx,"
		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
		     i915_ggtt_offset(vma), alignment,
		     !!(flags & PIN_MAPPABLE),
		     i915_vma_is_map_and_fenceable(vma));
		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	return vma;
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its
	 * backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}

void i915_gem_sanitize(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;

	GEM_TRACE("\n");

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);

	/*
	 * As we have just resumed the machine and woken the device up from
	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
	 * back to defaults, recovering from whatever wedged state we left it
	 * in and so worth trying to use the device once more.
	 */
	if (i915_terminally_wedged(i915))
		i915_gem_unset_wedged(i915);

	/*
	 * If we inherit context state from the BIOS or earlier occupants
	 * of the GPU, the GPU may be in an inconsistent state when we
	 * try to take over. The only way to remove the earlier state
	 * is by resetting. However, resetting on earlier gen is tricky as
	 * it may impact the display and we are uncertain about the stability
	 * of the reset, so this could be applied to even earlier gen.
	 */
	intel_gt_sanitize(i915, false);

	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN(dev_priv, 5))
		return;

	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN(dev_priv, 6))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else if (IS_GEN(dev_priv, 7))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
	else if (IS_GEN(dev_priv, 8))
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
	else
		BUG();
}

static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
{
	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}

static void init_unused_rings(struct drm_i915_private *dev_priv)
{
	if (IS_I830(dev_priv)) {
		init_unused_ring(dev_priv, PRB1_BASE);
		init_unused_ring(dev_priv, SRB0_BASE);
		init_unused_ring(dev_priv, SRB1_BASE);
		init_unused_ring(dev_priv, SRB2_BASE);
		init_unused_ring(dev_priv, SRB3_BASE);
	} else if (IS_GEN(dev_priv, 2)) {
		init_unused_ring(dev_priv, SRB0_BASE);
		init_unused_ring(dev_priv, SRB1_BASE);
	} else if (IS_GEN(dev_priv, 3)) {
		init_unused_ring(dev_priv, PRB1_BASE);
		init_unused_ring(dev_priv, PRB2_BASE);
	}
}

int i915_gem_init_hw(struct drm_i915_private *dev_priv)
{
	int ret;

	dev_priv->gt.last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev_priv))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(dev_priv);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(dev_priv, "init");

	i915_gem_init_swizzling(dev_priv);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev_priv);

	BUG_ON(!dev_priv->kernel_context);
	ret = i915_terminally_wedged(dev_priv);
	if (ret)
		goto out;

	ret = i915_ppgtt_init_hw(dev_priv);
	if (ret) {
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	ret = intel_wopcm_init_hw(&dev_priv->wopcm);
	if (ret) {
		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(dev_priv);
	if (ret) {
		DRM_ERROR("Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init_l3cc_table(dev_priv);

	/* Only when the HW is re-initialised, can we replay the requests */
	ret = intel_engines_resume(dev_priv);
	if (ret)
		goto cleanup_uc;

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);

	intel_engines_set_scheduler_caps(dev_priv);
	return 0;

cleanup_uc:
	intel_uc_fini_hw(dev_priv);
out:
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);

	return ret;
}

static int __intel_engines_record_defaults(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct i915_gem_engines *e;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	ctx = i915_gem_context_create_kernel(i915, 0);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	e = i915_gem_context_lock_engines(ctx);

	for_each_engine(engine, i915, id) {
		struct intel_context *ce = e->engines[id];
		struct i915_request *rq;

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_active;
		}

		err = 0;
		if (rq->engine->init_context)
			err = rq->engine->init_context(rq);

		i915_request_add(rq);
		if (err)
			goto err_active;
	}

	/* Flush the default context image to memory, and enable powersaving.
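	 * If this fails we cannot trust the recorded image, so bail out
	 * with -EIO below.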
	 */
	if (!i915_gem_load_power_context(i915)) {
		err = -EIO;
		goto err_active;
	}

	for_each_engine(engine, i915, id) {
		struct intel_context *ce = e->engines[id];
		struct i915_vma *state = ce->state;
		void *vaddr;

		if (!state)
			continue;

		GEM_BUG_ON(intel_context_is_pinned(ce));

		/*
		 * As we will hold a reference to the logical state, it will
		 * not be torn down with the context, and importantly the
		 * object will hold onto its vma (making it possible for a
		 * stray GTT write to corrupt our defaults). Unmap the vma
		 * from the GTT to prevent such accidents and reclaim the
		 * space.
		 */
		err = i915_vma_unbind(state);
		if (err)
			goto err_active;

		i915_gem_object_lock(state->obj);
		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
		i915_gem_object_unlock(state->obj);
		if (err)
			goto err_active;

		engine->default_state = i915_gem_object_get(state->obj);
		i915_gem_object_set_cache_coherency(engine->default_state,
						    I915_CACHE_LLC);

		/* Check we can acquire the image of the context state */
		vaddr = i915_gem_object_pin_map(engine->default_state,
						I915_MAP_FORCE_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_active;
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
		unsigned int found = intel_engines_has_context_isolation(i915);

		/*
		 * Make sure that classes with multiple engine instances all
		 * share the same basic configuration.
		 */
		for_each_engine(engine, i915, id) {
			unsigned int bit = BIT(engine->uabi_class);
			unsigned int expected = engine->default_state ? bit : 0;

			if ((found & bit) != expected) {
				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
					  engine->uabi_class, engine->name);
			}
		}
	}

out_ctx:
	i915_gem_context_unlock_engines(ctx);
	i915_gem_context_set_closed(ctx);
	i915_gem_context_put(ctx);
	return err;

err_active:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	i915_gem_set_wedged(i915);
	goto out_ctx;
}

static int
i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_stolen(i915, size);
	if (!obj)
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto err_unref;

	i915->gt.scratch = vma;
	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_fini_scratch(struct drm_i915_private *i915)
{
	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
}

static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, i915, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	return err;
}

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fallback to 4K pages if host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);

	i915_timelines_init(dev_priv);

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	ret = intel_uc_init_misc(dev_priv);
	if (ret)
		return ret;

	ret = intel_wopcm_init(&dev_priv->wopcm);
	if (ret)
		goto err_uc_misc;

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	ret = i915_gem_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_init_scratch(dev_priv,
				    IS_GEN(dev_priv, 2) ?
				    SZ_256K : PAGE_SIZE);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_ggtt;
	}

	ret = intel_engines_setup(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_contexts_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_scratch;
	}

	ret = intel_engines_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_context;
	}

	intel_init_gt_powersave(dev_priv);

	ret = intel_uc_init(dev_priv);
	if (ret)
		goto err_pm;

	ret = i915_gem_init_hw(dev_priv);
	if (ret)
		goto err_uc_init;

	/*
	 * Despite its name intel_init_clock_gating applies both display
	 * clock gating workarounds; GT mmio workarounds and the occasional
	 * GT power context workaround. Worse, sometimes it includes a context
	 * register workaround which we need to apply before we record the
	 * default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_engines_verify_workarounds(dev_priv);
	if (ret)
		goto err_init_hw;

	ret = __intel_engines_record_defaults(dev_priv);
	if (ret)
		goto err_init_hw;

	if (i915_inject_load_failure()) {
		ret = -ENODEV;
		goto err_init_hw;
	}

	if (i915_inject_load_failure()) {
		ret = -EIO;
		goto err_init_hw;
	}

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

	/*
	 * Unwinding is complicated by the fact that we want to handle -EIO
	 * to mean disable GPU submission but keep KMS alive. We want to mark
	 * the HW as irreversibly wedged, but keep enough state around that
	 * the driver doesn't explode during runtime.
	 */
err_init_hw:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	i915_gem_set_wedged(dev_priv);
	i915_gem_suspend(dev_priv);
	i915_gem_suspend_late(dev_priv);

	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
err_uc_init:
	intel_uc_fini(dev_priv);
err_pm:
	if (ret != -EIO) {
		intel_cleanup_gt_powersave(dev_priv);
		intel_engines_cleanup(dev_priv);
	}
err_context:
	if (ret != -EIO)
		i915_gem_contexts_fini(dev_priv);
err_scratch:
	i915_gem_fini_scratch(dev_priv);
err_ggtt:
err_unlock:
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_uc_misc:
	intel_uc_fini_misc(dev_priv);

	if (ret != -EIO) {
		i915_gem_cleanup_userptr(dev_priv);
		i915_timelines_fini(dev_priv);
	}

	if (ret == -EIO) {
		mutex_lock(&dev_priv->drm.struct_mutex);

		/*
		 * Allow engine initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failure, such as an allocation failure, bail.
1666 */ 1667 if (!i915_reset_failed(dev_priv)) { 1668 i915_load_error(dev_priv, 1669 "Failed to initialize GPU, declaring it wedged!\n"); 1670 i915_gem_set_wedged(dev_priv); 1671 } 1672 1673 /* Minimal basic recovery for KMS */ 1674 ret = i915_ggtt_enable_hw(dev_priv); 1675 i915_gem_restore_gtt_mappings(dev_priv); 1676 i915_gem_restore_fences(dev_priv); 1677 intel_init_clock_gating(dev_priv); 1678 1679 mutex_unlock(&dev_priv->drm.struct_mutex); 1680 } 1681 1682 i915_gem_drain_freed_objects(dev_priv); 1683 return ret; 1684 } 1685 1686 void i915_gem_fini_hw(struct drm_i915_private *dev_priv) 1687 { 1688 GEM_BUG_ON(dev_priv->gt.awake); 1689 1690 intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); 1691 1692 i915_gem_suspend_late(dev_priv); 1693 intel_disable_gt_powersave(dev_priv); 1694 1695 /* Flush any outstanding unpin_work. */ 1696 i915_gem_drain_workqueue(dev_priv); 1697 1698 mutex_lock(&dev_priv->drm.struct_mutex); 1699 intel_uc_fini_hw(dev_priv); 1700 intel_uc_fini(dev_priv); 1701 mutex_unlock(&dev_priv->drm.struct_mutex); 1702 1703 i915_gem_drain_freed_objects(dev_priv); 1704 } 1705 1706 void i915_gem_fini(struct drm_i915_private *dev_priv) 1707 { 1708 mutex_lock(&dev_priv->drm.struct_mutex); 1709 intel_engines_cleanup(dev_priv); 1710 i915_gem_contexts_fini(dev_priv); 1711 i915_gem_fini_scratch(dev_priv); 1712 mutex_unlock(&dev_priv->drm.struct_mutex); 1713 1714 intel_wa_list_free(&dev_priv->gt_wa_list); 1715 1716 intel_cleanup_gt_powersave(dev_priv); 1717 1718 intel_uc_fini_misc(dev_priv); 1719 i915_gem_cleanup_userptr(dev_priv); 1720 i915_timelines_fini(dev_priv); 1721 1722 i915_gem_drain_freed_objects(dev_priv); 1723 1724 WARN_ON(!list_empty(&dev_priv->contexts.list)); 1725 } 1726 1727 void i915_gem_init_mmio(struct drm_i915_private *i915) 1728 { 1729 i915_gem_sanitize(i915); 1730 } 1731 1732 static void i915_gem_init__mm(struct drm_i915_private *i915) 1733 { 1734 spin_lock_init(&i915->mm.obj_lock); 1735 spin_lock_init(&i915->mm.free_lock); 1736 1737 init_llist_head(&i915->mm.free_list); 1738 1739 INIT_LIST_HEAD(&i915->mm.purge_list); 1740 INIT_LIST_HEAD(&i915->mm.shrink_list); 1741 1742 i915_gem_init__objects(i915); 1743 } 1744 1745 int i915_gem_init_early(struct drm_i915_private *dev_priv) 1746 { 1747 int err; 1748 1749 intel_gt_pm_init(dev_priv); 1750 1751 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 1752 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 1753 spin_lock_init(&dev_priv->gt.closed_lock); 1754 1755 i915_gem_init__mm(dev_priv); 1756 i915_gem_init__pm(dev_priv); 1757 1758 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 1759 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 1760 mutex_init(&dev_priv->gpu_error.wedge_mutex); 1761 init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); 1762 1763 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 1764 1765 spin_lock_init(&dev_priv->fb_tracking.lock); 1766 1767 err = i915_gemfs_init(dev_priv); 1768 if (err) 1769 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 1770 1771 return 0; 1772 } 1773 1774 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 1775 { 1776 i915_gem_drain_freed_objects(dev_priv); 1777 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 1778 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 1779 WARN_ON(dev_priv->mm.shrink_count); 1780 1781 cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); 1782 1783 i915_gemfs_fini(dev_priv); 1784 } 1785 1786 int i915_gem_freeze(struct drm_i915_private *dev_priv) 1787 { 1788 /* 
	 * Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	i915_gem_shrink(i915, -1UL, NULL, ~0);
	i915_gem_drain_freed_objects(i915);

	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		i915_gem_object_lock(obj);
		WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
		i915_gem_object_unlock(obj);
	}

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     BITS_PER_TYPE(atomic_t));

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
#endif