1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drm_vma_manager.h> 29 #include <drm/i915_drm.h> 30 #include <linux/dma-fence-array.h> 31 #include <linux/kthread.h> 32 #include <linux/reservation.h> 33 #include <linux/shmem_fs.h> 34 #include <linux/slab.h> 35 #include <linux/stop_machine.h> 36 #include <linux/swap.h> 37 #include <linux/pci.h> 38 #include <linux/dma-buf.h> 39 #include <linux/mman.h> 40 41 #include "display/intel_display.h" 42 #include "display/intel_frontbuffer.h" 43 44 #include "gem/i915_gem_clflush.h" 45 #include "gem/i915_gem_context.h" 46 #include "gem/i915_gem_ioctls.h" 47 #include "gem/i915_gem_pm.h" 48 #include "gem/i915_gemfs.h" 49 #include "gt/intel_gt_pm.h" 50 #include "gt/intel_mocs.h" 51 #include "gt/intel_reset.h" 52 #include "gt/intel_workarounds.h" 53 54 #include "i915_drv.h" 55 #include "i915_scatterlist.h" 56 #include "i915_trace.h" 57 #include "i915_vgpu.h" 58 59 #include "intel_drv.h" 60 #include "intel_pm.h" 61 62 static int 63 insert_mappable_node(struct i915_ggtt *ggtt, 64 struct drm_mm_node *node, u32 size) 65 { 66 memset(node, 0, sizeof(*node)); 67 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 68 size, 0, I915_COLOR_UNEVICTABLE, 69 0, ggtt->mappable_end, 70 DRM_MM_INSERT_LOW); 71 } 72 73 static void 74 remove_mappable_node(struct drm_mm_node *node) 75 { 76 drm_mm_remove_node(node); 77 } 78 79 int 80 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 81 struct drm_file *file) 82 { 83 struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; 84 struct drm_i915_gem_get_aperture *args = data; 85 struct i915_vma *vma; 86 u64 pinned; 87 88 mutex_lock(&ggtt->vm.mutex); 89 90 pinned = ggtt->vm.reserved; 91 list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) 92 if (i915_vma_is_pinned(vma)) 93 pinned += vma->node.size; 94 95 mutex_unlock(&ggtt->vm.mutex); 96 97 args->aper_size = ggtt->vm.total; 98 args->aper_available_size = args->aper_size - pinned; 99 100 return 0; 101 } 102 103 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 104 { 105 struct i915_vma *vma; 106 LIST_HEAD(still_in_list); 107 int ret = 0; 108 109 lockdep_assert_held(&obj->base.dev->struct_mutex); 110 111 spin_lock(&obj->vma.lock); 112 while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, 113 struct i915_vma, 114 obj_link))) { 115 list_move_tail(&vma->obj_link, &still_in_list); 116 spin_unlock(&obj->vma.lock); 117 118 ret = i915_vma_unbind(vma); 119 120 spin_lock(&obj->vma.lock); 121 } 122 list_splice(&still_in_list, &obj->vma.list); 123 spin_unlock(&obj->vma.lock); 124 125 return ret; 126 } 127 128 static int 129 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 130 struct drm_i915_gem_pwrite *args, 131 struct drm_file *file) 132 { 133 void *vaddr = obj->phys_handle->vaddr + args->offset; 134 char __user *user_data = u64_to_user_ptr(args->data_ptr); 135 136 /* We manually control the domain here and pretend that it 137 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 138 */ 139 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 140 if (copy_from_user(vaddr, user_data, args->size)) 141 return -EFAULT; 142 143 drm_clflush_virt_range(vaddr, args->size); 144 i915_gem_chipset_flush(to_i915(obj->base.dev)); 145 146 intel_fb_obj_flush(obj, ORIGIN_CPU); 147 return 0; 148 } 149 150 static int 151 i915_gem_create(struct drm_file *file, 152 struct drm_i915_private *dev_priv, 153 u64 *size_p, 154 u32 *handle_p) 155 { 156 struct drm_i915_gem_object *obj; 157 u32 handle; 158 u64 size; 159 int ret; 160 161 size = round_up(*size_p, PAGE_SIZE); 162 if (size == 0) 163 return -EINVAL; 164 165 /* Allocate the new object */ 166 obj = i915_gem_object_create_shmem(dev_priv, size); 167 if (IS_ERR(obj)) 168 return PTR_ERR(obj); 169 170 ret = drm_gem_handle_create(file, &obj->base, &handle); 171 /* drop reference from allocate - handle holds it now */ 172 i915_gem_object_put(obj); 173 if (ret) 174 return ret; 175 176 *handle_p = handle; 177 *size_p = size; 178 return 0; 179 } 180 181 int 182 i915_gem_dumb_create(struct drm_file *file, 183 struct drm_device *dev, 184 struct drm_mode_create_dumb *args) 185 { 186 int cpp = DIV_ROUND_UP(args->bpp, 8); 187 u32 format; 188 189 switch (cpp) { 190 case 1: 191 format = DRM_FORMAT_C8; 192 break; 193 case 2: 194 format = DRM_FORMAT_RGB565; 195 break; 196 case 4: 197 format = DRM_FORMAT_XRGB8888; 198 break; 199 default: 200 return -EINVAL; 201 } 202 203 /* have to work out size/pitch and return them */ 204 args->pitch = ALIGN(args->width * cpp, 64); 205 206 /* align stride to page size so that we can remap */ 207 if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format, 208 DRM_FORMAT_MOD_LINEAR)) 209 args->pitch = ALIGN(args->pitch, 4096); 210 211 args->size = args->pitch * args->height; 212 return i915_gem_create(file, to_i915(dev), 213 &args->size, &args->handle); 214 } 215 216 /** 217 * Creates a new mm object and returns a handle to it. 218 * @dev: drm device pointer 219 * @data: ioctl data blob 220 * @file: drm file pointer 221 */ 222 int 223 i915_gem_create_ioctl(struct drm_device *dev, void *data, 224 struct drm_file *file) 225 { 226 struct drm_i915_private *dev_priv = to_i915(dev); 227 struct drm_i915_gem_create *args = data; 228 229 i915_gem_flush_free_objects(dev_priv); 230 231 return i915_gem_create(file, dev_priv, 232 &args->size, &args->handle); 233 } 234 235 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 236 { 237 intel_wakeref_t wakeref; 238 239 /* 240 * No actual flushing is required for the GTT write domain for reads 241 * from the GTT domain. Writes to it "immediately" go to main memory 242 * as far as we know, so there's no chipset flush. It also doesn't 243 * land in the GPU render cache. 244 * 245 * However, we do have to enforce the order so that all writes through 246 * the GTT land before any writes to the device, such as updates to 247 * the GATT itself. 248 * 249 * We also have to wait a bit for the writes to land from the GTT. 250 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 251 * timing. This issue has only been observed when switching quickly 252 * between GTT writes and CPU reads from inside the kernel on recent hw, 253 * and it appears to only affect discrete GTT blocks (i.e. on LLC 254 * system agents we cannot reproduce this behaviour, until Cannonlake 255 * that was!). 256 */ 257 258 wmb(); 259 260 if (INTEL_INFO(dev_priv)->has_coherent_ggtt) 261 return; 262 263 i915_gem_chipset_flush(dev_priv); 264 265 with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { 266 struct intel_uncore *uncore = &dev_priv->uncore; 267 268 spin_lock_irq(&uncore->lock); 269 intel_uncore_posting_read_fw(uncore, 270 RING_HEAD(RENDER_RING_BASE)); 271 spin_unlock_irq(&uncore->lock); 272 } 273 } 274 275 static int 276 shmem_pread(struct page *page, int offset, int len, char __user *user_data, 277 bool needs_clflush) 278 { 279 char *vaddr; 280 int ret; 281 282 vaddr = kmap(page); 283 284 if (needs_clflush) 285 drm_clflush_virt_range(vaddr + offset, len); 286 287 ret = __copy_to_user(user_data, vaddr + offset, len); 288 289 kunmap(page); 290 291 return ret ? -EFAULT : 0; 292 } 293 294 static int 295 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 296 struct drm_i915_gem_pread *args) 297 { 298 unsigned int needs_clflush; 299 unsigned int idx, offset; 300 struct dma_fence *fence; 301 char __user *user_data; 302 u64 remain; 303 int ret; 304 305 ret = i915_gem_object_prepare_read(obj, &needs_clflush); 306 if (ret) 307 return ret; 308 309 fence = i915_gem_object_lock_fence(obj); 310 i915_gem_object_finish_access(obj); 311 if (!fence) 312 return -ENOMEM; 313 314 remain = args->size; 315 user_data = u64_to_user_ptr(args->data_ptr); 316 offset = offset_in_page(args->offset); 317 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 318 struct page *page = i915_gem_object_get_page(obj, idx); 319 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 320 321 ret = shmem_pread(page, offset, length, user_data, 322 needs_clflush); 323 if (ret) 324 break; 325 326 remain -= length; 327 user_data += length; 328 offset = 0; 329 } 330 331 i915_gem_object_unlock_fence(obj, fence); 332 return ret; 333 } 334 335 static inline bool 336 gtt_user_read(struct io_mapping *mapping, 337 loff_t base, int offset, 338 char __user *user_data, int length) 339 { 340 void __iomem *vaddr; 341 unsigned long unwritten; 342 343 /* We can use the cpu mem copy function because this is X86. */ 344 vaddr = io_mapping_map_atomic_wc(mapping, base); 345 unwritten = __copy_to_user_inatomic(user_data, 346 (void __force *)vaddr + offset, 347 length); 348 io_mapping_unmap_atomic(vaddr); 349 if (unwritten) { 350 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 351 unwritten = copy_to_user(user_data, 352 (void __force *)vaddr + offset, 353 length); 354 io_mapping_unmap(vaddr); 355 } 356 return unwritten; 357 } 358 359 static int 360 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 361 const struct drm_i915_gem_pread *args) 362 { 363 struct drm_i915_private *i915 = to_i915(obj->base.dev); 364 struct i915_ggtt *ggtt = &i915->ggtt; 365 intel_wakeref_t wakeref; 366 struct drm_mm_node node; 367 struct dma_fence *fence; 368 void __user *user_data; 369 struct i915_vma *vma; 370 u64 remain, offset; 371 int ret; 372 373 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 374 if (ret) 375 return ret; 376 377 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 378 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 379 PIN_MAPPABLE | 380 PIN_NONFAULT | 381 PIN_NONBLOCK); 382 if (!IS_ERR(vma)) { 383 node.start = i915_ggtt_offset(vma); 384 node.allocated = false; 385 ret = i915_vma_put_fence(vma); 386 if (ret) { 387 i915_vma_unpin(vma); 388 vma = ERR_PTR(ret); 389 } 390 } 391 if (IS_ERR(vma)) { 392 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 393 if (ret) 394 goto out_unlock; 395 GEM_BUG_ON(!node.allocated); 396 } 397 398 mutex_unlock(&i915->drm.struct_mutex); 399 400 ret = i915_gem_object_lock_interruptible(obj); 401 if (ret) 402 goto out_unpin; 403 404 ret = i915_gem_object_set_to_gtt_domain(obj, false); 405 if (ret) { 406 i915_gem_object_unlock(obj); 407 goto out_unpin; 408 } 409 410 fence = i915_gem_object_lock_fence(obj); 411 i915_gem_object_unlock(obj); 412 if (!fence) { 413 ret = -ENOMEM; 414 goto out_unpin; 415 } 416 417 user_data = u64_to_user_ptr(args->data_ptr); 418 remain = args->size; 419 offset = args->offset; 420 421 while (remain > 0) { 422 /* Operation in this page 423 * 424 * page_base = page offset within aperture 425 * page_offset = offset within page 426 * page_length = bytes to copy for this page 427 */ 428 u32 page_base = node.start; 429 unsigned page_offset = offset_in_page(offset); 430 unsigned page_length = PAGE_SIZE - page_offset; 431 page_length = remain < page_length ? remain : page_length; 432 if (node.allocated) { 433 wmb(); 434 ggtt->vm.insert_page(&ggtt->vm, 435 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 436 node.start, I915_CACHE_NONE, 0); 437 wmb(); 438 } else { 439 page_base += offset & PAGE_MASK; 440 } 441 442 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 443 user_data, page_length)) { 444 ret = -EFAULT; 445 break; 446 } 447 448 remain -= page_length; 449 user_data += page_length; 450 offset += page_length; 451 } 452 453 i915_gem_object_unlock_fence(obj, fence); 454 out_unpin: 455 mutex_lock(&i915->drm.struct_mutex); 456 if (node.allocated) { 457 wmb(); 458 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 459 remove_mappable_node(&node); 460 } else { 461 i915_vma_unpin(vma); 462 } 463 out_unlock: 464 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 465 mutex_unlock(&i915->drm.struct_mutex); 466 467 return ret; 468 } 469 470 /** 471 * Reads data from the object referenced by handle. 472 * @dev: drm device pointer 473 * @data: ioctl data blob 474 * @file: drm file pointer 475 * 476 * On error, the contents of *data are undefined. 477 */ 478 int 479 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 480 struct drm_file *file) 481 { 482 struct drm_i915_gem_pread *args = data; 483 struct drm_i915_gem_object *obj; 484 int ret; 485 486 if (args->size == 0) 487 return 0; 488 489 if (!access_ok(u64_to_user_ptr(args->data_ptr), 490 args->size)) 491 return -EFAULT; 492 493 obj = i915_gem_object_lookup(file, args->handle); 494 if (!obj) 495 return -ENOENT; 496 497 /* Bounds check source. */ 498 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 499 ret = -EINVAL; 500 goto out; 501 } 502 503 trace_i915_gem_object_pread(obj, args->offset, args->size); 504 505 ret = i915_gem_object_wait(obj, 506 I915_WAIT_INTERRUPTIBLE, 507 MAX_SCHEDULE_TIMEOUT); 508 if (ret) 509 goto out; 510 511 ret = i915_gem_object_pin_pages(obj); 512 if (ret) 513 goto out; 514 515 ret = i915_gem_shmem_pread(obj, args); 516 if (ret == -EFAULT || ret == -ENODEV) 517 ret = i915_gem_gtt_pread(obj, args); 518 519 i915_gem_object_unpin_pages(obj); 520 out: 521 i915_gem_object_put(obj); 522 return ret; 523 } 524 525 /* This is the fast write path which cannot handle 526 * page faults in the source data 527 */ 528 529 static inline bool 530 ggtt_write(struct io_mapping *mapping, 531 loff_t base, int offset, 532 char __user *user_data, int length) 533 { 534 void __iomem *vaddr; 535 unsigned long unwritten; 536 537 /* We can use the cpu mem copy function because this is X86. */ 538 vaddr = io_mapping_map_atomic_wc(mapping, base); 539 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 540 user_data, length); 541 io_mapping_unmap_atomic(vaddr); 542 if (unwritten) { 543 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 544 unwritten = copy_from_user((void __force *)vaddr + offset, 545 user_data, length); 546 io_mapping_unmap(vaddr); 547 } 548 549 return unwritten; 550 } 551 552 /** 553 * This is the fast pwrite path, where we copy the data directly from the 554 * user into the GTT, uncached. 555 * @obj: i915 GEM object 556 * @args: pwrite arguments structure 557 */ 558 static int 559 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 560 const struct drm_i915_gem_pwrite *args) 561 { 562 struct drm_i915_private *i915 = to_i915(obj->base.dev); 563 struct i915_ggtt *ggtt = &i915->ggtt; 564 struct intel_runtime_pm *rpm = &i915->runtime_pm; 565 intel_wakeref_t wakeref; 566 struct drm_mm_node node; 567 struct dma_fence *fence; 568 struct i915_vma *vma; 569 u64 remain, offset; 570 void __user *user_data; 571 int ret; 572 573 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 574 if (ret) 575 return ret; 576 577 if (i915_gem_object_has_struct_page(obj)) { 578 /* 579 * Avoid waking the device up if we can fallback, as 580 * waking/resuming is very slow (worst-case 10-100 ms 581 * depending on PCI sleeps and our own resume time). 582 * This easily dwarfs any performance advantage from 583 * using the cache bypass of indirect GGTT access. 584 */ 585 wakeref = intel_runtime_pm_get_if_in_use(rpm); 586 if (!wakeref) { 587 ret = -EFAULT; 588 goto out_unlock; 589 } 590 } else { 591 /* No backing pages, no fallback, we must force GGTT access */ 592 wakeref = intel_runtime_pm_get(rpm); 593 } 594 595 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 596 PIN_MAPPABLE | 597 PIN_NONFAULT | 598 PIN_NONBLOCK); 599 if (!IS_ERR(vma)) { 600 node.start = i915_ggtt_offset(vma); 601 node.allocated = false; 602 ret = i915_vma_put_fence(vma); 603 if (ret) { 604 i915_vma_unpin(vma); 605 vma = ERR_PTR(ret); 606 } 607 } 608 if (IS_ERR(vma)) { 609 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 610 if (ret) 611 goto out_rpm; 612 GEM_BUG_ON(!node.allocated); 613 } 614 615 mutex_unlock(&i915->drm.struct_mutex); 616 617 ret = i915_gem_object_lock_interruptible(obj); 618 if (ret) 619 goto out_unpin; 620 621 ret = i915_gem_object_set_to_gtt_domain(obj, true); 622 if (ret) { 623 i915_gem_object_unlock(obj); 624 goto out_unpin; 625 } 626 627 fence = i915_gem_object_lock_fence(obj); 628 i915_gem_object_unlock(obj); 629 if (!fence) { 630 ret = -ENOMEM; 631 goto out_unpin; 632 } 633 634 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 635 636 user_data = u64_to_user_ptr(args->data_ptr); 637 offset = args->offset; 638 remain = args->size; 639 while (remain) { 640 /* Operation in this page 641 * 642 * page_base = page offset within aperture 643 * page_offset = offset within page 644 * page_length = bytes to copy for this page 645 */ 646 u32 page_base = node.start; 647 unsigned int page_offset = offset_in_page(offset); 648 unsigned int page_length = PAGE_SIZE - page_offset; 649 page_length = remain < page_length ? remain : page_length; 650 if (node.allocated) { 651 wmb(); /* flush the write before we modify the GGTT */ 652 ggtt->vm.insert_page(&ggtt->vm, 653 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 654 node.start, I915_CACHE_NONE, 0); 655 wmb(); /* flush modifications to the GGTT (insert_page) */ 656 } else { 657 page_base += offset & PAGE_MASK; 658 } 659 /* If we get a fault while copying data, then (presumably) our 660 * source page isn't available. Return the error and we'll 661 * retry in the slow path. 662 * If the object is non-shmem backed, we retry again with the 663 * path that handles page fault. 664 */ 665 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 666 user_data, page_length)) { 667 ret = -EFAULT; 668 break; 669 } 670 671 remain -= page_length; 672 user_data += page_length; 673 offset += page_length; 674 } 675 intel_fb_obj_flush(obj, ORIGIN_CPU); 676 677 i915_gem_object_unlock_fence(obj, fence); 678 out_unpin: 679 mutex_lock(&i915->drm.struct_mutex); 680 if (node.allocated) { 681 wmb(); 682 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 683 remove_mappable_node(&node); 684 } else { 685 i915_vma_unpin(vma); 686 } 687 out_rpm: 688 intel_runtime_pm_put(rpm, wakeref); 689 out_unlock: 690 mutex_unlock(&i915->drm.struct_mutex); 691 return ret; 692 } 693 694 /* Per-page copy function for the shmem pwrite fastpath. 695 * Flushes invalid cachelines before writing to the target if 696 * needs_clflush_before is set and flushes out any written cachelines after 697 * writing if needs_clflush is set. 698 */ 699 static int 700 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 701 bool needs_clflush_before, 702 bool needs_clflush_after) 703 { 704 char *vaddr; 705 int ret; 706 707 vaddr = kmap(page); 708 709 if (needs_clflush_before) 710 drm_clflush_virt_range(vaddr + offset, len); 711 712 ret = __copy_from_user(vaddr + offset, user_data, len); 713 if (!ret && needs_clflush_after) 714 drm_clflush_virt_range(vaddr + offset, len); 715 716 kunmap(page); 717 718 return ret ? -EFAULT : 0; 719 } 720 721 static int 722 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 723 const struct drm_i915_gem_pwrite *args) 724 { 725 unsigned int partial_cacheline_write; 726 unsigned int needs_clflush; 727 unsigned int offset, idx; 728 struct dma_fence *fence; 729 void __user *user_data; 730 u64 remain; 731 int ret; 732 733 ret = i915_gem_object_prepare_write(obj, &needs_clflush); 734 if (ret) 735 return ret; 736 737 fence = i915_gem_object_lock_fence(obj); 738 i915_gem_object_finish_access(obj); 739 if (!fence) 740 return -ENOMEM; 741 742 /* If we don't overwrite a cacheline completely we need to be 743 * careful to have up-to-date data by first clflushing. Don't 744 * overcomplicate things and flush the entire patch. 745 */ 746 partial_cacheline_write = 0; 747 if (needs_clflush & CLFLUSH_BEFORE) 748 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 749 750 user_data = u64_to_user_ptr(args->data_ptr); 751 remain = args->size; 752 offset = offset_in_page(args->offset); 753 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 754 struct page *page = i915_gem_object_get_page(obj, idx); 755 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 756 757 ret = shmem_pwrite(page, offset, length, user_data, 758 (offset | length) & partial_cacheline_write, 759 needs_clflush & CLFLUSH_AFTER); 760 if (ret) 761 break; 762 763 remain -= length; 764 user_data += length; 765 offset = 0; 766 } 767 768 intel_fb_obj_flush(obj, ORIGIN_CPU); 769 i915_gem_object_unlock_fence(obj, fence); 770 771 return ret; 772 } 773 774 /** 775 * Writes data to the object referenced by handle. 776 * @dev: drm device 777 * @data: ioctl data blob 778 * @file: drm file 779 * 780 * On error, the contents of the buffer that were to be modified are undefined. 781 */ 782 int 783 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 784 struct drm_file *file) 785 { 786 struct drm_i915_gem_pwrite *args = data; 787 struct drm_i915_gem_object *obj; 788 int ret; 789 790 if (args->size == 0) 791 return 0; 792 793 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) 794 return -EFAULT; 795 796 obj = i915_gem_object_lookup(file, args->handle); 797 if (!obj) 798 return -ENOENT; 799 800 /* Bounds check destination. */ 801 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 802 ret = -EINVAL; 803 goto err; 804 } 805 806 /* Writes not allowed into this read-only object */ 807 if (i915_gem_object_is_readonly(obj)) { 808 ret = -EINVAL; 809 goto err; 810 } 811 812 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 813 814 ret = -ENODEV; 815 if (obj->ops->pwrite) 816 ret = obj->ops->pwrite(obj, args); 817 if (ret != -ENODEV) 818 goto err; 819 820 ret = i915_gem_object_wait(obj, 821 I915_WAIT_INTERRUPTIBLE | 822 I915_WAIT_ALL, 823 MAX_SCHEDULE_TIMEOUT); 824 if (ret) 825 goto err; 826 827 ret = i915_gem_object_pin_pages(obj); 828 if (ret) 829 goto err; 830 831 ret = -EFAULT; 832 /* We can only do the GTT pwrite on untiled buffers, as otherwise 833 * it would end up going through the fenced access, and we'll get 834 * different detiling behavior between reading and writing. 835 * pread/pwrite currently are reading and writing from the CPU 836 * perspective, requiring manual detiling by the client. 837 */ 838 if (!i915_gem_object_has_struct_page(obj) || 839 cpu_write_needs_clflush(obj)) 840 /* Note that the gtt paths might fail with non-page-backed user 841 * pointers (e.g. gtt mappings when moving data between 842 * textures). Fallback to the shmem path in that case. 843 */ 844 ret = i915_gem_gtt_pwrite_fast(obj, args); 845 846 if (ret == -EFAULT || ret == -ENOSPC) { 847 if (obj->phys_handle) 848 ret = i915_gem_phys_pwrite(obj, args, file); 849 else 850 ret = i915_gem_shmem_pwrite(obj, args); 851 } 852 853 i915_gem_object_unpin_pages(obj); 854 err: 855 i915_gem_object_put(obj); 856 return ret; 857 } 858 859 /** 860 * Called when user space has done writes to this buffer 861 * @dev: drm device 862 * @data: ioctl data blob 863 * @file: drm file 864 */ 865 int 866 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 867 struct drm_file *file) 868 { 869 struct drm_i915_gem_sw_finish *args = data; 870 struct drm_i915_gem_object *obj; 871 872 obj = i915_gem_object_lookup(file, args->handle); 873 if (!obj) 874 return -ENOENT; 875 876 /* 877 * Proxy objects are barred from CPU access, so there is no 878 * need to ban sw_finish as it is a nop. 879 */ 880 881 /* Pinned buffers may be scanout, so flush the cache */ 882 i915_gem_object_flush_if_display(obj); 883 i915_gem_object_put(obj); 884 885 return 0; 886 } 887 888 void i915_gem_runtime_suspend(struct drm_i915_private *i915) 889 { 890 struct drm_i915_gem_object *obj, *on; 891 int i; 892 893 /* 894 * Only called during RPM suspend. All users of the userfault_list 895 * must be holding an RPM wakeref to ensure that this can not 896 * run concurrently with themselves (and use the struct_mutex for 897 * protection between themselves). 898 */ 899 900 list_for_each_entry_safe(obj, on, 901 &i915->ggtt.userfault_list, userfault_link) 902 __i915_gem_object_release_mmap(obj); 903 904 /* 905 * The fence will be lost when the device powers down. If any were 906 * in use by hardware (i.e. they are pinned), we should not be powering 907 * down! All other fences will be reacquired by the user upon waking. 908 */ 909 for (i = 0; i < i915->ggtt.num_fences; i++) { 910 struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; 911 912 /* 913 * Ideally we want to assert that the fence register is not 914 * live at this point (i.e. that no piece of code will be 915 * trying to write through fence + GTT, as that both violates 916 * our tracking of activity and associated locking/barriers, 917 * but also is illegal given that the hw is powered down). 918 * 919 * Previously we used reg->pin_count as a "liveness" indicator. 920 * That is not sufficient, and we need a more fine-grained 921 * tool if we want to have a sanity check here. 922 */ 923 924 if (!reg->vma) 925 continue; 926 927 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 928 reg->dirty = true; 929 } 930 } 931 932 static int wait_for_engines(struct drm_i915_private *i915) 933 { 934 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 935 dev_err(i915->drm.dev, 936 "Failed to idle engines, declaring wedged!\n"); 937 GEM_TRACE_DUMP(); 938 i915_gem_set_wedged(i915); 939 return -EIO; 940 } 941 942 return 0; 943 } 944 945 static long 946 wait_for_timelines(struct drm_i915_private *i915, 947 unsigned int flags, long timeout) 948 { 949 struct i915_gt_timelines *gt = &i915->gt.timelines; 950 struct i915_timeline *tl; 951 952 mutex_lock(>->mutex); 953 list_for_each_entry(tl, >->active_list, link) { 954 struct i915_request *rq; 955 956 rq = i915_active_request_get_unlocked(&tl->last_request); 957 if (!rq) 958 continue; 959 960 mutex_unlock(>->mutex); 961 962 /* 963 * "Race-to-idle". 964 * 965 * Switching to the kernel context is often used a synchronous 966 * step prior to idling, e.g. in suspend for flushing all 967 * current operations to memory before sleeping. These we 968 * want to complete as quickly as possible to avoid prolonged 969 * stalls, so allow the gpu to boost to maximum clocks. 970 */ 971 if (flags & I915_WAIT_FOR_IDLE_BOOST) 972 gen6_rps_boost(rq); 973 974 timeout = i915_request_wait(rq, flags, timeout); 975 i915_request_put(rq); 976 if (timeout < 0) 977 return timeout; 978 979 /* restart after reacquiring the lock */ 980 mutex_lock(>->mutex); 981 tl = list_entry(>->active_list, typeof(*tl), link); 982 } 983 mutex_unlock(>->mutex); 984 985 return timeout; 986 } 987 988 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 989 unsigned int flags, long timeout) 990 { 991 GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n", 992 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 993 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "", 994 yesno(i915->gt.awake)); 995 996 /* If the device is asleep, we have no requests outstanding */ 997 if (!READ_ONCE(i915->gt.awake)) 998 return 0; 999 1000 timeout = wait_for_timelines(i915, flags, timeout); 1001 if (timeout < 0) 1002 return timeout; 1003 1004 if (flags & I915_WAIT_LOCKED) { 1005 int err; 1006 1007 lockdep_assert_held(&i915->drm.struct_mutex); 1008 1009 err = wait_for_engines(i915); 1010 if (err) 1011 return err; 1012 1013 i915_retire_requests(i915); 1014 } 1015 1016 return 0; 1017 } 1018 1019 struct i915_vma * 1020 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 1021 const struct i915_ggtt_view *view, 1022 u64 size, 1023 u64 alignment, 1024 u64 flags) 1025 { 1026 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 1027 struct i915_address_space *vm = &dev_priv->ggtt.vm; 1028 struct i915_vma *vma; 1029 int ret; 1030 1031 lockdep_assert_held(&obj->base.dev->struct_mutex); 1032 1033 if (flags & PIN_MAPPABLE && 1034 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 1035 /* If the required space is larger than the available 1036 * aperture, we will not able to find a slot for the 1037 * object and unbinding the object now will be in 1038 * vain. Worse, doing so may cause us to ping-pong 1039 * the object in and out of the Global GTT and 1040 * waste a lot of cycles under the mutex. 1041 */ 1042 if (obj->base.size > dev_priv->ggtt.mappable_end) 1043 return ERR_PTR(-E2BIG); 1044 1045 /* If NONBLOCK is set the caller is optimistically 1046 * trying to cache the full object within the mappable 1047 * aperture, and *must* have a fallback in place for 1048 * situations where we cannot bind the object. We 1049 * can be a little more lax here and use the fallback 1050 * more often to avoid costly migrations of ourselves 1051 * and other objects within the aperture. 1052 * 1053 * Half-the-aperture is used as a simple heuristic. 1054 * More interesting would to do search for a free 1055 * block prior to making the commitment to unbind. 1056 * That caters for the self-harm case, and with a 1057 * little more heuristics (e.g. NOFAULT, NOEVICT) 1058 * we could try to minimise harm to others. 1059 */ 1060 if (flags & PIN_NONBLOCK && 1061 obj->base.size > dev_priv->ggtt.mappable_end / 2) 1062 return ERR_PTR(-ENOSPC); 1063 } 1064 1065 vma = i915_vma_instance(obj, vm, view); 1066 if (IS_ERR(vma)) 1067 return vma; 1068 1069 if (i915_vma_misplaced(vma, size, alignment, flags)) { 1070 if (flags & PIN_NONBLOCK) { 1071 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 1072 return ERR_PTR(-ENOSPC); 1073 1074 if (flags & PIN_MAPPABLE && 1075 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 1076 return ERR_PTR(-ENOSPC); 1077 } 1078 1079 WARN(i915_vma_is_pinned(vma), 1080 "bo is already pinned in ggtt with incorrect alignment:" 1081 " offset=%08x, req.alignment=%llx," 1082 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 1083 i915_ggtt_offset(vma), alignment, 1084 !!(flags & PIN_MAPPABLE), 1085 i915_vma_is_map_and_fenceable(vma)); 1086 ret = i915_vma_unbind(vma); 1087 if (ret) 1088 return ERR_PTR(ret); 1089 } 1090 1091 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 1092 if (ret) 1093 return ERR_PTR(ret); 1094 1095 return vma; 1096 } 1097 1098 int 1099 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 1100 struct drm_file *file_priv) 1101 { 1102 struct drm_i915_private *i915 = to_i915(dev); 1103 struct drm_i915_gem_madvise *args = data; 1104 struct drm_i915_gem_object *obj; 1105 int err; 1106 1107 switch (args->madv) { 1108 case I915_MADV_DONTNEED: 1109 case I915_MADV_WILLNEED: 1110 break; 1111 default: 1112 return -EINVAL; 1113 } 1114 1115 obj = i915_gem_object_lookup(file_priv, args->handle); 1116 if (!obj) 1117 return -ENOENT; 1118 1119 err = mutex_lock_interruptible(&obj->mm.lock); 1120 if (err) 1121 goto out; 1122 1123 if (i915_gem_object_has_pages(obj) && 1124 i915_gem_object_is_tiled(obj) && 1125 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 1126 if (obj->mm.madv == I915_MADV_WILLNEED) { 1127 GEM_BUG_ON(!obj->mm.quirked); 1128 __i915_gem_object_unpin_pages(obj); 1129 obj->mm.quirked = false; 1130 } 1131 if (args->madv == I915_MADV_WILLNEED) { 1132 GEM_BUG_ON(obj->mm.quirked); 1133 __i915_gem_object_pin_pages(obj); 1134 obj->mm.quirked = true; 1135 } 1136 } 1137 1138 if (obj->mm.madv != __I915_MADV_PURGED) 1139 obj->mm.madv = args->madv; 1140 1141 if (i915_gem_object_has_pages(obj)) { 1142 struct list_head *list; 1143 1144 if (i915_gem_object_is_shrinkable(obj)) { 1145 unsigned long flags; 1146 1147 spin_lock_irqsave(&i915->mm.obj_lock, flags); 1148 1149 if (obj->mm.madv != I915_MADV_WILLNEED) 1150 list = &i915->mm.purge_list; 1151 else 1152 list = &i915->mm.shrink_list; 1153 list_move_tail(&obj->mm.link, list); 1154 1155 spin_unlock_irqrestore(&i915->mm.obj_lock, flags); 1156 } 1157 } 1158 1159 /* if the object is no longer attached, discard its backing storage */ 1160 if (obj->mm.madv == I915_MADV_DONTNEED && 1161 !i915_gem_object_has_pages(obj)) 1162 i915_gem_object_truncate(obj); 1163 1164 args->retained = obj->mm.madv != __I915_MADV_PURGED; 1165 mutex_unlock(&obj->mm.lock); 1166 1167 out: 1168 i915_gem_object_put(obj); 1169 return err; 1170 } 1171 1172 void i915_gem_sanitize(struct drm_i915_private *i915) 1173 { 1174 intel_wakeref_t wakeref; 1175 1176 GEM_TRACE("\n"); 1177 1178 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1179 intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); 1180 1181 /* 1182 * As we have just resumed the machine and woken the device up from 1183 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 1184 * back to defaults, recovering from whatever wedged state we left it 1185 * in and so worth trying to use the device once more. 1186 */ 1187 if (i915_terminally_wedged(i915)) 1188 i915_gem_unset_wedged(i915); 1189 1190 /* 1191 * If we inherit context state from the BIOS or earlier occupants 1192 * of the GPU, the GPU may be in an inconsistent state when we 1193 * try to take over. The only way to remove the earlier state 1194 * is by resetting. However, resetting on earlier gen is tricky as 1195 * it may impact the display and we are uncertain about the stability 1196 * of the reset, so this could be applied to even earlier gen. 1197 */ 1198 intel_gt_sanitize(i915, false); 1199 1200 intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); 1201 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1202 } 1203 1204 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 1205 { 1206 if (INTEL_GEN(dev_priv) < 5 || 1207 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 1208 return; 1209 1210 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 1211 DISP_TILE_SURFACE_SWIZZLING); 1212 1213 if (IS_GEN(dev_priv, 5)) 1214 return; 1215 1216 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 1217 if (IS_GEN(dev_priv, 6)) 1218 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 1219 else if (IS_GEN(dev_priv, 7)) 1220 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 1221 else if (IS_GEN(dev_priv, 8)) 1222 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 1223 else 1224 BUG(); 1225 } 1226 1227 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 1228 { 1229 I915_WRITE(RING_CTL(base), 0); 1230 I915_WRITE(RING_HEAD(base), 0); 1231 I915_WRITE(RING_TAIL(base), 0); 1232 I915_WRITE(RING_START(base), 0); 1233 } 1234 1235 static void init_unused_rings(struct drm_i915_private *dev_priv) 1236 { 1237 if (IS_I830(dev_priv)) { 1238 init_unused_ring(dev_priv, PRB1_BASE); 1239 init_unused_ring(dev_priv, SRB0_BASE); 1240 init_unused_ring(dev_priv, SRB1_BASE); 1241 init_unused_ring(dev_priv, SRB2_BASE); 1242 init_unused_ring(dev_priv, SRB3_BASE); 1243 } else if (IS_GEN(dev_priv, 2)) { 1244 init_unused_ring(dev_priv, SRB0_BASE); 1245 init_unused_ring(dev_priv, SRB1_BASE); 1246 } else if (IS_GEN(dev_priv, 3)) { 1247 init_unused_ring(dev_priv, PRB1_BASE); 1248 init_unused_ring(dev_priv, PRB2_BASE); 1249 } 1250 } 1251 1252 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 1253 { 1254 int ret; 1255 1256 dev_priv->gt.last_init_time = ktime_get(); 1257 1258 /* Double layer security blanket, see i915_gem_init() */ 1259 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); 1260 1261 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 1262 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 1263 1264 if (IS_HASWELL(dev_priv)) 1265 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 1266 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 1267 1268 /* Apply the GT workarounds... */ 1269 intel_gt_apply_workarounds(dev_priv); 1270 /* ...and determine whether they are sticking. */ 1271 intel_gt_verify_workarounds(dev_priv, "init"); 1272 1273 i915_gem_init_swizzling(dev_priv); 1274 1275 /* 1276 * At least 830 can leave some of the unused rings 1277 * "active" (ie. head != tail) after resume which 1278 * will prevent c3 entry. Makes sure all unused rings 1279 * are totally idle. 1280 */ 1281 init_unused_rings(dev_priv); 1282 1283 BUG_ON(!dev_priv->kernel_context); 1284 ret = i915_terminally_wedged(dev_priv); 1285 if (ret) 1286 goto out; 1287 1288 ret = i915_ppgtt_init_hw(dev_priv); 1289 if (ret) { 1290 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 1291 goto out; 1292 } 1293 1294 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 1295 if (ret) { 1296 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 1297 goto out; 1298 } 1299 1300 /* We can't enable contexts until all firmware is loaded */ 1301 ret = intel_uc_init_hw(dev_priv); 1302 if (ret) { 1303 DRM_ERROR("Enabling uc failed (%d)\n", ret); 1304 goto out; 1305 } 1306 1307 intel_mocs_init_l3cc_table(dev_priv); 1308 1309 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 1310 1311 intel_engines_set_scheduler_caps(dev_priv); 1312 return 0; 1313 1314 out: 1315 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 1316 return ret; 1317 } 1318 1319 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 1320 { 1321 struct intel_engine_cs *engine; 1322 struct i915_gem_context *ctx; 1323 struct i915_gem_engines *e; 1324 enum intel_engine_id id; 1325 int err = 0; 1326 1327 /* 1328 * As we reset the gpu during very early sanitisation, the current 1329 * register state on the GPU should reflect its defaults values. 1330 * We load a context onto the hw (with restore-inhibit), then switch 1331 * over to a second context to save that default register state. We 1332 * can then prime every new context with that state so they all start 1333 * from the same default HW values. 1334 */ 1335 1336 ctx = i915_gem_context_create_kernel(i915, 0); 1337 if (IS_ERR(ctx)) 1338 return PTR_ERR(ctx); 1339 1340 e = i915_gem_context_lock_engines(ctx); 1341 1342 for_each_engine(engine, i915, id) { 1343 struct intel_context *ce = e->engines[id]; 1344 struct i915_request *rq; 1345 1346 rq = intel_context_create_request(ce); 1347 if (IS_ERR(rq)) { 1348 err = PTR_ERR(rq); 1349 goto err_active; 1350 } 1351 1352 err = 0; 1353 if (rq->engine->init_context) 1354 err = rq->engine->init_context(rq); 1355 1356 i915_request_add(rq); 1357 if (err) 1358 goto err_active; 1359 } 1360 1361 /* Flush the default context image to memory, and enable powersaving. */ 1362 if (!i915_gem_load_power_context(i915)) { 1363 err = -EIO; 1364 goto err_active; 1365 } 1366 1367 for_each_engine(engine, i915, id) { 1368 struct intel_context *ce = e->engines[id]; 1369 struct i915_vma *state = ce->state; 1370 void *vaddr; 1371 1372 if (!state) 1373 continue; 1374 1375 GEM_BUG_ON(intel_context_is_pinned(ce)); 1376 1377 /* 1378 * As we will hold a reference to the logical state, it will 1379 * not be torn down with the context, and importantly the 1380 * object will hold onto its vma (making it possible for a 1381 * stray GTT write to corrupt our defaults). Unmap the vma 1382 * from the GTT to prevent such accidents and reclaim the 1383 * space. 1384 */ 1385 err = i915_vma_unbind(state); 1386 if (err) 1387 goto err_active; 1388 1389 i915_gem_object_lock(state->obj); 1390 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 1391 i915_gem_object_unlock(state->obj); 1392 if (err) 1393 goto err_active; 1394 1395 engine->default_state = i915_gem_object_get(state->obj); 1396 i915_gem_object_set_cache_coherency(engine->default_state, 1397 I915_CACHE_LLC); 1398 1399 /* Check we can acquire the image of the context state */ 1400 vaddr = i915_gem_object_pin_map(engine->default_state, 1401 I915_MAP_FORCE_WB); 1402 if (IS_ERR(vaddr)) { 1403 err = PTR_ERR(vaddr); 1404 goto err_active; 1405 } 1406 1407 i915_gem_object_unpin_map(engine->default_state); 1408 } 1409 1410 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 1411 unsigned int found = intel_engines_has_context_isolation(i915); 1412 1413 /* 1414 * Make sure that classes with multiple engine instances all 1415 * share the same basic configuration. 1416 */ 1417 for_each_engine(engine, i915, id) { 1418 unsigned int bit = BIT(engine->uabi_class); 1419 unsigned int expected = engine->default_state ? bit : 0; 1420 1421 if ((found & bit) != expected) { 1422 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 1423 engine->uabi_class, engine->name); 1424 } 1425 } 1426 } 1427 1428 out_ctx: 1429 i915_gem_context_unlock_engines(ctx); 1430 i915_gem_context_set_closed(ctx); 1431 i915_gem_context_put(ctx); 1432 return err; 1433 1434 err_active: 1435 /* 1436 * If we have to abandon now, we expect the engines to be idle 1437 * and ready to be torn-down. The quickest way we can accomplish 1438 * this is by declaring ourselves wedged. 1439 */ 1440 i915_gem_set_wedged(i915); 1441 goto out_ctx; 1442 } 1443 1444 static int 1445 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 1446 { 1447 struct drm_i915_gem_object *obj; 1448 struct i915_vma *vma; 1449 int ret; 1450 1451 obj = i915_gem_object_create_stolen(i915, size); 1452 if (!obj) 1453 obj = i915_gem_object_create_internal(i915, size); 1454 if (IS_ERR(obj)) { 1455 DRM_ERROR("Failed to allocate scratch page\n"); 1456 return PTR_ERR(obj); 1457 } 1458 1459 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 1460 if (IS_ERR(vma)) { 1461 ret = PTR_ERR(vma); 1462 goto err_unref; 1463 } 1464 1465 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 1466 if (ret) 1467 goto err_unref; 1468 1469 i915->gt.scratch = vma; 1470 return 0; 1471 1472 err_unref: 1473 i915_gem_object_put(obj); 1474 return ret; 1475 } 1476 1477 static void i915_gem_fini_scratch(struct drm_i915_private *i915) 1478 { 1479 i915_vma_unpin_and_release(&i915->gt.scratch, 0); 1480 } 1481 1482 static int intel_engines_verify_workarounds(struct drm_i915_private *i915) 1483 { 1484 struct intel_engine_cs *engine; 1485 enum intel_engine_id id; 1486 int err = 0; 1487 1488 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 1489 return 0; 1490 1491 for_each_engine(engine, i915, id) { 1492 if (intel_engine_verify_workarounds(engine, "load")) 1493 err = -EIO; 1494 } 1495 1496 return err; 1497 } 1498 1499 int i915_gem_init(struct drm_i915_private *dev_priv) 1500 { 1501 int ret; 1502 1503 /* We need to fallback to 4K pages if host doesn't support huge gtt. */ 1504 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 1505 mkwrite_device_info(dev_priv)->page_sizes = 1506 I915_GTT_PAGE_SIZE_4K; 1507 1508 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 1509 1510 i915_timelines_init(dev_priv); 1511 1512 ret = i915_gem_init_userptr(dev_priv); 1513 if (ret) 1514 return ret; 1515 1516 ret = intel_uc_init_misc(dev_priv); 1517 if (ret) 1518 return ret; 1519 1520 ret = intel_wopcm_init(&dev_priv->wopcm); 1521 if (ret) 1522 goto err_uc_misc; 1523 1524 /* This is just a security blanket to placate dragons. 1525 * On some systems, we very sporadically observe that the first TLBs 1526 * used by the CS may be stale, despite us poking the TLB reset. If 1527 * we hold the forcewake during initialisation these problems 1528 * just magically go away. 1529 */ 1530 mutex_lock(&dev_priv->drm.struct_mutex); 1531 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); 1532 1533 ret = i915_gem_init_ggtt(dev_priv); 1534 if (ret) { 1535 GEM_BUG_ON(ret == -EIO); 1536 goto err_unlock; 1537 } 1538 1539 ret = i915_gem_init_scratch(dev_priv, 1540 IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE); 1541 if (ret) { 1542 GEM_BUG_ON(ret == -EIO); 1543 goto err_ggtt; 1544 } 1545 1546 ret = intel_engines_setup(dev_priv); 1547 if (ret) { 1548 GEM_BUG_ON(ret == -EIO); 1549 goto err_unlock; 1550 } 1551 1552 ret = i915_gem_contexts_init(dev_priv); 1553 if (ret) { 1554 GEM_BUG_ON(ret == -EIO); 1555 goto err_scratch; 1556 } 1557 1558 ret = intel_engines_init(dev_priv); 1559 if (ret) { 1560 GEM_BUG_ON(ret == -EIO); 1561 goto err_context; 1562 } 1563 1564 intel_init_gt_powersave(dev_priv); 1565 1566 ret = intel_uc_init(dev_priv); 1567 if (ret) 1568 goto err_pm; 1569 1570 ret = i915_gem_init_hw(dev_priv); 1571 if (ret) 1572 goto err_uc_init; 1573 1574 /* Only when the HW is re-initialised, can we replay the requests */ 1575 ret = intel_gt_resume(dev_priv); 1576 if (ret) 1577 goto err_init_hw; 1578 1579 /* 1580 * Despite its name intel_init_clock_gating applies both display 1581 * clock gating workarounds; GT mmio workarounds and the occasional 1582 * GT power context workaround. Worse, sometimes it includes a context 1583 * register workaround which we need to apply before we record the 1584 * default HW state for all contexts. 1585 * 1586 * FIXME: break up the workarounds and apply them at the right time! 1587 */ 1588 intel_init_clock_gating(dev_priv); 1589 1590 ret = intel_engines_verify_workarounds(dev_priv); 1591 if (ret) 1592 goto err_gt; 1593 1594 ret = __intel_engines_record_defaults(dev_priv); 1595 if (ret) 1596 goto err_gt; 1597 1598 if (i915_inject_load_failure()) { 1599 ret = -ENODEV; 1600 goto err_gt; 1601 } 1602 1603 if (i915_inject_load_failure()) { 1604 ret = -EIO; 1605 goto err_gt; 1606 } 1607 1608 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 1609 mutex_unlock(&dev_priv->drm.struct_mutex); 1610 1611 return 0; 1612 1613 /* 1614 * Unwinding is complicated by that we want to handle -EIO to mean 1615 * disable GPU submission but keep KMS alive. We want to mark the 1616 * HW as irrevisibly wedged, but keep enough state around that the 1617 * driver doesn't explode during runtime. 1618 */ 1619 err_gt: 1620 mutex_unlock(&dev_priv->drm.struct_mutex); 1621 1622 i915_gem_set_wedged(dev_priv); 1623 i915_gem_suspend(dev_priv); 1624 i915_gem_suspend_late(dev_priv); 1625 1626 i915_gem_drain_workqueue(dev_priv); 1627 1628 mutex_lock(&dev_priv->drm.struct_mutex); 1629 err_init_hw: 1630 intel_uc_fini_hw(dev_priv); 1631 err_uc_init: 1632 intel_uc_fini(dev_priv); 1633 err_pm: 1634 if (ret != -EIO) { 1635 intel_cleanup_gt_powersave(dev_priv); 1636 intel_engines_cleanup(dev_priv); 1637 } 1638 err_context: 1639 if (ret != -EIO) 1640 i915_gem_contexts_fini(dev_priv); 1641 err_scratch: 1642 i915_gem_fini_scratch(dev_priv); 1643 err_ggtt: 1644 err_unlock: 1645 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 1646 mutex_unlock(&dev_priv->drm.struct_mutex); 1647 1648 err_uc_misc: 1649 intel_uc_fini_misc(dev_priv); 1650 1651 if (ret != -EIO) { 1652 i915_gem_cleanup_userptr(dev_priv); 1653 i915_timelines_fini(dev_priv); 1654 } 1655 1656 if (ret == -EIO) { 1657 mutex_lock(&dev_priv->drm.struct_mutex); 1658 1659 /* 1660 * Allow engine initialisation to fail by marking the GPU as 1661 * wedged. But we only want to do this where the GPU is angry, 1662 * for all other failure, such as an allocation failure, bail. 1663 */ 1664 if (!i915_reset_failed(dev_priv)) { 1665 i915_load_error(dev_priv, 1666 "Failed to initialize GPU, declaring it wedged!\n"); 1667 i915_gem_set_wedged(dev_priv); 1668 } 1669 1670 /* Minimal basic recovery for KMS */ 1671 ret = i915_ggtt_enable_hw(dev_priv); 1672 i915_gem_restore_gtt_mappings(dev_priv); 1673 i915_gem_restore_fences(dev_priv); 1674 intel_init_clock_gating(dev_priv); 1675 1676 mutex_unlock(&dev_priv->drm.struct_mutex); 1677 } 1678 1679 i915_gem_drain_freed_objects(dev_priv); 1680 return ret; 1681 } 1682 1683 void i915_gem_fini_hw(struct drm_i915_private *dev_priv) 1684 { 1685 GEM_BUG_ON(dev_priv->gt.awake); 1686 1687 intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); 1688 1689 i915_gem_suspend_late(dev_priv); 1690 intel_disable_gt_powersave(dev_priv); 1691 1692 /* Flush any outstanding unpin_work. */ 1693 i915_gem_drain_workqueue(dev_priv); 1694 1695 mutex_lock(&dev_priv->drm.struct_mutex); 1696 intel_uc_fini_hw(dev_priv); 1697 intel_uc_fini(dev_priv); 1698 mutex_unlock(&dev_priv->drm.struct_mutex); 1699 1700 i915_gem_drain_freed_objects(dev_priv); 1701 } 1702 1703 void i915_gem_fini(struct drm_i915_private *dev_priv) 1704 { 1705 mutex_lock(&dev_priv->drm.struct_mutex); 1706 intel_engines_cleanup(dev_priv); 1707 i915_gem_contexts_fini(dev_priv); 1708 i915_gem_fini_scratch(dev_priv); 1709 mutex_unlock(&dev_priv->drm.struct_mutex); 1710 1711 intel_wa_list_free(&dev_priv->gt_wa_list); 1712 1713 intel_cleanup_gt_powersave(dev_priv); 1714 1715 intel_uc_fini_misc(dev_priv); 1716 i915_gem_cleanup_userptr(dev_priv); 1717 i915_timelines_fini(dev_priv); 1718 1719 i915_gem_drain_freed_objects(dev_priv); 1720 1721 WARN_ON(!list_empty(&dev_priv->contexts.list)); 1722 } 1723 1724 void i915_gem_init_mmio(struct drm_i915_private *i915) 1725 { 1726 i915_gem_sanitize(i915); 1727 } 1728 1729 static void i915_gem_init__mm(struct drm_i915_private *i915) 1730 { 1731 spin_lock_init(&i915->mm.obj_lock); 1732 spin_lock_init(&i915->mm.free_lock); 1733 1734 init_llist_head(&i915->mm.free_list); 1735 1736 INIT_LIST_HEAD(&i915->mm.purge_list); 1737 INIT_LIST_HEAD(&i915->mm.shrink_list); 1738 1739 i915_gem_init__objects(i915); 1740 } 1741 1742 int i915_gem_init_early(struct drm_i915_private *dev_priv) 1743 { 1744 int err; 1745 1746 intel_gt_pm_init(dev_priv); 1747 1748 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 1749 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 1750 spin_lock_init(&dev_priv->gt.closed_lock); 1751 1752 i915_gem_init__mm(dev_priv); 1753 i915_gem_init__pm(dev_priv); 1754 1755 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 1756 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 1757 mutex_init(&dev_priv->gpu_error.wedge_mutex); 1758 init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); 1759 1760 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 1761 1762 spin_lock_init(&dev_priv->fb_tracking.lock); 1763 1764 err = i915_gemfs_init(dev_priv); 1765 if (err) 1766 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 1767 1768 return 0; 1769 } 1770 1771 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 1772 { 1773 i915_gem_drain_freed_objects(dev_priv); 1774 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 1775 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 1776 WARN_ON(dev_priv->mm.shrink_count); 1777 1778 cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); 1779 1780 i915_gemfs_fini(dev_priv); 1781 } 1782 1783 int i915_gem_freeze(struct drm_i915_private *dev_priv) 1784 { 1785 /* Discard all purgeable objects, let userspace recover those as 1786 * required after resuming. 1787 */ 1788 i915_gem_shrink_all(dev_priv); 1789 1790 return 0; 1791 } 1792 1793 int i915_gem_freeze_late(struct drm_i915_private *i915) 1794 { 1795 struct drm_i915_gem_object *obj; 1796 intel_wakeref_t wakeref; 1797 1798 /* 1799 * Called just before we write the hibernation image. 1800 * 1801 * We need to update the domain tracking to reflect that the CPU 1802 * will be accessing all the pages to create and restore from the 1803 * hibernation, and so upon restoration those pages will be in the 1804 * CPU domain. 1805 * 1806 * To make sure the hibernation image contains the latest state, 1807 * we update that state just before writing out the image. 1808 * 1809 * To try and reduce the hibernation image, we manually shrink 1810 * the objects as well, see i915_gem_freeze() 1811 */ 1812 1813 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1814 1815 i915_gem_shrink(i915, -1UL, NULL, ~0); 1816 i915_gem_drain_freed_objects(i915); 1817 1818 list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) { 1819 i915_gem_object_lock(obj); 1820 WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); 1821 i915_gem_object_unlock(obj); 1822 } 1823 1824 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1825 1826 return 0; 1827 } 1828 1829 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 1830 { 1831 struct drm_i915_file_private *file_priv = file->driver_priv; 1832 struct i915_request *request; 1833 1834 /* Clean up our request list when the client is going away, so that 1835 * later retire_requests won't dereference our soon-to-be-gone 1836 * file_priv. 1837 */ 1838 spin_lock(&file_priv->mm.lock); 1839 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 1840 request->file_priv = NULL; 1841 spin_unlock(&file_priv->mm.lock); 1842 } 1843 1844 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 1845 { 1846 struct drm_i915_file_private *file_priv; 1847 int ret; 1848 1849 DRM_DEBUG("\n"); 1850 1851 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 1852 if (!file_priv) 1853 return -ENOMEM; 1854 1855 file->driver_priv = file_priv; 1856 file_priv->dev_priv = i915; 1857 file_priv->file = file; 1858 1859 spin_lock_init(&file_priv->mm.lock); 1860 INIT_LIST_HEAD(&file_priv->mm.request_list); 1861 1862 file_priv->bsd_engine = -1; 1863 file_priv->hang_timestamp = jiffies; 1864 1865 ret = i915_gem_context_open(i915, file); 1866 if (ret) 1867 kfree(file_priv); 1868 1869 return ret; 1870 } 1871 1872 /** 1873 * i915_gem_track_fb - update frontbuffer tracking 1874 * @old: current GEM buffer for the frontbuffer slots 1875 * @new: new GEM buffer for the frontbuffer slots 1876 * @frontbuffer_bits: bitmask of frontbuffer slots 1877 * 1878 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 1879 * from @old and setting them in @new. Both @old and @new can be NULL. 1880 */ 1881 void i915_gem_track_fb(struct drm_i915_gem_object *old, 1882 struct drm_i915_gem_object *new, 1883 unsigned frontbuffer_bits) 1884 { 1885 /* Control of individual bits within the mask are guarded by 1886 * the owning plane->mutex, i.e. we can never see concurrent 1887 * manipulation of individual bits. But since the bitfield as a whole 1888 * is updated using RMW, we need to use atomics in order to update 1889 * the bits. 1890 */ 1891 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 1892 BITS_PER_TYPE(atomic_t)); 1893 1894 if (old) { 1895 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 1896 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 1897 } 1898 1899 if (new) { 1900 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 1901 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 1902 } 1903 } 1904 1905 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1906 #include "selftests/mock_gem_device.c" 1907 #include "selftests/i915_gem.c" 1908 #endif 1909