/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_pm.h"
#include "gem/i915_gemfs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_mocs.h"
#include "gt/intel_reset.h"
#include "gt/intel_renderstate.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_drv.h"
#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	mutex_lock(&ggtt->vm.mutex);

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
			   unsigned long flags)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret = 0;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		spin_unlock(&obj->vma.lock);

		ret = -EBUSY;
		if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
		    !i915_vma_is_active(vma))
			ret = i915_vma_unbind(vma);

		spin_lock(&obj->vma.lock);
	}
	list_splice(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	size = round_up(*size_p, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create_shmem(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
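 *
 * A minimal userspace sketch (illustrative only, names taken from the i915
 * uapi headers rather than this file): fill struct drm_i915_gem_create with
 * the requested size and call DRM_IOCTL_I915_GEM_CREATE; on return,
 * args.handle names the new object and args.size holds the page-aligned
 * size that was actually allocated.
 *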
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       &args->size, &args->handle);
}

static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	mutex_unlock(&i915->drm.struct_mutex);

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	mutex_lock(&i915->drm.struct_mutex);
	if (node.allocated) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
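 *
 * The copy is attempted through the CPU/shmem path first; if that returns
 * -EFAULT or -ENODEV, the read falls back to a GGTT mapping of the object
 * (see i915_gem_gtt_pread() above).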
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fall back, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
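		 * If we cannot take a wakeref without waking the device,
		 * bail out with -EFAULT so the caller falls back to the
		 * shmem pwrite path instead.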
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	mutex_unlock(&i915->drm.struct_mutex);

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page faults.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	mutex_lock(&i915->drm.struct_mutex);
	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
	if (node.allocated) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(rpm, wakeref);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush_after is set.
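 * The caller only requests the pre-write flush for writes that are not
 * aligned to whole cachelines (see the partial_cacheline_write mask
 * computed in i915_gem_shmem_pwrite() below).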
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	struct dma_fence *fence;
	void __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire patch.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_object_unlock_fence(obj, fence);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this cannot
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap(obj);

	/*
	 * The fences will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}

static int wait_for_engines(struct intel_gt *gt)
{
	if (wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) {
		dev_err(gt->i915->drm.dev,
			"Failed to idle engines, declaring wedged!\n");
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(gt);
		return -EIO;
	}

	return 0;
}

static long
wait_for_timelines(struct drm_i915_private *i915,
		   unsigned int flags, long timeout)
{
	struct intel_gt_timelines *gt = &i915->gt.timelines;
	struct intel_timeline *tl;

	mutex_lock(&gt->mutex);
	list_for_each_entry(tl, &gt->active_list, link) {
		struct i915_request *rq;

		rq = i915_active_request_get_unlocked(&tl->last_request);
		if (!rq)
			continue;

		mutex_unlock(&gt->mutex);

		/*
		 * "Race-to-idle".
		 *
		 * Switching to the kernel context is often used as a
		 * synchronous step prior to idling, e.g. in suspend for
		 * flushing all current operations to memory before sleeping.
		 * These we want to complete as quickly as possible to avoid
		 * prolonged stalls, so allow the gpu to boost to maximum
		 * clocks.
		 */
		if (flags & I915_WAIT_FOR_IDLE_BOOST)
			gen6_rps_boost(rq);

		timeout = i915_request_wait(rq, flags, timeout);
		i915_request_put(rq);
		if (timeout < 0)
			return timeout;

		/* restart after reacquiring the lock */
		mutex_lock(&gt->mutex);
		tl = list_entry(&gt->active_list, typeof(*tl), link);
	}
	mutex_unlock(&gt->mutex);

	return timeout;
}

int i915_gem_wait_for_idle(struct drm_i915_private *i915,
			   unsigned int flags, long timeout)
{
	/* If the device is asleep, we have no requests outstanding */
	if (!READ_ONCE(i915->gt.awake))
		return 0;

	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
		  yesno(i915->gt.awake));

	timeout = wait_for_timelines(i915, flags, timeout);
	if (timeout < 0)
		return timeout;

	if (flags & I915_WAIT_LOCKED) {
		int err;

		lockdep_assert_held(&i915->drm.struct_mutex);

		err = wait_for_engines(&i915->gt);
		if (err)
			return err;

		i915_retire_requests(i915);
	}

	return 0;
}

struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_address_space *vm = &dev_priv->ggtt.vm;
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/* If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > dev_priv->ggtt.mappable_end)
			return ERR_PTR(-E2BIG);

		/* If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to do a search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		WARN(i915_vma_is_pinned(vma),
		     "bo is already pinned in ggtt with incorrect alignment:"
		     " offset=%08x, req.alignment=%llx,"
		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
		     i915_ggtt_offset(vma), alignment,
		     !!(flags & PIN_MAPPABLE),
		     i915_vma_is_map_and_fenceable(vma));
		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	return vma;
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* If the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}

void i915_gem_sanitize(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;

	GEM_TRACE("\n");

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);

	/*
	 * As we have just resumed the machine and woken the device up from
	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
	 * back to defaults, recovering from whatever wedged state we left it
	 * in and so worth trying to use the device once more.
	 */
	if (intel_gt_is_wedged(&i915->gt))
		intel_gt_unset_wedged(&i915->gt);

	/*
	 * If we inherit context state from the BIOS or earlier occupants
	 * of the GPU, the GPU may be in an inconsistent state when we
	 * try to take over. The only way to remove the earlier state
	 * is by resetting. However, resetting on earlier gen is tricky as
	 * it may impact the display and we are uncertain about the stability
	 * of the reset, so this could be applied to even earlier gen.
	 */
	intel_gt_sanitize(&i915->gt, false);

	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

static void init_unused_ring(struct intel_gt *gt, u32 base)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_write(uncore, RING_CTL(base), 0);
	intel_uncore_write(uncore, RING_HEAD(base), 0);
	intel_uncore_write(uncore, RING_TAIL(base), 0);
	intel_uncore_write(uncore, RING_START(base), 0);
}

static void init_unused_rings(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_I830(i915)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
		init_unused_ring(gt, SRB2_BASE);
		init_unused_ring(gt, SRB3_BASE);
	} else if (IS_GEN(i915, 2)) {
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
	} else if (IS_GEN(i915, 3)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, PRB2_BASE);
	}
}

int i915_gem_init_hw(struct drm_i915_private *i915)
{
	struct intel_uncore *uncore = &i915->uncore;
	struct intel_gt *gt = &i915->gt;
	int ret;

	BUG_ON(!i915->kernel_context);
	ret = intel_gt_terminally_wedged(gt);
	if (ret)
		return ret;

	gt->last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));

	if (IS_HASWELL(i915))
		intel_uncore_write(uncore,
				   MI_PREDICATE_RESULT_2,
				   IS_HSW_GT3(i915) ?
				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(gt);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(gt, "init");

	intel_gt_init_swizzling(gt);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(gt);

	ret = i915_ppgtt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	ret = intel_wopcm_init_hw(&i915->wopcm, gt);
	if (ret) {
		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(&i915->gt.uc);
	if (ret) {
		DRM_ERROR("Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init_l3cc_table(gt);

	intel_engines_set_scheduler_caps(i915);

out:
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	return ret;
}

static int __intel_engines_record_defaults(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct i915_gem_engines *e;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	ctx = i915_gem_context_create_kernel(i915, 0);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	e = i915_gem_context_lock_engines(ctx);

	for_each_engine(engine, i915, id) {
		struct intel_context *ce = e->engines[id];
		struct i915_request *rq;

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_active;
		}

		err = intel_engine_emit_ctx_wa(rq);
		if (err)
			goto err_rq;

		/*
		 * Failing to program the MOCS is non-fatal. The system will
		 * not run at peak performance. So warn the user and carry on.
		 */
		err = intel_mocs_emit(rq);
		if (err)
			dev_notice(i915->drm.dev,
				   "Failed to program MOCS registers; expect performance issues.\n");

		err = intel_renderstate_emit(rq);
		if (err)
			goto err_rq;

err_rq:
		i915_request_add(rq);
		if (err)
			goto err_active;
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (!i915_gem_load_power_context(i915)) {
		err = -EIO;
		goto err_active;
	}

	for_each_engine(engine, i915, id) {
		struct intel_context *ce = e->engines[id];
		struct i915_vma *state = ce->state;
		void *vaddr;

		if (!state)
			continue;

		GEM_BUG_ON(intel_context_is_pinned(ce));

		/*
		 * As we will hold a reference to the logical state, it will
		 * not be torn down with the context, and importantly the
		 * object will hold onto its vma (making it possible for a
		 * stray GTT write to corrupt our defaults). Unmap the vma
		 * from the GTT to prevent such accidents and reclaim the
		 * space.
		 */
		err = i915_vma_unbind(state);
		if (err)
			goto err_active;

		i915_gem_object_lock(state->obj);
		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
		i915_gem_object_unlock(state->obj);
		if (err)
			goto err_active;

		engine->default_state = i915_gem_object_get(state->obj);
		i915_gem_object_set_cache_coherency(engine->default_state,
						    I915_CACHE_LLC);

		/* Check we can acquire the image of the context state */
		vaddr = i915_gem_object_pin_map(engine->default_state,
						I915_MAP_FORCE_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_active;
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
		unsigned int found = intel_engines_has_context_isolation(i915);

		/*
		 * Make sure that classes with multiple engine instances all
		 * share the same basic configuration.
		 */
		for_each_engine(engine, i915, id) {
			unsigned int bit = BIT(engine->uabi_class);
			unsigned int expected = engine->default_state ? bit : 0;

			if ((found & bit) != expected) {
				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
					  engine->uabi_class, engine->name);
			}
		}
	}

out_ctx:
	i915_gem_context_unlock_engines(ctx);
	i915_gem_context_set_closed(ctx);
	i915_gem_context_put(ctx);
	return err;

err_active:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	intel_gt_set_wedged(&i915->gt);
	goto out_ctx;
}

static int
i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
{
	return intel_gt_init_scratch(&i915->gt, size);
}

static void i915_gem_fini_scratch(struct drm_i915_private *i915)
{
	intel_gt_fini_scratch(&i915->gt);
}

static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, i915, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	return err;
}

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fall back to 4K pages if host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);

	intel_timelines_init(dev_priv);

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);

	ret = intel_wopcm_init(&dev_priv->wopcm);
	if (ret)
		goto err_uc_fw;

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_init_scratch(dev_priv,
				    IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_ggtt;
	}

	ret = intel_engines_setup(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_contexts_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_scratch;
	}

	ret = intel_engines_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_context;
	}

	intel_init_gt_powersave(dev_priv);

	ret = intel_uc_init(&dev_priv->gt.uc);
	if (ret)
		goto err_pm;

	ret = i915_gem_init_hw(dev_priv);
	if (ret)
		goto err_uc_init;

	/* Only when the HW is re-initialised, can we replay the requests */
	ret = intel_gt_resume(&dev_priv->gt);
	if (ret)
		goto err_init_hw;

	/*
	 * Despite its name intel_init_clock_gating applies both display
	 * clock gating workarounds; GT mmio workarounds and the occasional
	 * GT power context workaround. Worse, sometimes it includes a context
	 * register workaround which we need to apply before we record the
	 * default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_engines_verify_workarounds(dev_priv);
	if (ret)
		goto err_gt;

	ret = __intel_engines_record_defaults(dev_priv);
	if (ret)
		goto err_gt;

	if (i915_inject_probe_failure()) {
		ret = -ENODEV;
		goto err_gt;
	}

	if (i915_inject_probe_failure()) {
		ret = -EIO;
		goto err_gt;
	}

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

	/*
	 * Unwinding is complicated by that we want to handle -EIO to mean
	 * disable GPU submission but keep KMS alive. We want to mark the
	 * HW as irreversibly wedged, but keep enough state around that the
	 * driver doesn't explode during runtime.
	 */
err_gt:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_gt_set_wedged(&dev_priv->gt);
	i915_gem_suspend(dev_priv);
	i915_gem_suspend_late(dev_priv);

	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
err_init_hw:
	intel_uc_fini_hw(&dev_priv->gt.uc);
err_uc_init:
	intel_uc_fini(&dev_priv->gt.uc);
err_pm:
	if (ret != -EIO) {
		intel_cleanup_gt_powersave(dev_priv);
		intel_engines_cleanup(dev_priv);
	}
err_context:
	if (ret != -EIO)
		i915_gem_contexts_fini(dev_priv);
err_scratch:
	i915_gem_fini_scratch(dev_priv);
err_ggtt:
err_unlock:
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_uc_fw:
	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);

	if (ret != -EIO) {
		i915_gem_cleanup_userptr(dev_priv);
		intel_timelines_fini(dev_priv);
	}

	if (ret == -EIO) {
		mutex_lock(&dev_priv->drm.struct_mutex);

		/*
		 * Allow engine initialisation to fail by marking the GPU as
		 * wedged.
		 * But we only want to do this where the GPU is angry;
		 * for all other failures, such as an allocation failure, bail.
		 */
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
			i915_probe_error(dev_priv,
					 "Failed to initialize GPU, declaring it wedged!\n");
			intel_gt_set_wedged(&dev_priv->gt);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_gem_restore_gtt_mappings(dev_priv);
		i915_gem_restore_fences(dev_priv);
		intel_init_clock_gating(dev_priv);

		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
	GEM_BUG_ON(dev_priv->gt.awake);

	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_disable_gt_powersave(dev_priv);

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(&dev_priv->gt.uc);
	intel_uc_fini(&dev_priv->gt.uc);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	i915_gem_drain_freed_objects(dev_priv);
}

void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_engines_cleanup(dev_priv);
	i915_gem_contexts_fini(dev_priv);
	i915_gem_fini_scratch(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_cleanup_gt_powersave(dev_priv);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);
	intel_timelines_fini(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	WARN_ON(!list_empty(&dev_priv->contexts.list));
}

void i915_gem_init_mmio(struct drm_i915_private *i915)
{
	i915_gem_sanitize(i915);
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.obj_lock);
	spin_lock_init(&i915->mm.free_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err;

	i915_gem_init__mm(dev_priv);
	i915_gem_init__pm(dev_priv);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);

	return 0;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.shrink_count);

	intel_gt_cleanup_early(&dev_priv->gt);

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
1707 */ 1708 i915_gem_shrink_all(dev_priv); 1709 1710 return 0; 1711 } 1712 1713 int i915_gem_freeze_late(struct drm_i915_private *i915) 1714 { 1715 struct drm_i915_gem_object *obj; 1716 intel_wakeref_t wakeref; 1717 1718 /* 1719 * Called just before we write the hibernation image. 1720 * 1721 * We need to update the domain tracking to reflect that the CPU 1722 * will be accessing all the pages to create and restore from the 1723 * hibernation, and so upon restoration those pages will be in the 1724 * CPU domain. 1725 * 1726 * To make sure the hibernation image contains the latest state, 1727 * we update that state just before writing out the image. 1728 * 1729 * To try and reduce the hibernation image, we manually shrink 1730 * the objects as well, see i915_gem_freeze() 1731 */ 1732 1733 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1734 1735 i915_gem_shrink(i915, -1UL, NULL, ~0); 1736 i915_gem_drain_freed_objects(i915); 1737 1738 list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) { 1739 i915_gem_object_lock(obj); 1740 WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); 1741 i915_gem_object_unlock(obj); 1742 } 1743 1744 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1745 1746 return 0; 1747 } 1748 1749 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 1750 { 1751 struct drm_i915_file_private *file_priv = file->driver_priv; 1752 struct i915_request *request; 1753 1754 /* Clean up our request list when the client is going away, so that 1755 * later retire_requests won't dereference our soon-to-be-gone 1756 * file_priv. 1757 */ 1758 spin_lock(&file_priv->mm.lock); 1759 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 1760 request->file_priv = NULL; 1761 spin_unlock(&file_priv->mm.lock); 1762 } 1763 1764 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 1765 { 1766 struct drm_i915_file_private *file_priv; 1767 int ret; 1768 1769 DRM_DEBUG("\n"); 1770 1771 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 1772 if (!file_priv) 1773 return -ENOMEM; 1774 1775 file->driver_priv = file_priv; 1776 file_priv->dev_priv = i915; 1777 file_priv->file = file; 1778 1779 spin_lock_init(&file_priv->mm.lock); 1780 INIT_LIST_HEAD(&file_priv->mm.request_list); 1781 1782 file_priv->bsd_engine = -1; 1783 file_priv->hang_timestamp = jiffies; 1784 1785 ret = i915_gem_context_open(i915, file); 1786 if (ret) 1787 kfree(file_priv); 1788 1789 return ret; 1790 } 1791 1792 /** 1793 * i915_gem_track_fb - update frontbuffer tracking 1794 * @old: current GEM buffer for the frontbuffer slots 1795 * @new: new GEM buffer for the frontbuffer slots 1796 * @frontbuffer_bits: bitmask of frontbuffer slots 1797 * 1798 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 1799 * from @old and setting them in @new. Both @old and @new can be NULL. 1800 */ 1801 void i915_gem_track_fb(struct drm_i915_gem_object *old, 1802 struct drm_i915_gem_object *new, 1803 unsigned frontbuffer_bits) 1804 { 1805 /* Control of individual bits within the mask are guarded by 1806 * the owning plane->mutex, i.e. we can never see concurrent 1807 * manipulation of individual bits. But since the bitfield as a whole 1808 * is updated using RMW, we need to use atomics in order to update 1809 * the bits. 
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     BITS_PER_TYPE(atomic_t));

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
#endif