/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_pm.h"
#include "gem/i915_gemfs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_mocs.h"
#include "gt/intel_reset.h"
#include "gt/intel_renderstate.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	mutex_lock(&ggtt->vm.mutex);

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
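
/*
 * Unbind all VMAs attached to @obj. A VMA that is still active stops the
 * walk with -EBUSY unless I915_GEM_OBJECT_UNBIND_ACTIVE is given in @flags,
 * in which case we attempt to unbind active VMAs as well.
 */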
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
			   unsigned long flags)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret = 0;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		spin_unlock(&obj->vma.lock);

		ret = -EBUSY;
		if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
		    !i915_vma_is_active(vma))
			ret = i915_vma_unbind(vma);

		spin_lock(&obj->vma.lock);
	}
	list_splice(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/*
	 * We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);

	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);

	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	size = round_up(*size_p, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create_shmem(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       &args->size, &args->handle);
}

static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
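	/*
	 * Try an atomic kmap of the aperture page first; if the copy faults
	 * while atomic, fall back below to a plain WC mapping and a
	 * copy_to_user() that can take the fault.
	 */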
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	mutex_unlock(&i915->drm.struct_mutex);

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	mutex_lock(&i915->drm.struct_mutex);
	if (node.allocated) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	mutex_unlock(&i915->drm.struct_mutex);

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	mutex_lock(&i915->drm.struct_mutex);
	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
	if (node.allocated) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(rpm, wakeref);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	struct dma_fence *fence;
	void __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire page.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
	i915_gem_object_unlock_fence(obj, fence);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap(obj);

	/*
	 * The fence will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}

static long
wait_for_timelines(struct drm_i915_private *i915,
		   unsigned int wait, long timeout)
{
	struct intel_gt_timelines *timelines = &i915->gt.timelines;
	struct intel_timeline *tl;
	unsigned long flags;

	spin_lock_irqsave(&timelines->lock, flags);
	list_for_each_entry(tl, &timelines->active_list, link) {
		struct i915_request *rq;

		rq = i915_active_request_get_unlocked(&tl->last_request);
		if (!rq)
			continue;

		spin_unlock_irqrestore(&timelines->lock, flags);

		/*
		 * "Race-to-idle".
		 *
		 * Switching to the kernel context is often used as a
		 * synchronous step prior to idling, e.g. in suspend for
		 * flushing all current operations to memory before sleeping.
		 * These we want to complete as quickly as possible to avoid
		 * prolonged stalls, so allow the gpu to boost to maximum
		 * clocks.
		 */
		if (wait & I915_WAIT_FOR_IDLE_BOOST)
			gen6_rps_boost(rq);

		timeout = i915_request_wait(rq, wait, timeout);
		i915_request_put(rq);
		if (timeout < 0)
			return timeout;

		/* restart after reacquiring the lock */
		spin_lock_irqsave(&timelines->lock, flags);
		tl = list_entry(&timelines->active_list, typeof(*tl), link);
	}
	spin_unlock_irqrestore(&timelines->lock, flags);

	return timeout;
}

int i915_gem_wait_for_idle(struct drm_i915_private *i915,
			   unsigned int flags, long timeout)
{
	/* If the device is asleep, we have no requests outstanding */
	if (!intel_gt_pm_is_awake(&i915->gt))
		return 0;

	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");

	timeout = wait_for_timelines(i915, flags, timeout);
	if (timeout < 0)
		return timeout;

	if (flags & I915_WAIT_LOCKED) {
		lockdep_assert_held(&i915->drm.struct_mutex);

		i915_retire_requests(i915);
	}

	return 0;
}

struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_address_space *vm = &dev_priv->ggtt.vm;

	return i915_gem_object_pin(obj, vm, view, size, alignment,
				   flags | PIN_GLOBAL);
}

struct i915_vma *
i915_gem_object_pin(struct drm_i915_gem_object *obj,
		    struct i915_address_space *vm,
		    const struct i915_ggtt_view *view,
		    u64 size,
		    u64 alignment,
		    u64 flags)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (i915_gem_object_never_bind_ggtt(obj))
		return ERR_PTR(-ENODEV);

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/* If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > dev_priv->ggtt.mappable_end)
			return ERR_PTR(-E2BIG);

		/* If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to do a search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		WARN(i915_vma_is_pinned(vma),
		     "bo is already pinned in ggtt with incorrect alignment:"
		     " offset=%08x, req.alignment=%llx,"
		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
		     i915_ggtt_offset(vma), alignment,
		     !!(flags & PIN_MAPPABLE),
		     i915_vma_is_map_and_fenceable(vma));
		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
		mutex_lock(&vma->vm->mutex);
		ret = i915_vma_revoke_fence(vma);
		mutex_unlock(&vma->vm->mutex);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags);
	if (ret)
		return ERR_PTR(ret);

	return vma;
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

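			/*
			 * Keep the object on the appropriate shrinker list:
			 * objects marked DONTNEED go onto the purge list so
			 * that their backing storage may be reaped first
			 * under memory pressure.
			 */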
			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}

void i915_gem_sanitize(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref;

	GEM_TRACE("\n");

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);

	/*
	 * As we have just resumed the machine and woken the device up from
	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
	 * back to defaults, recovering from whatever wedged state we left it
	 * in and so worth trying to use the device once more.
	 */
	if (intel_gt_is_wedged(&i915->gt))
		intel_gt_unset_wedged(&i915->gt);

	/*
	 * If we inherit context state from the BIOS or earlier occupants
	 * of the GPU, the GPU may be in an inconsistent state when we
	 * try to take over. The only way to remove the earlier state
	 * is by resetting. However, resetting on earlier gen is tricky as
	 * it may impact the display and we are uncertain about the stability
	 * of the reset, so this could be applied to even earlier gen.
	 */
	intel_gt_sanitize(&i915->gt, false);

	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

static void init_unused_ring(struct intel_gt *gt, u32 base)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_write(uncore, RING_CTL(base), 0);
	intel_uncore_write(uncore, RING_HEAD(base), 0);
	intel_uncore_write(uncore, RING_TAIL(base), 0);
	intel_uncore_write(uncore, RING_START(base), 0);
}

static void init_unused_rings(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_I830(i915)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
		init_unused_ring(gt, SRB2_BASE);
		init_unused_ring(gt, SRB3_BASE);
	} else if (IS_GEN(i915, 2)) {
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
	} else if (IS_GEN(i915, 3)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, PRB2_BASE);
	}
}

int i915_gem_init_hw(struct drm_i915_private *i915)
{
	struct intel_uncore *uncore = &i915->uncore;
	struct intel_gt *gt = &i915->gt;
	int ret;

	BUG_ON(!i915->kernel_context);
	ret = intel_gt_terminally_wedged(gt);
	if (ret)
		return ret;

	gt->last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));

	if (IS_HASWELL(i915))
		intel_uncore_write(uncore,
				   MI_PREDICATE_RESULT_2,
				   IS_HSW_GT3(i915) ?
				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(gt);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(gt, "init");

	intel_gt_init_swizzling(gt);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(gt);

	ret = i915_ppgtt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(&gt->uc);
	if (ret) {
		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init(gt);

out:
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	return ret;
}

static int __intel_engines_record_defaults(struct drm_i915_private *i915)
{
	struct i915_request *requests[I915_NUM_ENGINES] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	for_each_engine(engine, i915, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		/* We must be able to switch to something! */
		GEM_BUG_ON(!engine->kernel_context);
		engine->serial++; /* force the kernel context switch */

		ce = intel_context_create(i915->kernel_context, engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			intel_context_put(ce);
			goto out;
		}

		err = intel_engine_emit_ctx_wa(rq);
		if (err)
			goto err_rq;

		/*
		 * Failing to program the MOCS is non-fatal. The system will
		 * not run at peak performance. So warn the user and carry on.
		 */
		err = intel_mocs_emit(rq);
		if (err)
			dev_notice(i915->drm.dev,
				   "Failed to program MOCS registers; expect performance issues.\n");

		err = intel_renderstate_emit(rq);
		if (err)
			goto err_rq;

err_rq:
		requests[id] = i915_request_get(rq);
		i915_request_add(rq);
		if (err)
			goto out;
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (!i915_gem_load_power_context(i915)) {
		err = -EIO;
		goto out;
	}

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct i915_request *rq;
		struct i915_vma *state;
		void *vaddr;

		rq = requests[id];
		if (!rq)
			continue;

		/* We want to be able to unbind the state from the GGTT */
		GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));

		state = rq->hw_context->state;
		if (!state)
			continue;

		/*
		 * As we will hold a reference to the logical state, it will
		 * not be torn down with the context, and importantly the
		 * object will hold onto its vma (making it possible for a
		 * stray GTT write to corrupt our defaults). Unmap the vma
		 * from the GTT to prevent such accidents and reclaim the
		 * space.
		 */
		err = i915_vma_unbind(state);
		if (err)
			goto out;

		i915_gem_object_lock(state->obj);
		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
		i915_gem_object_unlock(state->obj);
		if (err)
			goto out;

		i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);

		/* Check we can acquire the image of the context state */
		vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto out;
		}

		rq->engine->default_state = i915_gem_object_get(state->obj);
		i915_gem_object_unpin_map(state->obj);
	}

out:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	if (err)
		intel_gt_set_wedged(&i915->gt);

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct intel_context *ce;
		struct i915_request *rq;

		rq = requests[id];
		if (!rq)
			continue;

		ce = rq->hw_context;
		i915_request_put(rq);
		intel_context_put(ce);
	}
	return err;
}

static int
i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
{
	return intel_gt_init_scratch(&i915->gt, size);
}

static void i915_gem_fini_scratch(struct drm_i915_private *i915)
{
	intel_gt_fini_scratch(&i915->gt);
}

static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, i915, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	return err;
}

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fallback to 4K pages if host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	intel_timelines_init(dev_priv);

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
	intel_wopcm_init(&dev_priv->wopcm);

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset.
	 * If we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_init_scratch(dev_priv,
				    IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_ggtt;
	}

	ret = intel_engines_setup(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_contexts_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_scratch;
	}

	ret = intel_engines_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_context;
	}

	intel_init_gt_powersave(dev_priv);

	intel_uc_init(&dev_priv->gt.uc);

	ret = i915_gem_init_hw(dev_priv);
	if (ret)
		goto err_uc_init;

	/* Only when the HW is re-initialised, can we replay the requests */
	ret = intel_gt_resume(&dev_priv->gt);
	if (ret)
		goto err_init_hw;

	/*
	 * Despite its name intel_init_clock_gating applies both display
	 * clock gating workarounds; GT mmio workarounds and the occasional
	 * GT power context workaround. Worse, sometimes it includes a context
	 * register workaround which we need to apply before we record the
	 * default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_engines_verify_workarounds(dev_priv);
	if (ret)
		goto err_gt;

	ret = __intel_engines_record_defaults(dev_priv);
	if (ret)
		goto err_gt;

	ret = i915_inject_load_error(dev_priv, -ENODEV);
	if (ret)
		goto err_gt;

	ret = i915_inject_load_error(dev_priv, -EIO);
	if (ret)
		goto err_gt;

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

	/*
	 * Unwinding is complicated by that we want to handle -EIO to mean
	 * disable GPU submission but keep KMS alive. We want to mark the
	 * HW as irrevocably wedged, but keep enough state around that the
	 * driver doesn't explode during runtime.
1531 */ 1532 err_gt: 1533 mutex_unlock(&dev_priv->drm.struct_mutex); 1534 1535 intel_gt_set_wedged(&dev_priv->gt); 1536 i915_gem_suspend(dev_priv); 1537 i915_gem_suspend_late(dev_priv); 1538 1539 i915_gem_drain_workqueue(dev_priv); 1540 1541 mutex_lock(&dev_priv->drm.struct_mutex); 1542 err_init_hw: 1543 intel_uc_fini_hw(&dev_priv->gt.uc); 1544 err_uc_init: 1545 if (ret != -EIO) { 1546 intel_uc_fini(&dev_priv->gt.uc); 1547 intel_cleanup_gt_powersave(dev_priv); 1548 intel_engines_cleanup(dev_priv); 1549 } 1550 err_context: 1551 if (ret != -EIO) 1552 i915_gem_contexts_fini(dev_priv); 1553 err_scratch: 1554 i915_gem_fini_scratch(dev_priv); 1555 err_ggtt: 1556 err_unlock: 1557 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 1558 mutex_unlock(&dev_priv->drm.struct_mutex); 1559 1560 if (ret != -EIO) { 1561 intel_uc_cleanup_firmwares(&dev_priv->gt.uc); 1562 i915_gem_cleanup_userptr(dev_priv); 1563 intel_timelines_fini(dev_priv); 1564 } 1565 1566 if (ret == -EIO) { 1567 mutex_lock(&dev_priv->drm.struct_mutex); 1568 1569 /* 1570 * Allow engines or uC initialisation to fail by marking the GPU 1571 * as wedged. But we only want to do this when the GPU is angry, 1572 * for all other failure, such as an allocation failure, bail. 1573 */ 1574 if (!intel_gt_is_wedged(&dev_priv->gt)) { 1575 i915_probe_error(dev_priv, 1576 "Failed to initialize GPU, declaring it wedged!\n"); 1577 intel_gt_set_wedged(&dev_priv->gt); 1578 } 1579 1580 /* Minimal basic recovery for KMS */ 1581 ret = i915_ggtt_enable_hw(dev_priv); 1582 i915_gem_restore_gtt_mappings(dev_priv); 1583 i915_gem_restore_fences(dev_priv); 1584 intel_init_clock_gating(dev_priv); 1585 1586 mutex_unlock(&dev_priv->drm.struct_mutex); 1587 } 1588 1589 i915_gem_drain_freed_objects(dev_priv); 1590 return ret; 1591 } 1592 1593 void i915_gem_driver_register(struct drm_i915_private *i915) 1594 { 1595 i915_gem_driver_register__shrinker(i915); 1596 1597 intel_engines_driver_register(i915); 1598 } 1599 1600 void i915_gem_driver_unregister(struct drm_i915_private *i915) 1601 { 1602 i915_gem_driver_unregister__shrinker(i915); 1603 } 1604 1605 void i915_gem_driver_remove(struct drm_i915_private *dev_priv) 1606 { 1607 GEM_BUG_ON(dev_priv->gt.awake); 1608 1609 intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); 1610 1611 i915_gem_suspend_late(dev_priv); 1612 intel_disable_gt_powersave(dev_priv); 1613 1614 /* Flush any outstanding unpin_work. 
	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(&dev_priv->gt.uc);
	intel_uc_fini(&dev_priv->gt.uc);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	i915_gem_drain_freed_objects(dev_priv);
}

void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_engines_cleanup(dev_priv);
	i915_gem_contexts_fini(dev_priv);
	i915_gem_fini_scratch(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_cleanup_gt_powersave(dev_priv);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);
	intel_timelines_fini(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	WARN_ON(!list_empty(&dev_priv->contexts.list));
}

void i915_gem_init_mmio(struct drm_i915_private *i915)
{
	i915_gem_sanitize(i915);
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.obj_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err;

	i915_gem_init__mm(dev_priv);
	i915_gem_init__pm(dev_priv);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);

	return 0;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.shrink_count);

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	i915_gem_shrink(i915, -1UL, NULL, ~0);
	i915_gem_drain_freed_objects(i915);

	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		i915_gem_object_lock(obj);
		WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
		i915_gem_object_unlock(obj);
	}

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
#endif