/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_mman.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->vm.mutex);
	if (err)
		return err;

	memset(node, 0, sizeof(*node));
	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					  size, 0, I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);

	mutex_unlock(&ggtt->vm.mutex);

	return err;
}

static void
remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
{
	mutex_lock(&ggtt->vm.mutex);
	drm_mm_remove_node(node);
	mutex_unlock(&ggtt->vm.mutex);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	if (mutex_lock_interruptible(&ggtt->vm.mutex))
		return -EINTR;

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
			   unsigned long flags)
{
	struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
	LIST_HEAD(still_in_list);
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	int ret;

	if (list_empty(&obj->vma.list))
		return 0;

	/*
	 * As some machines use ACPI to handle runtime-resume callbacks, and
	 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
	 * as they are required by the shrinker. Ergo, we wake the device up
	 * first just in case.
	 */
	wakeref = intel_runtime_pm_get(rpm);

try_again:
	ret = 0;
	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		struct i915_address_space *vm = vma->vm;

		list_move_tail(&vma->obj_link, &still_in_list);
		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
			continue;

		if (flags & I915_GEM_OBJECT_UNBIND_TEST) {
			ret = -EBUSY;
			break;
		}

		ret = -EAGAIN;
		if (!i915_vm_tryopen(vm))
			break;

		/* Prevent vma being freed by i915_vma_parked as we unbind */
		vma = __i915_vma_get(vma);
		spin_unlock(&obj->vma.lock);

		if (vma) {
			ret = -EBUSY;
			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
			    !i915_vma_is_active(vma))
				ret = i915_vma_unbind(vma);

			__i915_vma_put(vma);
		}

		i915_vm_close(vm);
		spin_lock(&obj->vma.lock);
	}
	list_splice_init(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
		rcu_barrier(); /* flush the i915_vm_release() */
		goto try_again;
	}

	intel_runtime_pm_put(rpm, wakeref);

	return ret;
}

static int
i915_gem_create(struct drm_file *file,
		struct intel_memory_region *mr,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
	size = round_up(*size_p, mr->min_page_size);
	if (size == 0)
		return -EINVAL;

	/* For most of the ABI (e.g. mmap) we think in system pages */
	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	/* Allocate the new object */
	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	enum intel_memory_type mem_type;
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

	if (args->pitch < args->width)
		return -EINVAL;

	args->size = mul_u32_u32(args->pitch, args->height);

	mem_type = INTEL_MEMORY_SYSTEM;
	if (HAS_LMEM(to_i915(dev)))
		mem_type = INTEL_MEMORY_LOCAL;

	return i915_gem_create(file,
			       intel_memory_region_by_type(to_i915(dev),
							   mem_type),
			       &args->size, &args->handle);
}

/**
 * i915_gem_create_ioctl - Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(i915);

	return i915_gem_create(file,
			       intel_memory_region_by_type(i915,
							   INTEL_MEMORY_SYSTEM),
			       &args->size, &args->handle);
}

static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret) {
		i915_gem_object_unlock(obj);
		return ret;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	i915_gem_object_unlock(obj);

	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	return ret;
}

/**
 * i915_gem_pread_ioctl - Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pread)
		ret = obj->ops->pread(obj, args);
	if (ret != -ENODEV)
		goto out;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * i915_gem_gtt_pwrite_fast - This is the fast pwrite path, where we copy the
 * data directly from the user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fall back, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref)
			return -EFAULT;
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	struct dma_fence *fence;
	void __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (ret) {
		i915_gem_object_unlock(obj);
		return ret;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	i915_gem_object_unlock(obj);

	if (!fence)
		return -ENOMEM;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire page.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	i915_gem_object_unlock_fence(obj, fence);

	return ret;
}

/**
 * i915_gem_pwrite_ioctl - Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap_gtt(obj);

	/*
	 * The fences will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that not only
		 * violates our tracking of activity and associated
		 * locking/barriers, but is also illegal given that the hw
		 * is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}

/* Drop the vma from the object's lookup tree so that a fresh vma can be
 * created on the next pin attempt.
 */
static void discard_ggtt_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	spin_lock(&obj->vma.lock);
	if (!RB_EMPTY_NODE(&vma->obj_node)) {
		rb_erase(&vma->obj_node, &obj->vma.tree);
		RB_CLEAR_NODE(&vma->obj_node);
	}
	spin_unlock(&obj->vma.lock);
}

struct i915_vma *
i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
			    struct i915_gem_ww_ctx *ww,
			    const struct i915_ggtt_view *view,
			    u64 size, u64 alignment, u64 flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_vma *vma;
	int ret;

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/*
		 * If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > ggtt->mappable_end)
			return ERR_PTR(-E2BIG);

		/*
		 * If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to do a search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > ggtt->mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

new_vma:
	vma = i915_vma_instance(obj, &ggtt->vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > ggtt->mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) {
			discard_ggtt_vma(vma);
			goto new_vma;
		}

		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
		mutex_lock(&ggtt->vm.mutex);
		i915_vma_revoke_fence(vma);
		mutex_unlock(&ggtt->vm.mutex);
	}

	ret = i915_vma_wait_for_bind(vma);
	if (ret) {
		i915_vma_unpin(vma);
		return ERR_PTR(ret);
	}

	return vma;
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fall back to 4K pages if the host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
	intel_wopcm_init(&dev_priv->wopcm);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	/*
	 * Despite its name, intel_init_clock_gating applies not only display
	 * clock gating workarounds but also GT mmio workarounds and the
	 * occasional GT power context workaround. Worse, sometimes it includes
	 * a context register workaround which we need to apply before we
	 * record the default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_gt_init(&dev_priv->gt);
	if (ret)
		goto err_unlock;

	return 0;

	/*
	 * Unwinding is complicated by the fact that we want to handle -EIO
	 * to mean disable GPU submission but keep KMS alive. We want to mark
	 * the HW as irreversibly wedged, but keep enough state around that the
	 * driver doesn't explode during runtime.
	 */
err_unlock:
	i915_gem_drain_workqueue(dev_priv);

	if (ret != -EIO) {
		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
		i915_gem_cleanup_userptr(dev_priv);
	}

	if (ret == -EIO) {
		/*
		 * Allow engines or uC initialisation to fail by marking the GPU
		 * as wedged. But we only want to do this when the GPU is angry,
		 * for all other failures, such as an allocation failure, bail.
		 */
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
			i915_probe_error(dev_priv,
					 "Failed to initialize GPU, declaring it wedged!\n");
			intel_gt_set_wedged(&dev_priv->gt);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_ggtt_resume(&dev_priv->ggtt);
		intel_init_clock_gating(dev_priv);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

void i915_gem_driver_register(struct drm_i915_private *i915)
{
	i915_gem_driver_register__shrinker(i915);

	intel_engines_driver_register(i915);
}

void i915_gem_driver_unregister(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);
}

void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_gt_driver_remove(&dev_priv->gt);
	dev_priv->uabi_engines = RB_ROOT;

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
}

void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	i915_gem_driver_release__contexts(dev_priv);

	intel_gt_driver_release(&dev_priv->gt);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.obj_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

void i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	i915_gem_init__mm(dev_priv);
	i915_gem_init__contexts(dev_priv);

	spin_lock_init(&dev_priv->fb_tracking.lock);
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
1277 * 1278 * To try and reduce the hibernation image, we manually shrink 1279 * the objects as well, see i915_gem_freeze() 1280 */ 1281 1282 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1283 1284 i915_gem_shrink(i915, -1UL, NULL, ~0); 1285 i915_gem_drain_freed_objects(i915); 1286 1287 list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) { 1288 i915_gem_object_lock(obj, NULL); 1289 drm_WARN_ON(&i915->drm, 1290 i915_gem_object_set_to_cpu_domain(obj, true)); 1291 i915_gem_object_unlock(obj); 1292 } 1293 1294 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1295 1296 return 0; 1297 } 1298 1299 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 1300 { 1301 struct drm_i915_file_private *file_priv; 1302 int ret; 1303 1304 DRM_DEBUG("\n"); 1305 1306 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 1307 if (!file_priv) 1308 return -ENOMEM; 1309 1310 file->driver_priv = file_priv; 1311 file_priv->dev_priv = i915; 1312 file_priv->file = file; 1313 1314 file_priv->bsd_engine = -1; 1315 file_priv->hang_timestamp = jiffies; 1316 1317 ret = i915_gem_context_open(i915, file); 1318 if (ret) 1319 kfree(file_priv); 1320 1321 return ret; 1322 } 1323 1324 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) 1325 { 1326 ww_acquire_init(&ww->ctx, &reservation_ww_class); 1327 INIT_LIST_HEAD(&ww->obj_list); 1328 ww->intr = intr; 1329 ww->contended = NULL; 1330 } 1331 1332 static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) 1333 { 1334 struct drm_i915_gem_object *obj; 1335 1336 while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { 1337 list_del(&obj->obj_link); 1338 i915_gem_object_unlock(obj); 1339 } 1340 } 1341 1342 void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) 1343 { 1344 list_del(&obj->obj_link); 1345 i915_gem_object_unlock(obj); 1346 } 1347 1348 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) 1349 { 1350 i915_gem_ww_ctx_unlock_all(ww); 1351 WARN_ON(ww->contended); 1352 ww_acquire_fini(&ww->ctx); 1353 } 1354 1355 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) 1356 { 1357 int ret = 0; 1358 1359 if (WARN_ON(!ww->contended)) 1360 return -EINVAL; 1361 1362 i915_gem_ww_ctx_unlock_all(ww); 1363 if (ww->intr) 1364 ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); 1365 else 1366 dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); 1367 1368 if (!ret) 1369 list_add_tail(&ww->contended->obj_link, &ww->obj_list); 1370 1371 ww->contended = NULL; 1372 1373 return ret; 1374 } 1375 1376 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1377 #include "selftests/mock_gem_device.c" 1378 #include "selftests/i915_gem.c" 1379 #endif 1380