/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_mman.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->vm.mutex);
	if (err)
		return err;

	memset(node, 0, sizeof(*node));
	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					  size, 0, I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);

	mutex_unlock(&ggtt->vm.mutex);

	return err;
}

static void
remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
{
	mutex_lock(&ggtt->vm.mutex);
	drm_mm_remove_node(node);
	mutex_unlock(&ggtt->vm.mutex);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	if (mutex_lock_interruptible(&ggtt->vm.mutex))
		return -EINTR;

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

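/*
 * Try to release all GGTT/ppGTT bindings of @obj. Each vma's vm is
 * temporarily opened so the unbind cannot race with i915_vm_release(), and
 * a reference is taken on the vma itself so it cannot be freed by
 * i915_vma_parked() while obj->vma.lock is dropped. Active vmas are skipped
 * with -EBUSY unless I915_GEM_OBJECT_UNBIND_ACTIVE is given;
 * I915_GEM_OBJECT_UNBIND_TEST only reports -EBUSY for bound vmas; and
 * I915_GEM_OBJECT_UNBIND_BARRIER retries once after an rcu_barrier() to
 * flush any pending vm release.
 */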
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
			   unsigned long flags)
{
	struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
	LIST_HEAD(still_in_list);
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	int ret;

	if (list_empty(&obj->vma.list))
		return 0;

	/*
	 * As some machines use ACPI to handle runtime-resume callbacks, and
	 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
	 * as they are required by the shrinker. Ergo, we wake the device up
	 * first just in case.
	 */
	wakeref = intel_runtime_pm_get(rpm);

try_again:
	ret = 0;
	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		struct i915_address_space *vm = vma->vm;

		list_move_tail(&vma->obj_link, &still_in_list);
		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
			continue;

		if (flags & I915_GEM_OBJECT_UNBIND_TEST) {
			ret = -EBUSY;
			break;
		}

		ret = -EAGAIN;
		if (!i915_vm_tryopen(vm))
			break;

		/* Prevent vma being freed by i915_vma_parked as we unbind */
		vma = __i915_vma_get(vma);
		spin_unlock(&obj->vma.lock);

		if (vma) {
			ret = -EBUSY;
			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
			    !i915_vma_is_active(vma))
				ret = i915_vma_unbind(vma);

			__i915_vma_put(vma);
		}

		i915_vm_close(vm);
		spin_lock(&obj->vma.lock);
	}
	list_splice_init(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
		rcu_barrier(); /* flush the i915_vm_release() */
		goto try_again;
	}

	intel_runtime_pm_put(rpm, wakeref);

	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/*
	 * We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct intel_memory_region *mr,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
	size = round_up(*size_p, mr->min_page_size);
	if (size == 0)
		return -EINVAL;

	/* For most of the ABI (e.g. mmap) we think in system pages */
	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	/* Allocate the new object */
	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

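/*
 * i915_gem_dumb_create() services the KMS dumb-buffer interface: it computes
 * a 64-byte aligned pitch for the requested bpp, bumps the pitch to a page
 * multiple once it exceeds the maximum linear fb stride (so the buffer can
 * still be remapped), and allocates from local memory when available,
 * otherwise from system memory.
 */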
int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	enum intel_memory_type mem_type;
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

	if (args->pitch < args->width)
		return -EINVAL;

	args->size = mul_u32_u32(args->pitch, args->height);

	mem_type = INTEL_MEMORY_SYSTEM;
	if (HAS_LMEM(to_i915(dev)))
		mem_type = INTEL_MEMORY_LOCAL;

	return i915_gem_create(file,
			       intel_memory_region_by_type(to_i915(dev),
							   mem_type),
			       &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(i915);

	return i915_gem_create(file,
			       intel_memory_region_by_type(i915,
							   INTEL_MEMORY_SYSTEM),
			       &args->size, &args->handle);
}

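/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading from the page if
 * needs_clflush is set.
 */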
static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret) {
		i915_gem_object_unlock(obj);
		return ret;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	i915_gem_object_unlock(obj);

	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

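/*
 * Fallback pread path through the GGTT, used when i915_gem_shmem_pread()
 * returns -EFAULT or -ENODEV (see i915_gem_pread_ioctl()). The object is
 * either pinned into the mappable aperture or, failing that, read page by
 * page through a temporary GGTT PTE.
 */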
static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fall back, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref)
			return -EFAULT;
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is not shmem backed, we retry with the
		 * path that handles the page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush_after is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	struct dma_fence *fence;
	void __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (ret) {
		i915_gem_object_unlock(obj);
		return ret;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	i915_gem_object_unlock(obj);

	if (!fence)
		return -ENOMEM;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire range being written.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	i915_gem_object_unlock_fence(obj, fence);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fall back to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(obj, args);
		else
			ret = i915_gem_phys_pwrite(obj, args, file);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap_gtt(obj);

	/*
	 * The fences will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}

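/*
 * Detach a misplaced GGTT vma from the object's vma tree so that the next
 * i915_vma_instance() creates a fresh vma; used below when the existing vma
 * is pinned or active and therefore cannot be unbound and moved.
 */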
static void discard_ggtt_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	spin_lock(&obj->vma.lock);
	if (!RB_EMPTY_NODE(&vma->obj_node)) {
		rb_erase(&vma->obj_node, &obj->vma.tree);
		RB_CLEAR_NODE(&vma->obj_node);
	}
	spin_unlock(&obj->vma.lock);
}

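/*
 * Pin @obj into the global GTT for @view, unbinding (or, if the current vma
 * is pinned/active, discarding) a misplaced vma first, and wait for the bind
 * to complete before returning the pinned vma. @ww, when non-NULL, is the
 * acquire context used by i915_vma_pin_ww() for object locking.
 */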
struct i915_vma *
i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
			    struct i915_gem_ww_ctx *ww,
			    const struct i915_ggtt_view *view,
			    u64 size, u64 alignment, u64 flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_vma *vma;
	int ret;

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/*
		 * If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > ggtt->mappable_end)
			return ERR_PTR(-E2BIG);

		/*
		 * If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > ggtt->mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

new_vma:
	vma = i915_vma_instance(obj, &ggtt->vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > ggtt->mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) {
			discard_ggtt_vma(vma);
			goto new_vma;
		}

		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
		mutex_lock(&ggtt->vm.mutex);
		i915_vma_revoke_fence(vma);
		mutex_unlock(&ggtt->vm.mutex);
	}

	ret = i915_vma_wait_for_bind(vma);
	if (ret) {
		i915_vma_unpin(vma);
		return ERR_PTR(ret);
	}

	return vma;
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fall back to 4K pages if the host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
	intel_wopcm_init(&dev_priv->wopcm);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	/*
	 * Despite its name, intel_init_clock_gating applies both display
	 * clock gating workarounds and GT mmio workarounds, plus the
	 * occasional GT power context workaround. Worse, sometimes it includes
	 * a context register workaround which we need to apply before we
	 * record the default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_gt_init(&dev_priv->gt);
	if (ret)
		goto err_unlock;

	return 0;

	/*
	 * Unwinding is complicated by that we want to handle -EIO to mean
	 * disable GPU submission but keep KMS alive. We want to mark the
	 * HW as irreversibly wedged, but keep enough state around that the
	 * driver doesn't explode during runtime.
	 */
err_unlock:
	i915_gem_drain_workqueue(dev_priv);

	if (ret != -EIO) {
		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
		i915_gem_cleanup_userptr(dev_priv);
	}

	if (ret == -EIO) {
		/*
		 * Allow engines or uC initialisation to fail by marking the GPU
		 * as wedged. But we only want to do this when the GPU is angry,
		 * for all other failures, such as an allocation failure, bail.
		 */
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
			i915_probe_error(dev_priv,
					 "Failed to initialize GPU, declaring it wedged!\n");
			intel_gt_set_wedged(&dev_priv->gt);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_ggtt_resume(&dev_priv->ggtt);
		intel_init_clock_gating(dev_priv);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

void i915_gem_driver_register(struct drm_i915_private *i915)
{
	i915_gem_driver_register__shrinker(i915);

	intel_engines_driver_register(i915);
}

void i915_gem_driver_unregister(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);
}

void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_gt_driver_remove(&dev_priv->gt);
	dev_priv->uabi_engines = RB_ROOT;

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
}

void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	i915_gem_driver_release__contexts(dev_priv);

	intel_gt_driver_release(&dev_priv->gt);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.obj_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

void i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	i915_gem_init__mm(dev_priv);
	i915_gem_init__contexts(dev_priv);

	spin_lock_init(&dev_priv->fb_tracking.lock);
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze().
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	i915_gem_shrink(i915, -1UL, NULL, ~0);
	i915_gem_drain_freed_objects(i915);

	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		i915_gem_object_lock(obj, NULL);
		drm_WARN_ON(&i915->drm,
			    i915_gem_object_set_to_cpu_domain(obj, true));
		i915_gem_object_unlock(obj);
	}

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return 0;
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

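/*
 * i915_gem_ww_ctx helpers: a thin wrapper around the reservation_ww_class
 * acquire context. Objects locked under the context are queued on
 * ww->obj_list and released together; on contention the caller is expected
 * to back off and retry. A typical caller looks roughly like the sketch
 * below (assuming the object lock reports contention via -EDEADLK and
 * records the contended object in ww->contended):
 *
 *	struct i915_gem_ww_ctx ww;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, true);
 * retry:
 *	err = i915_gem_object_lock(obj, &ww);
 *	if (!err)
 *		err = do_work(obj);
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 */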
void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
{
	ww_acquire_init(&ww->ctx, &reservation_ww_class);
	INIT_LIST_HEAD(&ww->obj_list);
	ww->intr = intr;
	ww->contended = NULL;
}

static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj;

	while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {
		list_del(&obj->obj_link);
		i915_gem_object_unlock(obj);
	}
}

void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
{
	list_del(&obj->obj_link);
	i915_gem_object_unlock(obj);
}

void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
{
	i915_gem_ww_ctx_unlock_all(ww);
	WARN_ON(ww->contended);
	ww_acquire_fini(&ww->ctx);
}

int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
{
	int ret = 0;

	if (WARN_ON(!ww->contended))
		return -EINVAL;

	i915_gem_ww_ctx_unlock_all(ww);
	if (ww->intr)
		ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
	else
		dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);

	if (!ret)
		list_add_tail(&ww->contended->obj_link, &ww->obj_list);

	ww->contended = NULL;

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
#endif