1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drm_vma_manager.h> 29 #include <drm/i915_drm.h> 30 #include <linux/dma-fence-array.h> 31 #include <linux/kthread.h> 32 #include <linux/dma-resv.h> 33 #include <linux/shmem_fs.h> 34 #include <linux/slab.h> 35 #include <linux/stop_machine.h> 36 #include <linux/swap.h> 37 #include <linux/pci.h> 38 #include <linux/dma-buf.h> 39 #include <linux/mman.h> 40 41 #include "display/intel_display.h" 42 #include "display/intel_frontbuffer.h" 43 44 #include "gem/i915_gem_clflush.h" 45 #include "gem/i915_gem_context.h" 46 #include "gem/i915_gem_ioctls.h" 47 #include "gem/i915_gem_mman.h" 48 #include "gem/i915_gem_region.h" 49 #include "gt/intel_engine_user.h" 50 #include "gt/intel_gt.h" 51 #include "gt/intel_gt_pm.h" 52 #include "gt/intel_workarounds.h" 53 54 #include "i915_drv.h" 55 #include "i915_trace.h" 56 #include "i915_vgpu.h" 57 58 #include "intel_pm.h" 59 60 static int 61 insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) 62 { 63 int err; 64 65 err = mutex_lock_interruptible(&ggtt->vm.mutex); 66 if (err) 67 return err; 68 69 memset(node, 0, sizeof(*node)); 70 err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 71 size, 0, I915_COLOR_UNEVICTABLE, 72 0, ggtt->mappable_end, 73 DRM_MM_INSERT_LOW); 74 75 mutex_unlock(&ggtt->vm.mutex); 76 77 return err; 78 } 79 80 static void 81 remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node) 82 { 83 mutex_lock(&ggtt->vm.mutex); 84 drm_mm_remove_node(node); 85 mutex_unlock(&ggtt->vm.mutex); 86 } 87 88 int 89 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 90 struct drm_file *file) 91 { 92 struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; 93 struct drm_i915_gem_get_aperture *args = data; 94 struct i915_vma *vma; 95 u64 pinned; 96 97 if (mutex_lock_interruptible(&ggtt->vm.mutex)) 98 return -EINTR; 99 100 pinned = ggtt->vm.reserved; 101 list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) 102 if (i915_vma_is_pinned(vma)) 103 pinned += vma->node.size; 104 105 mutex_unlock(&ggtt->vm.mutex); 106 107 args->aper_size = ggtt->vm.total; 108 args->aper_available_size = args->aper_size - pinned; 109 110 return 0; 111 } 112 113 int i915_gem_object_unbind(struct drm_i915_gem_object *obj, 114 unsigned long flags) 115 { 116 struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm; 117 LIST_HEAD(still_in_list); 118 intel_wakeref_t wakeref; 119 struct i915_vma *vma; 120 int ret; 121 122 if (!atomic_read(&obj->bind_count)) 123 return 0; 124 125 /* 126 * As some machines use ACPI to handle runtime-resume callbacks, and 127 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex 128 * as they are required by the shrinker. Ergo, we wake the device up 129 * first just in case. 130 */ 131 wakeref = intel_runtime_pm_get(rpm); 132 133 try_again: 134 ret = 0; 135 spin_lock(&obj->vma.lock); 136 while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, 137 struct i915_vma, 138 obj_link))) { 139 struct i915_address_space *vm = vma->vm; 140 141 list_move_tail(&vma->obj_link, &still_in_list); 142 if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) 143 continue; 144 145 ret = -EAGAIN; 146 if (!i915_vm_tryopen(vm)) 147 break; 148 149 /* Prevent vma being freed by i915_vma_parked as we unbind */ 150 vma = __i915_vma_get(vma); 151 spin_unlock(&obj->vma.lock); 152 153 if (vma) { 154 ret = -EBUSY; 155 if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || 156 !i915_vma_is_active(vma)) 157 ret = i915_vma_unbind(vma); 158 159 __i915_vma_put(vma); 160 } 161 162 i915_vm_close(vm); 163 spin_lock(&obj->vma.lock); 164 } 165 list_splice_init(&still_in_list, &obj->vma.list); 166 spin_unlock(&obj->vma.lock); 167 168 if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) { 169 rcu_barrier(); /* flush the i915_vm_release() */ 170 goto try_again; 171 } 172 173 intel_runtime_pm_put(rpm, wakeref); 174 175 return ret; 176 } 177 178 static int 179 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 180 struct drm_i915_gem_pwrite *args, 181 struct drm_file *file) 182 { 183 void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; 184 char __user *user_data = u64_to_user_ptr(args->data_ptr); 185 186 /* 187 * We manually control the domain here and pretend that it 188 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 189 */ 190 i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); 191 192 if (copy_from_user(vaddr, user_data, args->size)) 193 return -EFAULT; 194 195 drm_clflush_virt_range(vaddr, args->size); 196 intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); 197 198 i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); 199 return 0; 200 } 201 202 static int 203 i915_gem_create(struct drm_file *file, 204 struct intel_memory_region *mr, 205 u64 *size_p, 206 u32 *handle_p) 207 { 208 struct drm_i915_gem_object *obj; 209 u32 handle; 210 u64 size; 211 int ret; 212 213 GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); 214 size = round_up(*size_p, mr->min_page_size); 215 if (size == 0) 216 return -EINVAL; 217 218 /* For most of the ABI (e.g. mmap) we think in system pages */ 219 GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); 220 221 /* Allocate the new object */ 222 obj = i915_gem_object_create_region(mr, size, 0); 223 if (IS_ERR(obj)) 224 return PTR_ERR(obj); 225 226 ret = drm_gem_handle_create(file, &obj->base, &handle); 227 /* drop reference from allocate - handle holds it now */ 228 i915_gem_object_put(obj); 229 if (ret) 230 return ret; 231 232 *handle_p = handle; 233 *size_p = size; 234 return 0; 235 } 236 237 int 238 i915_gem_dumb_create(struct drm_file *file, 239 struct drm_device *dev, 240 struct drm_mode_create_dumb *args) 241 { 242 enum intel_memory_type mem_type; 243 int cpp = DIV_ROUND_UP(args->bpp, 8); 244 u32 format; 245 246 switch (cpp) { 247 case 1: 248 format = DRM_FORMAT_C8; 249 break; 250 case 2: 251 format = DRM_FORMAT_RGB565; 252 break; 253 case 4: 254 format = DRM_FORMAT_XRGB8888; 255 break; 256 default: 257 return -EINVAL; 258 } 259 260 /* have to work out size/pitch and return them */ 261 args->pitch = ALIGN(args->width * cpp, 64); 262 263 /* align stride to page size so that we can remap */ 264 if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format, 265 DRM_FORMAT_MOD_LINEAR)) 266 args->pitch = ALIGN(args->pitch, 4096); 267 268 if (args->pitch < args->width) 269 return -EINVAL; 270 271 args->size = mul_u32_u32(args->pitch, args->height); 272 273 mem_type = INTEL_MEMORY_SYSTEM; 274 if (HAS_LMEM(to_i915(dev))) 275 mem_type = INTEL_MEMORY_LOCAL; 276 277 return i915_gem_create(file, 278 intel_memory_region_by_type(to_i915(dev), 279 mem_type), 280 &args->size, &args->handle); 281 } 282 283 /** 284 * Creates a new mm object and returns a handle to it. 285 * @dev: drm device pointer 286 * @data: ioctl data blob 287 * @file: drm file pointer 288 */ 289 int 290 i915_gem_create_ioctl(struct drm_device *dev, void *data, 291 struct drm_file *file) 292 { 293 struct drm_i915_private *i915 = to_i915(dev); 294 struct drm_i915_gem_create *args = data; 295 296 i915_gem_flush_free_objects(i915); 297 298 return i915_gem_create(file, 299 intel_memory_region_by_type(i915, 300 INTEL_MEMORY_SYSTEM), 301 &args->size, &args->handle); 302 } 303 304 static int 305 shmem_pread(struct page *page, int offset, int len, char __user *user_data, 306 bool needs_clflush) 307 { 308 char *vaddr; 309 int ret; 310 311 vaddr = kmap(page); 312 313 if (needs_clflush) 314 drm_clflush_virt_range(vaddr + offset, len); 315 316 ret = __copy_to_user(user_data, vaddr + offset, len); 317 318 kunmap(page); 319 320 return ret ? -EFAULT : 0; 321 } 322 323 static int 324 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 325 struct drm_i915_gem_pread *args) 326 { 327 unsigned int needs_clflush; 328 unsigned int idx, offset; 329 struct dma_fence *fence; 330 char __user *user_data; 331 u64 remain; 332 int ret; 333 334 ret = i915_gem_object_prepare_read(obj, &needs_clflush); 335 if (ret) 336 return ret; 337 338 fence = i915_gem_object_lock_fence(obj); 339 i915_gem_object_finish_access(obj); 340 if (!fence) 341 return -ENOMEM; 342 343 remain = args->size; 344 user_data = u64_to_user_ptr(args->data_ptr); 345 offset = offset_in_page(args->offset); 346 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 347 struct page *page = i915_gem_object_get_page(obj, idx); 348 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 349 350 ret = shmem_pread(page, offset, length, user_data, 351 needs_clflush); 352 if (ret) 353 break; 354 355 remain -= length; 356 user_data += length; 357 offset = 0; 358 } 359 360 i915_gem_object_unlock_fence(obj, fence); 361 return ret; 362 } 363 364 static inline bool 365 gtt_user_read(struct io_mapping *mapping, 366 loff_t base, int offset, 367 char __user *user_data, int length) 368 { 369 void __iomem *vaddr; 370 unsigned long unwritten; 371 372 /* We can use the cpu mem copy function because this is X86. */ 373 vaddr = io_mapping_map_atomic_wc(mapping, base); 374 unwritten = __copy_to_user_inatomic(user_data, 375 (void __force *)vaddr + offset, 376 length); 377 io_mapping_unmap_atomic(vaddr); 378 if (unwritten) { 379 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 380 unwritten = copy_to_user(user_data, 381 (void __force *)vaddr + offset, 382 length); 383 io_mapping_unmap(vaddr); 384 } 385 return unwritten; 386 } 387 388 static int 389 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 390 const struct drm_i915_gem_pread *args) 391 { 392 struct drm_i915_private *i915 = to_i915(obj->base.dev); 393 struct i915_ggtt *ggtt = &i915->ggtt; 394 intel_wakeref_t wakeref; 395 struct drm_mm_node node; 396 struct dma_fence *fence; 397 void __user *user_data; 398 struct i915_vma *vma; 399 u64 remain, offset; 400 int ret; 401 402 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 403 vma = ERR_PTR(-ENODEV); 404 if (!i915_gem_object_is_tiled(obj)) 405 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 406 PIN_MAPPABLE | 407 PIN_NONBLOCK /* NOWARN */ | 408 PIN_NOEVICT); 409 if (!IS_ERR(vma)) { 410 node.start = i915_ggtt_offset(vma); 411 node.flags = 0; 412 } else { 413 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 414 if (ret) 415 goto out_rpm; 416 GEM_BUG_ON(!drm_mm_node_allocated(&node)); 417 } 418 419 ret = i915_gem_object_lock_interruptible(obj); 420 if (ret) 421 goto out_unpin; 422 423 ret = i915_gem_object_set_to_gtt_domain(obj, false); 424 if (ret) { 425 i915_gem_object_unlock(obj); 426 goto out_unpin; 427 } 428 429 fence = i915_gem_object_lock_fence(obj); 430 i915_gem_object_unlock(obj); 431 if (!fence) { 432 ret = -ENOMEM; 433 goto out_unpin; 434 } 435 436 user_data = u64_to_user_ptr(args->data_ptr); 437 remain = args->size; 438 offset = args->offset; 439 440 while (remain > 0) { 441 /* Operation in this page 442 * 443 * page_base = page offset within aperture 444 * page_offset = offset within page 445 * page_length = bytes to copy for this page 446 */ 447 u32 page_base = node.start; 448 unsigned page_offset = offset_in_page(offset); 449 unsigned page_length = PAGE_SIZE - page_offset; 450 page_length = remain < page_length ? remain : page_length; 451 if (drm_mm_node_allocated(&node)) { 452 ggtt->vm.insert_page(&ggtt->vm, 453 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 454 node.start, I915_CACHE_NONE, 0); 455 } else { 456 page_base += offset & PAGE_MASK; 457 } 458 459 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 460 user_data, page_length)) { 461 ret = -EFAULT; 462 break; 463 } 464 465 remain -= page_length; 466 user_data += page_length; 467 offset += page_length; 468 } 469 470 i915_gem_object_unlock_fence(obj, fence); 471 out_unpin: 472 if (drm_mm_node_allocated(&node)) { 473 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 474 remove_mappable_node(ggtt, &node); 475 } else { 476 i915_vma_unpin(vma); 477 } 478 out_rpm: 479 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 480 return ret; 481 } 482 483 /** 484 * Reads data from the object referenced by handle. 485 * @dev: drm device pointer 486 * @data: ioctl data blob 487 * @file: drm file pointer 488 * 489 * On error, the contents of *data are undefined. 490 */ 491 int 492 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 493 struct drm_file *file) 494 { 495 struct drm_i915_gem_pread *args = data; 496 struct drm_i915_gem_object *obj; 497 int ret; 498 499 if (args->size == 0) 500 return 0; 501 502 if (!access_ok(u64_to_user_ptr(args->data_ptr), 503 args->size)) 504 return -EFAULT; 505 506 obj = i915_gem_object_lookup(file, args->handle); 507 if (!obj) 508 return -ENOENT; 509 510 /* Bounds check source. */ 511 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 512 ret = -EINVAL; 513 goto out; 514 } 515 516 trace_i915_gem_object_pread(obj, args->offset, args->size); 517 518 ret = i915_gem_object_wait(obj, 519 I915_WAIT_INTERRUPTIBLE, 520 MAX_SCHEDULE_TIMEOUT); 521 if (ret) 522 goto out; 523 524 ret = i915_gem_object_pin_pages(obj); 525 if (ret) 526 goto out; 527 528 ret = i915_gem_shmem_pread(obj, args); 529 if (ret == -EFAULT || ret == -ENODEV) 530 ret = i915_gem_gtt_pread(obj, args); 531 532 i915_gem_object_unpin_pages(obj); 533 out: 534 i915_gem_object_put(obj); 535 return ret; 536 } 537 538 /* This is the fast write path which cannot handle 539 * page faults in the source data 540 */ 541 542 static inline bool 543 ggtt_write(struct io_mapping *mapping, 544 loff_t base, int offset, 545 char __user *user_data, int length) 546 { 547 void __iomem *vaddr; 548 unsigned long unwritten; 549 550 /* We can use the cpu mem copy function because this is X86. */ 551 vaddr = io_mapping_map_atomic_wc(mapping, base); 552 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 553 user_data, length); 554 io_mapping_unmap_atomic(vaddr); 555 if (unwritten) { 556 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 557 unwritten = copy_from_user((void __force *)vaddr + offset, 558 user_data, length); 559 io_mapping_unmap(vaddr); 560 } 561 562 return unwritten; 563 } 564 565 /** 566 * This is the fast pwrite path, where we copy the data directly from the 567 * user into the GTT, uncached. 568 * @obj: i915 GEM object 569 * @args: pwrite arguments structure 570 */ 571 static int 572 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 573 const struct drm_i915_gem_pwrite *args) 574 { 575 struct drm_i915_private *i915 = to_i915(obj->base.dev); 576 struct i915_ggtt *ggtt = &i915->ggtt; 577 struct intel_runtime_pm *rpm = &i915->runtime_pm; 578 intel_wakeref_t wakeref; 579 struct drm_mm_node node; 580 struct dma_fence *fence; 581 struct i915_vma *vma; 582 u64 remain, offset; 583 void __user *user_data; 584 int ret; 585 586 if (i915_gem_object_has_struct_page(obj)) { 587 /* 588 * Avoid waking the device up if we can fallback, as 589 * waking/resuming is very slow (worst-case 10-100 ms 590 * depending on PCI sleeps and our own resume time). 591 * This easily dwarfs any performance advantage from 592 * using the cache bypass of indirect GGTT access. 593 */ 594 wakeref = intel_runtime_pm_get_if_in_use(rpm); 595 if (!wakeref) 596 return -EFAULT; 597 } else { 598 /* No backing pages, no fallback, we must force GGTT access */ 599 wakeref = intel_runtime_pm_get(rpm); 600 } 601 602 vma = ERR_PTR(-ENODEV); 603 if (!i915_gem_object_is_tiled(obj)) 604 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 605 PIN_MAPPABLE | 606 PIN_NONBLOCK /* NOWARN */ | 607 PIN_NOEVICT); 608 if (!IS_ERR(vma)) { 609 node.start = i915_ggtt_offset(vma); 610 node.flags = 0; 611 } else { 612 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 613 if (ret) 614 goto out_rpm; 615 GEM_BUG_ON(!drm_mm_node_allocated(&node)); 616 } 617 618 ret = i915_gem_object_lock_interruptible(obj); 619 if (ret) 620 goto out_unpin; 621 622 ret = i915_gem_object_set_to_gtt_domain(obj, true); 623 if (ret) { 624 i915_gem_object_unlock(obj); 625 goto out_unpin; 626 } 627 628 fence = i915_gem_object_lock_fence(obj); 629 i915_gem_object_unlock(obj); 630 if (!fence) { 631 ret = -ENOMEM; 632 goto out_unpin; 633 } 634 635 i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); 636 637 user_data = u64_to_user_ptr(args->data_ptr); 638 offset = args->offset; 639 remain = args->size; 640 while (remain) { 641 /* Operation in this page 642 * 643 * page_base = page offset within aperture 644 * page_offset = offset within page 645 * page_length = bytes to copy for this page 646 */ 647 u32 page_base = node.start; 648 unsigned int page_offset = offset_in_page(offset); 649 unsigned int page_length = PAGE_SIZE - page_offset; 650 page_length = remain < page_length ? remain : page_length; 651 if (drm_mm_node_allocated(&node)) { 652 /* flush the write before we modify the GGTT */ 653 intel_gt_flush_ggtt_writes(ggtt->vm.gt); 654 ggtt->vm.insert_page(&ggtt->vm, 655 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 656 node.start, I915_CACHE_NONE, 0); 657 wmb(); /* flush modifications to the GGTT (insert_page) */ 658 } else { 659 page_base += offset & PAGE_MASK; 660 } 661 /* If we get a fault while copying data, then (presumably) our 662 * source page isn't available. Return the error and we'll 663 * retry in the slow path. 664 * If the object is non-shmem backed, we retry again with the 665 * path that handles page fault. 666 */ 667 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 668 user_data, page_length)) { 669 ret = -EFAULT; 670 break; 671 } 672 673 remain -= page_length; 674 user_data += page_length; 675 offset += page_length; 676 } 677 678 intel_gt_flush_ggtt_writes(ggtt->vm.gt); 679 i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); 680 681 i915_gem_object_unlock_fence(obj, fence); 682 out_unpin: 683 if (drm_mm_node_allocated(&node)) { 684 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 685 remove_mappable_node(ggtt, &node); 686 } else { 687 i915_vma_unpin(vma); 688 } 689 out_rpm: 690 intel_runtime_pm_put(rpm, wakeref); 691 return ret; 692 } 693 694 /* Per-page copy function for the shmem pwrite fastpath. 695 * Flushes invalid cachelines before writing to the target if 696 * needs_clflush_before is set and flushes out any written cachelines after 697 * writing if needs_clflush is set. 698 */ 699 static int 700 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 701 bool needs_clflush_before, 702 bool needs_clflush_after) 703 { 704 char *vaddr; 705 int ret; 706 707 vaddr = kmap(page); 708 709 if (needs_clflush_before) 710 drm_clflush_virt_range(vaddr + offset, len); 711 712 ret = __copy_from_user(vaddr + offset, user_data, len); 713 if (!ret && needs_clflush_after) 714 drm_clflush_virt_range(vaddr + offset, len); 715 716 kunmap(page); 717 718 return ret ? -EFAULT : 0; 719 } 720 721 static int 722 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 723 const struct drm_i915_gem_pwrite *args) 724 { 725 unsigned int partial_cacheline_write; 726 unsigned int needs_clflush; 727 unsigned int offset, idx; 728 struct dma_fence *fence; 729 void __user *user_data; 730 u64 remain; 731 int ret; 732 733 ret = i915_gem_object_prepare_write(obj, &needs_clflush); 734 if (ret) 735 return ret; 736 737 fence = i915_gem_object_lock_fence(obj); 738 i915_gem_object_finish_access(obj); 739 if (!fence) 740 return -ENOMEM; 741 742 /* If we don't overwrite a cacheline completely we need to be 743 * careful to have up-to-date data by first clflushing. Don't 744 * overcomplicate things and flush the entire patch. 745 */ 746 partial_cacheline_write = 0; 747 if (needs_clflush & CLFLUSH_BEFORE) 748 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 749 750 user_data = u64_to_user_ptr(args->data_ptr); 751 remain = args->size; 752 offset = offset_in_page(args->offset); 753 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 754 struct page *page = i915_gem_object_get_page(obj, idx); 755 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 756 757 ret = shmem_pwrite(page, offset, length, user_data, 758 (offset | length) & partial_cacheline_write, 759 needs_clflush & CLFLUSH_AFTER); 760 if (ret) 761 break; 762 763 remain -= length; 764 user_data += length; 765 offset = 0; 766 } 767 768 i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); 769 i915_gem_object_unlock_fence(obj, fence); 770 771 return ret; 772 } 773 774 /** 775 * Writes data to the object referenced by handle. 776 * @dev: drm device 777 * @data: ioctl data blob 778 * @file: drm file 779 * 780 * On error, the contents of the buffer that were to be modified are undefined. 781 */ 782 int 783 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 784 struct drm_file *file) 785 { 786 struct drm_i915_gem_pwrite *args = data; 787 struct drm_i915_gem_object *obj; 788 int ret; 789 790 if (args->size == 0) 791 return 0; 792 793 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) 794 return -EFAULT; 795 796 obj = i915_gem_object_lookup(file, args->handle); 797 if (!obj) 798 return -ENOENT; 799 800 /* Bounds check destination. */ 801 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 802 ret = -EINVAL; 803 goto err; 804 } 805 806 /* Writes not allowed into this read-only object */ 807 if (i915_gem_object_is_readonly(obj)) { 808 ret = -EINVAL; 809 goto err; 810 } 811 812 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 813 814 ret = -ENODEV; 815 if (obj->ops->pwrite) 816 ret = obj->ops->pwrite(obj, args); 817 if (ret != -ENODEV) 818 goto err; 819 820 ret = i915_gem_object_wait(obj, 821 I915_WAIT_INTERRUPTIBLE | 822 I915_WAIT_ALL, 823 MAX_SCHEDULE_TIMEOUT); 824 if (ret) 825 goto err; 826 827 ret = i915_gem_object_pin_pages(obj); 828 if (ret) 829 goto err; 830 831 ret = -EFAULT; 832 /* We can only do the GTT pwrite on untiled buffers, as otherwise 833 * it would end up going through the fenced access, and we'll get 834 * different detiling behavior between reading and writing. 835 * pread/pwrite currently are reading and writing from the CPU 836 * perspective, requiring manual detiling by the client. 837 */ 838 if (!i915_gem_object_has_struct_page(obj) || 839 cpu_write_needs_clflush(obj)) 840 /* Note that the gtt paths might fail with non-page-backed user 841 * pointers (e.g. gtt mappings when moving data between 842 * textures). Fallback to the shmem path in that case. 843 */ 844 ret = i915_gem_gtt_pwrite_fast(obj, args); 845 846 if (ret == -EFAULT || ret == -ENOSPC) { 847 if (i915_gem_object_has_struct_page(obj)) 848 ret = i915_gem_shmem_pwrite(obj, args); 849 else 850 ret = i915_gem_phys_pwrite(obj, args, file); 851 } 852 853 i915_gem_object_unpin_pages(obj); 854 err: 855 i915_gem_object_put(obj); 856 return ret; 857 } 858 859 /** 860 * Called when user space has done writes to this buffer 861 * @dev: drm device 862 * @data: ioctl data blob 863 * @file: drm file 864 */ 865 int 866 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 867 struct drm_file *file) 868 { 869 struct drm_i915_gem_sw_finish *args = data; 870 struct drm_i915_gem_object *obj; 871 872 obj = i915_gem_object_lookup(file, args->handle); 873 if (!obj) 874 return -ENOENT; 875 876 /* 877 * Proxy objects are barred from CPU access, so there is no 878 * need to ban sw_finish as it is a nop. 879 */ 880 881 /* Pinned buffers may be scanout, so flush the cache */ 882 i915_gem_object_flush_if_display(obj); 883 i915_gem_object_put(obj); 884 885 return 0; 886 } 887 888 void i915_gem_runtime_suspend(struct drm_i915_private *i915) 889 { 890 struct drm_i915_gem_object *obj, *on; 891 int i; 892 893 /* 894 * Only called during RPM suspend. All users of the userfault_list 895 * must be holding an RPM wakeref to ensure that this can not 896 * run concurrently with themselves (and use the struct_mutex for 897 * protection between themselves). 898 */ 899 900 list_for_each_entry_safe(obj, on, 901 &i915->ggtt.userfault_list, userfault_link) 902 __i915_gem_object_release_mmap_gtt(obj); 903 904 /* 905 * The fence will be lost when the device powers down. If any were 906 * in use by hardware (i.e. they are pinned), we should not be powering 907 * down! All other fences will be reacquired by the user upon waking. 908 */ 909 for (i = 0; i < i915->ggtt.num_fences; i++) { 910 struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; 911 912 /* 913 * Ideally we want to assert that the fence register is not 914 * live at this point (i.e. that no piece of code will be 915 * trying to write through fence + GTT, as that both violates 916 * our tracking of activity and associated locking/barriers, 917 * but also is illegal given that the hw is powered down). 918 * 919 * Previously we used reg->pin_count as a "liveness" indicator. 920 * That is not sufficient, and we need a more fine-grained 921 * tool if we want to have a sanity check here. 922 */ 923 924 if (!reg->vma) 925 continue; 926 927 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 928 reg->dirty = true; 929 } 930 } 931 932 struct i915_vma * 933 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 934 const struct i915_ggtt_view *view, 935 u64 size, 936 u64 alignment, 937 u64 flags) 938 { 939 struct drm_i915_private *i915 = to_i915(obj->base.dev); 940 struct i915_ggtt *ggtt = &i915->ggtt; 941 struct i915_vma *vma; 942 int ret; 943 944 if (i915_gem_object_never_bind_ggtt(obj)) 945 return ERR_PTR(-ENODEV); 946 947 if (flags & PIN_MAPPABLE && 948 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 949 /* 950 * If the required space is larger than the available 951 * aperture, we will not able to find a slot for the 952 * object and unbinding the object now will be in 953 * vain. Worse, doing so may cause us to ping-pong 954 * the object in and out of the Global GTT and 955 * waste a lot of cycles under the mutex. 956 */ 957 if (obj->base.size > ggtt->mappable_end) 958 return ERR_PTR(-E2BIG); 959 960 /* 961 * If NONBLOCK is set the caller is optimistically 962 * trying to cache the full object within the mappable 963 * aperture, and *must* have a fallback in place for 964 * situations where we cannot bind the object. We 965 * can be a little more lax here and use the fallback 966 * more often to avoid costly migrations of ourselves 967 * and other objects within the aperture. 968 * 969 * Half-the-aperture is used as a simple heuristic. 970 * More interesting would to do search for a free 971 * block prior to making the commitment to unbind. 972 * That caters for the self-harm case, and with a 973 * little more heuristics (e.g. NOFAULT, NOEVICT) 974 * we could try to minimise harm to others. 975 */ 976 if (flags & PIN_NONBLOCK && 977 obj->base.size > ggtt->mappable_end / 2) 978 return ERR_PTR(-ENOSPC); 979 } 980 981 vma = i915_vma_instance(obj, &ggtt->vm, view); 982 if (IS_ERR(vma)) 983 return vma; 984 985 if (i915_vma_misplaced(vma, size, alignment, flags)) { 986 if (flags & PIN_NONBLOCK) { 987 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 988 return ERR_PTR(-ENOSPC); 989 990 if (flags & PIN_MAPPABLE && 991 vma->fence_size > ggtt->mappable_end / 2) 992 return ERR_PTR(-ENOSPC); 993 } 994 995 ret = i915_vma_unbind(vma); 996 if (ret) 997 return ERR_PTR(ret); 998 } 999 1000 if (vma->fence && !i915_gem_object_is_tiled(obj)) { 1001 mutex_lock(&ggtt->vm.mutex); 1002 ret = i915_vma_revoke_fence(vma); 1003 mutex_unlock(&ggtt->vm.mutex); 1004 if (ret) 1005 return ERR_PTR(ret); 1006 } 1007 1008 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 1009 if (ret) 1010 return ERR_PTR(ret); 1011 1012 return vma; 1013 } 1014 1015 int 1016 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 1017 struct drm_file *file_priv) 1018 { 1019 struct drm_i915_private *i915 = to_i915(dev); 1020 struct drm_i915_gem_madvise *args = data; 1021 struct drm_i915_gem_object *obj; 1022 int err; 1023 1024 switch (args->madv) { 1025 case I915_MADV_DONTNEED: 1026 case I915_MADV_WILLNEED: 1027 break; 1028 default: 1029 return -EINVAL; 1030 } 1031 1032 obj = i915_gem_object_lookup(file_priv, args->handle); 1033 if (!obj) 1034 return -ENOENT; 1035 1036 err = mutex_lock_interruptible(&obj->mm.lock); 1037 if (err) 1038 goto out; 1039 1040 if (i915_gem_object_has_pages(obj) && 1041 i915_gem_object_is_tiled(obj) && 1042 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 1043 if (obj->mm.madv == I915_MADV_WILLNEED) { 1044 GEM_BUG_ON(!obj->mm.quirked); 1045 __i915_gem_object_unpin_pages(obj); 1046 obj->mm.quirked = false; 1047 } 1048 if (args->madv == I915_MADV_WILLNEED) { 1049 GEM_BUG_ON(obj->mm.quirked); 1050 __i915_gem_object_pin_pages(obj); 1051 obj->mm.quirked = true; 1052 } 1053 } 1054 1055 if (obj->mm.madv != __I915_MADV_PURGED) 1056 obj->mm.madv = args->madv; 1057 1058 if (i915_gem_object_has_pages(obj)) { 1059 struct list_head *list; 1060 1061 if (i915_gem_object_is_shrinkable(obj)) { 1062 unsigned long flags; 1063 1064 spin_lock_irqsave(&i915->mm.obj_lock, flags); 1065 1066 if (obj->mm.madv != I915_MADV_WILLNEED) 1067 list = &i915->mm.purge_list; 1068 else 1069 list = &i915->mm.shrink_list; 1070 list_move_tail(&obj->mm.link, list); 1071 1072 spin_unlock_irqrestore(&i915->mm.obj_lock, flags); 1073 } 1074 } 1075 1076 /* if the object is no longer attached, discard its backing storage */ 1077 if (obj->mm.madv == I915_MADV_DONTNEED && 1078 !i915_gem_object_has_pages(obj)) 1079 i915_gem_object_truncate(obj); 1080 1081 args->retained = obj->mm.madv != __I915_MADV_PURGED; 1082 mutex_unlock(&obj->mm.lock); 1083 1084 out: 1085 i915_gem_object_put(obj); 1086 return err; 1087 } 1088 1089 int i915_gem_init(struct drm_i915_private *dev_priv) 1090 { 1091 int ret; 1092 1093 /* We need to fallback to 4K pages if host doesn't support huge gtt. */ 1094 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 1095 mkwrite_device_info(dev_priv)->page_sizes = 1096 I915_GTT_PAGE_SIZE_4K; 1097 1098 ret = i915_gem_init_userptr(dev_priv); 1099 if (ret) 1100 return ret; 1101 1102 intel_uc_fetch_firmwares(&dev_priv->gt.uc); 1103 intel_wopcm_init(&dev_priv->wopcm); 1104 1105 ret = i915_init_ggtt(dev_priv); 1106 if (ret) { 1107 GEM_BUG_ON(ret == -EIO); 1108 goto err_unlock; 1109 } 1110 1111 /* 1112 * Despite its name intel_init_clock_gating applies both display 1113 * clock gating workarounds; GT mmio workarounds and the occasional 1114 * GT power context workaround. Worse, sometimes it includes a context 1115 * register workaround which we need to apply before we record the 1116 * default HW state for all contexts. 1117 * 1118 * FIXME: break up the workarounds and apply them at the right time! 1119 */ 1120 intel_init_clock_gating(dev_priv); 1121 1122 ret = intel_gt_init(&dev_priv->gt); 1123 if (ret) 1124 goto err_unlock; 1125 1126 return 0; 1127 1128 /* 1129 * Unwinding is complicated by that we want to handle -EIO to mean 1130 * disable GPU submission but keep KMS alive. We want to mark the 1131 * HW as irrevisibly wedged, but keep enough state around that the 1132 * driver doesn't explode during runtime. 1133 */ 1134 err_unlock: 1135 i915_gem_drain_workqueue(dev_priv); 1136 1137 if (ret != -EIO) { 1138 intel_uc_cleanup_firmwares(&dev_priv->gt.uc); 1139 i915_gem_cleanup_userptr(dev_priv); 1140 } 1141 1142 if (ret == -EIO) { 1143 /* 1144 * Allow engines or uC initialisation to fail by marking the GPU 1145 * as wedged. But we only want to do this when the GPU is angry, 1146 * for all other failure, such as an allocation failure, bail. 1147 */ 1148 if (!intel_gt_is_wedged(&dev_priv->gt)) { 1149 i915_probe_error(dev_priv, 1150 "Failed to initialize GPU, declaring it wedged!\n"); 1151 intel_gt_set_wedged(&dev_priv->gt); 1152 } 1153 1154 /* Minimal basic recovery for KMS */ 1155 ret = i915_ggtt_enable_hw(dev_priv); 1156 i915_gem_restore_gtt_mappings(dev_priv); 1157 i915_gem_restore_fences(&dev_priv->ggtt); 1158 intel_init_clock_gating(dev_priv); 1159 } 1160 1161 i915_gem_drain_freed_objects(dev_priv); 1162 return ret; 1163 } 1164 1165 void i915_gem_driver_register(struct drm_i915_private *i915) 1166 { 1167 i915_gem_driver_register__shrinker(i915); 1168 1169 intel_engines_driver_register(i915); 1170 } 1171 1172 void i915_gem_driver_unregister(struct drm_i915_private *i915) 1173 { 1174 i915_gem_driver_unregister__shrinker(i915); 1175 } 1176 1177 void i915_gem_driver_remove(struct drm_i915_private *dev_priv) 1178 { 1179 intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); 1180 1181 i915_gem_suspend_late(dev_priv); 1182 intel_gt_driver_remove(&dev_priv->gt); 1183 dev_priv->uabi_engines = RB_ROOT; 1184 1185 /* Flush any outstanding unpin_work. */ 1186 i915_gem_drain_workqueue(dev_priv); 1187 1188 i915_gem_drain_freed_objects(dev_priv); 1189 } 1190 1191 void i915_gem_driver_release(struct drm_i915_private *dev_priv) 1192 { 1193 i915_gem_driver_release__contexts(dev_priv); 1194 1195 intel_gt_driver_release(&dev_priv->gt); 1196 1197 intel_wa_list_free(&dev_priv->gt_wa_list); 1198 1199 intel_uc_cleanup_firmwares(&dev_priv->gt.uc); 1200 i915_gem_cleanup_userptr(dev_priv); 1201 1202 i915_gem_drain_freed_objects(dev_priv); 1203 1204 WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); 1205 } 1206 1207 static void i915_gem_init__mm(struct drm_i915_private *i915) 1208 { 1209 spin_lock_init(&i915->mm.obj_lock); 1210 1211 init_llist_head(&i915->mm.free_list); 1212 1213 INIT_LIST_HEAD(&i915->mm.purge_list); 1214 INIT_LIST_HEAD(&i915->mm.shrink_list); 1215 1216 i915_gem_init__objects(i915); 1217 } 1218 1219 void i915_gem_init_early(struct drm_i915_private *dev_priv) 1220 { 1221 i915_gem_init__mm(dev_priv); 1222 i915_gem_init__contexts(dev_priv); 1223 1224 spin_lock_init(&dev_priv->fb_tracking.lock); 1225 } 1226 1227 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 1228 { 1229 i915_gem_drain_freed_objects(dev_priv); 1230 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 1231 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 1232 WARN_ON(dev_priv->mm.shrink_count); 1233 } 1234 1235 int i915_gem_freeze(struct drm_i915_private *dev_priv) 1236 { 1237 /* Discard all purgeable objects, let userspace recover those as 1238 * required after resuming. 1239 */ 1240 i915_gem_shrink_all(dev_priv); 1241 1242 return 0; 1243 } 1244 1245 int i915_gem_freeze_late(struct drm_i915_private *i915) 1246 { 1247 struct drm_i915_gem_object *obj; 1248 intel_wakeref_t wakeref; 1249 1250 /* 1251 * Called just before we write the hibernation image. 1252 * 1253 * We need to update the domain tracking to reflect that the CPU 1254 * will be accessing all the pages to create and restore from the 1255 * hibernation, and so upon restoration those pages will be in the 1256 * CPU domain. 1257 * 1258 * To make sure the hibernation image contains the latest state, 1259 * we update that state just before writing out the image. 1260 * 1261 * To try and reduce the hibernation image, we manually shrink 1262 * the objects as well, see i915_gem_freeze() 1263 */ 1264 1265 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1266 1267 i915_gem_shrink(i915, -1UL, NULL, ~0); 1268 i915_gem_drain_freed_objects(i915); 1269 1270 list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) { 1271 i915_gem_object_lock(obj); 1272 WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); 1273 i915_gem_object_unlock(obj); 1274 } 1275 1276 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1277 1278 return 0; 1279 } 1280 1281 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 1282 { 1283 struct drm_i915_file_private *file_priv = file->driver_priv; 1284 struct i915_request *request; 1285 1286 /* Clean up our request list when the client is going away, so that 1287 * later retire_requests won't dereference our soon-to-be-gone 1288 * file_priv. 1289 */ 1290 spin_lock(&file_priv->mm.lock); 1291 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 1292 request->file_priv = NULL; 1293 spin_unlock(&file_priv->mm.lock); 1294 } 1295 1296 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 1297 { 1298 struct drm_i915_file_private *file_priv; 1299 int ret; 1300 1301 DRM_DEBUG("\n"); 1302 1303 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 1304 if (!file_priv) 1305 return -ENOMEM; 1306 1307 file->driver_priv = file_priv; 1308 file_priv->dev_priv = i915; 1309 file_priv->file = file; 1310 1311 spin_lock_init(&file_priv->mm.lock); 1312 INIT_LIST_HEAD(&file_priv->mm.request_list); 1313 1314 file_priv->bsd_engine = -1; 1315 file_priv->hang_timestamp = jiffies; 1316 1317 ret = i915_gem_context_open(i915, file); 1318 if (ret) 1319 kfree(file_priv); 1320 1321 return ret; 1322 } 1323 1324 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1325 #include "selftests/mock_gem_device.c" 1326 #include "selftests/i915_gem.c" 1327 #endif 1328