/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_mman.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->vm.mutex);
	if (err)
		return err;

	memset(node, 0, sizeof(*node));
	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					  size, 0, I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);

	mutex_unlock(&ggtt->vm.mutex);

	return err;
}

static void
remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
{
	mutex_lock(&ggtt->vm.mutex);
	drm_mm_remove_node(node);
	mutex_unlock(&ggtt->vm.mutex);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	if (mutex_lock_interruptible(&ggtt->vm.mutex))
		return -EINTR;

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
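/*
 * i915_gem_object_unbind - unbind all VMAs of an object from their GTTs
 * @obj: the object whose bindings should be released
 * @flags: I915_GEM_OBJECT_UNBIND_* control bits
 *
 * Walks obj->vma.list and unbinds every bound VMA. Active VMAs are only
 * unbound if I915_GEM_OBJECT_UNBIND_ACTIVE is set; otherwise they are left
 * in place and -EBUSY is reported. With I915_GEM_OBJECT_UNBIND_BARRIER the
 * walk is retried after an rcu_barrier() to flush i915_vm_release().
 *
 * Returns 0 on success or a negative errno.
 */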
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
			   unsigned long flags)
{
	struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
	LIST_HEAD(still_in_list);
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	int ret;

	if (!atomic_read(&obj->bind_count))
		return 0;

	/*
	 * As some machines use ACPI to handle runtime-resume callbacks, and
	 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
	 * as they are required by the shrinker. Ergo, we wake the device up
	 * first just in case.
	 */
	wakeref = intel_runtime_pm_get(rpm);

try_again:
	ret = 0;
	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		struct i915_address_space *vm = vma->vm;

		list_move_tail(&vma->obj_link, &still_in_list);
		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
			continue;

		ret = -EAGAIN;
		if (!i915_vm_tryopen(vm))
			break;

		/* Prevent vma being freed by i915_vma_parked as we unbind */
		vma = __i915_vma_get(vma);
		spin_unlock(&obj->vma.lock);

		if (vma) {
			ret = -EBUSY;
			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
			    !i915_vma_is_active(vma))
				ret = i915_vma_unbind(vma);

			__i915_vma_put(vma);
		}

		i915_vm_close(vm);
		spin_lock(&obj->vma.lock);
	}
	list_splice_init(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
		rcu_barrier(); /* flush the i915_vm_release() */
		goto try_again;
	}

	intel_runtime_pm_put(rpm, wakeref);

	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/*
	 * We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	return 0;
}
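/*
 * i915_gem_create - allocate a new GEM object and a handle for it
 * @file: drm file that will own the new handle
 * @mr: memory region (system or device-local) to allocate from
 * @size_p: in/out requested size, rounded up to @mr->min_page_size on return
 * @handle_p: returned handle for the new object
 *
 * Common helper shared by the dumb-buffer and GEM_CREATE paths below.
 */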
static int
i915_gem_create(struct drm_file *file,
		struct intel_memory_region *mr,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
	size = round_up(*size_p, mr->min_page_size);
	if (size == 0)
		return -EINVAL;

	/* For most of the ABI (e.g. mmap) we think in system pages */
	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	/* Allocate the new object */
	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	enum intel_memory_type mem_type;
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

	args->size = args->pitch * args->height;

	mem_type = INTEL_MEMORY_SYSTEM;
	if (HAS_LMEM(to_i915(dev)))
		mem_type = INTEL_MEMORY_LOCAL;

	return i915_gem_create(file,
			       intel_memory_region_by_type(to_i915(dev),
							   mem_type),
			       &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(i915);

	return i915_gem_create(file,
			       intel_memory_region_by_type(i915,
							   INTEL_MEMORY_SYSTEM),
			       &args->size, &args->handle);
}
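/*
 * Illustrative only (not part of the driver): userspace typically reaches
 * i915_gem_create_ioctl() through libdrm with something roughly like
 *
 *	struct drm_i915_gem_create create = { .size = 4096 };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0) {
 *		// create.handle now names the new object,
 *		// create.size holds the rounded-up allocation size
 *	}
 */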
static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}
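/*
 * Fallback pread path through the mappable GGTT aperture, used when
 * i915_gem_shmem_pread() fails with -EFAULT or -ENODEV. The object is
 * pinned into the mappable aperture when possible; otherwise a single page
 * of GGTT address space is reserved and rebound to each object page in turn.
 */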
static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref)
			return -EFAULT;
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page faults.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush_after is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}
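/*
 * Slow pwrite path through the CPU mapping of the shmem pages: copies one
 * page at a time with shmem_pwrite(), flushing cachelines before and after
 * the copy as requested by i915_gem_object_prepare_write().
 */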
static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	struct dma_fence *fence;
	void __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire patch.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	i915_gem_object_unlock_fence(obj, fence);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}
/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap_gtt(obj);

	/*
	 * The fences will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}
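/*
 * i915_gem_object_ggtt_pin - pin an object into the global GTT
 * @obj: object to pin
 * @view: optional partial/rotated view of the object, or NULL for the
 *	  normal view
 * @size: minimum size of the binding, or 0 to use the object size
 * @alignment: required alignment of the binding
 * @flags: PIN_* placement constraints (e.g. PIN_MAPPABLE, PIN_NONBLOCK)
 *
 * Looks up (or creates) the VMA for @obj in the global GTT, unbinds it if
 * it is misplaced for the requested constraints, and pins it with
 * PIN_GLOBAL set. Returns the pinned VMA or an ERR_PTR().
 */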
struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_vma *vma;
	int ret;

	if (i915_gem_object_never_bind_ggtt(obj))
		return ERR_PTR(-ENODEV);

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/*
		 * If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > ggtt->mappable_end)
			return ERR_PTR(-E2BIG);

		/*
		 * If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > ggtt->mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, &ggtt->vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > ggtt->mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
		mutex_lock(&ggtt->vm.mutex);
		ret = i915_vma_revoke_fence(vma);
		mutex_unlock(&ggtt->vm.mutex);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	return vma;
}
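/*
 * i915_gem_madvise_ioctl - hint whether an object's backing storage is needed
 *
 * I915_MADV_DONTNEED marks the object's pages as purgeable so the shrinker
 * may discard them under memory pressure (and truncates them immediately if
 * no pages are currently bound); I915_MADV_WILLNEED marks them as required
 * again. args->retained reports whether the backing store still exists.
 */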
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}
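/*
 * i915_gem_init - one-time GEM initialisation during driver load
 *
 * Sets up userptr support, uC firmware fetching, the global GTT and the GT
 * itself. On -EIO the GPU is declared wedged and only enough state is kept
 * alive for KMS to keep working.
 */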
int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fallback to 4K pages if host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
	intel_wopcm_init(&dev_priv->wopcm);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	/*
	 * Despite its name, intel_init_clock_gating applies display clock
	 * gating workarounds as well as GT mmio workarounds and the
	 * occasional GT power context workaround. Worse, sometimes it
	 * includes a context register workaround which we need to apply
	 * before we record the default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_gt_init(&dev_priv->gt);
	if (ret)
		goto err_unlock;

	return 0;

	/*
	 * Unwinding is complicated by that we want to handle -EIO to mean
	 * disable GPU submission but keep KMS alive. We want to mark the
	 * HW as irreversibly wedged, but keep enough state around that the
	 * driver doesn't explode during runtime.
	 */
err_unlock:
	i915_gem_drain_workqueue(dev_priv);

	if (ret != -EIO) {
		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
		i915_gem_cleanup_userptr(dev_priv);
	}

	if (ret == -EIO) {
		/*
		 * Allow engines or uC initialisation to fail by marking the GPU
		 * as wedged. But we only want to do this when the GPU is angry,
		 * for all other failures, such as an allocation failure, bail.
		 */
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
			i915_probe_error(dev_priv,
					 "Failed to initialize GPU, declaring it wedged!\n");
			intel_gt_set_wedged(&dev_priv->gt);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_gem_restore_gtt_mappings(dev_priv);
		i915_gem_restore_fences(&dev_priv->ggtt);
		intel_init_clock_gating(dev_priv);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}
void i915_gem_driver_register(struct drm_i915_private *i915)
{
	i915_gem_driver_register__shrinker(i915);

	intel_engines_driver_register(i915);
}

void i915_gem_driver_unregister(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);
}

void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_gt_driver_remove(&dev_priv->gt);
	dev_priv->uabi_engines = RB_ROOT;

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
}

void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	i915_gem_driver_release__contexts(dev_priv);

	intel_gt_driver_release(&dev_priv->gt);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	WARN_ON(!list_empty(&dev_priv->gem.contexts.list));
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.obj_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

void i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	i915_gem_init__mm(dev_priv);
	i915_gem_init__contexts(dev_priv);

	spin_lock_init(&dev_priv->fb_tracking.lock);
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.shrink_count);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	i915_gem_shrink(i915, -1UL, NULL, ~0);
	i915_gem_drain_freed_objects(i915);

	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		i915_gem_object_lock(obj);
		WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
		i915_gem_object_unlock(obj);
	}

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}
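/*
 * i915_gem_open - set up per-client GEM state for a new drm_file
 *
 * Allocates the drm_i915_file_private backing file->driver_priv, initialises
 * the per-client request list, and opens the client's context state via
 * i915_gem_context_open(). i915_gem_release() above detaches any remaining
 * requests from file_priv when the client goes away.
 */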
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
#endif