1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drm_vma_manager.h> 29 #include <drm/drm_pci.h> 30 #include <drm/i915_drm.h> 31 #include <linux/dma-fence-array.h> 32 #include <linux/kthread.h> 33 #include <linux/reservation.h> 34 #include <linux/shmem_fs.h> 35 #include <linux/slab.h> 36 #include <linux/stop_machine.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 #include <linux/dma-buf.h> 40 #include <linux/mman.h> 41 42 #include "i915_drv.h" 43 #include "i915_gem_clflush.h" 44 #include "i915_gemfs.h" 45 #include "i915_reset.h" 46 #include "i915_trace.h" 47 #include "i915_vgpu.h" 48 49 #include "intel_drv.h" 50 #include "intel_frontbuffer.h" 51 #include "intel_mocs.h" 52 #include "intel_workarounds.h" 53 54 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 55 56 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 57 { 58 if (obj->cache_dirty) 59 return false; 60 61 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 62 return true; 63 64 return obj->pin_global; /* currently in use by HW, keep flushed */ 65 } 66 67 static int 68 insert_mappable_node(struct i915_ggtt *ggtt, 69 struct drm_mm_node *node, u32 size) 70 { 71 memset(node, 0, sizeof(*node)); 72 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 73 size, 0, I915_COLOR_UNEVICTABLE, 74 0, ggtt->mappable_end, 75 DRM_MM_INSERT_LOW); 76 } 77 78 static void 79 remove_mappable_node(struct drm_mm_node *node) 80 { 81 drm_mm_remove_node(node); 82 } 83 84 /* some bookkeeping */ 85 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 86 u64 size) 87 { 88 spin_lock(&dev_priv->mm.object_stat_lock); 89 dev_priv->mm.object_count++; 90 dev_priv->mm.object_memory += size; 91 spin_unlock(&dev_priv->mm.object_stat_lock); 92 } 93 94 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 95 u64 size) 96 { 97 spin_lock(&dev_priv->mm.object_stat_lock); 98 dev_priv->mm.object_count--; 99 dev_priv->mm.object_memory -= size; 100 spin_unlock(&dev_priv->mm.object_stat_lock); 101 } 102 103 static int 104 i915_gem_wait_for_error(struct i915_gpu_error *error) 105 { 106 int ret; 107 108 might_sleep(); 109 110 /* 111 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 112 * userspace. 
If it takes that long something really bad is going on and 113 * we should simply try to bail out and fail as gracefully as possible. 114 */ 115 ret = wait_event_interruptible_timeout(error->reset_queue, 116 !i915_reset_backoff(error), 117 I915_RESET_TIMEOUT); 118 if (ret == 0) { 119 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 120 return -EIO; 121 } else if (ret < 0) { 122 return ret; 123 } else { 124 return 0; 125 } 126 } 127 128 int i915_mutex_lock_interruptible(struct drm_device *dev) 129 { 130 struct drm_i915_private *dev_priv = to_i915(dev); 131 int ret; 132 133 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 134 if (ret) 135 return ret; 136 137 ret = mutex_lock_interruptible(&dev->struct_mutex); 138 if (ret) 139 return ret; 140 141 return 0; 142 } 143 144 static u32 __i915_gem_park(struct drm_i915_private *i915) 145 { 146 intel_wakeref_t wakeref; 147 148 GEM_TRACE("\n"); 149 150 lockdep_assert_held(&i915->drm.struct_mutex); 151 GEM_BUG_ON(i915->gt.active_requests); 152 GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 153 154 if (!i915->gt.awake) 155 return I915_EPOCH_INVALID; 156 157 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); 158 159 /* 160 * Be paranoid and flush a concurrent interrupt to make sure 161 * we don't reactivate any irq tasklets after parking. 162 * 163 * FIXME: Note that even though we have waited for execlists to be idle, 164 * there may still be an in-flight interrupt even though the CSB 165 * is now empty. synchronize_irq() makes sure that a residual interrupt 166 * is completed before we continue, but it doesn't prevent the HW from 167 * raising a spurious interrupt later. To complete the shield we should 168 * coordinate disabling the CS irq with flushing the interrupts. 169 */ 170 synchronize_irq(i915->drm.irq); 171 172 intel_engines_park(i915); 173 i915_timelines_park(i915); 174 175 i915_pmu_gt_parked(i915); 176 i915_vma_parked(i915); 177 178 wakeref = fetch_and_zero(&i915->gt.awake); 179 GEM_BUG_ON(!wakeref); 180 181 if (INTEL_GEN(i915) >= 6) 182 gen6_rps_idle(i915); 183 184 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); 185 186 return i915->gt.epoch; 187 } 188 189 void i915_gem_park(struct drm_i915_private *i915) 190 { 191 GEM_TRACE("\n"); 192 193 lockdep_assert_held(&i915->drm.struct_mutex); 194 GEM_BUG_ON(i915->gt.active_requests); 195 196 if (!i915->gt.awake) 197 return; 198 199 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ 200 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); 201 } 202 203 void i915_gem_unpark(struct drm_i915_private *i915) 204 { 205 GEM_TRACE("\n"); 206 207 lockdep_assert_held(&i915->drm.struct_mutex); 208 GEM_BUG_ON(!i915->gt.active_requests); 209 assert_rpm_wakelock_held(i915); 210 211 if (i915->gt.awake) 212 return; 213 214 /* 215 * It seems that the DMC likes to transition between the DC states a lot 216 * when there are no connected displays (no active power domains) during 217 * command submission. 218 * 219 * This activity has negative impact on the performance of the chip with 220 * huge latencies observed in the interrupt handler and elsewhere. 221 * 222 * Work around it by grabbing a GT IRQ power domain whilst there is any 223 * GT activity, preventing any DC state transitions. 
224 */ 225 i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); 226 GEM_BUG_ON(!i915->gt.awake); 227 228 if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ 229 i915->gt.epoch = 1; 230 231 intel_enable_gt_powersave(i915); 232 i915_update_gfx_val(i915); 233 if (INTEL_GEN(i915) >= 6) 234 gen6_rps_busy(i915); 235 i915_pmu_gt_unparked(i915); 236 237 intel_engines_unpark(i915); 238 239 i915_queue_hangcheck(i915); 240 241 queue_delayed_work(i915->wq, 242 &i915->gt.retire_work, 243 round_jiffies_up_relative(HZ)); 244 } 245 246 int 247 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 248 struct drm_file *file) 249 { 250 struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; 251 struct drm_i915_gem_get_aperture *args = data; 252 struct i915_vma *vma; 253 u64 pinned; 254 255 mutex_lock(&ggtt->vm.mutex); 256 257 pinned = ggtt->vm.reserved; 258 list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) 259 if (i915_vma_is_pinned(vma)) 260 pinned += vma->node.size; 261 262 mutex_unlock(&ggtt->vm.mutex); 263 264 args->aper_size = ggtt->vm.total; 265 args->aper_available_size = args->aper_size - pinned; 266 267 return 0; 268 } 269 270 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 271 { 272 struct address_space *mapping = obj->base.filp->f_mapping; 273 drm_dma_handle_t *phys; 274 struct sg_table *st; 275 struct scatterlist *sg; 276 char *vaddr; 277 int i; 278 int err; 279 280 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 281 return -EINVAL; 282 283 /* Always aligning to the object size, allows a single allocation 284 * to handle all possible callers, and given typical object sizes, 285 * the alignment of the buddy allocation will naturally match. 286 */ 287 phys = drm_pci_alloc(obj->base.dev, 288 roundup_pow_of_two(obj->base.size), 289 roundup_pow_of_two(obj->base.size)); 290 if (!phys) 291 return -ENOMEM; 292 293 vaddr = phys->vaddr; 294 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 295 struct page *page; 296 char *src; 297 298 page = shmem_read_mapping_page(mapping, i); 299 if (IS_ERR(page)) { 300 err = PTR_ERR(page); 301 goto err_phys; 302 } 303 304 src = kmap_atomic(page); 305 memcpy(vaddr, src, PAGE_SIZE); 306 drm_clflush_virt_range(vaddr, PAGE_SIZE); 307 kunmap_atomic(src); 308 309 put_page(page); 310 vaddr += PAGE_SIZE; 311 } 312 313 i915_gem_chipset_flush(to_i915(obj->base.dev)); 314 315 st = kmalloc(sizeof(*st), GFP_KERNEL); 316 if (!st) { 317 err = -ENOMEM; 318 goto err_phys; 319 } 320 321 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 322 kfree(st); 323 err = -ENOMEM; 324 goto err_phys; 325 } 326 327 sg = st->sgl; 328 sg->offset = 0; 329 sg->length = obj->base.size; 330 331 sg_dma_address(sg) = phys->busaddr; 332 sg_dma_len(sg) = obj->base.size; 333 334 obj->phys_handle = phys; 335 336 __i915_gem_object_set_pages(obj, st, sg->length); 337 338 return 0; 339 340 err_phys: 341 drm_pci_free(obj->base.dev, phys); 342 343 return err; 344 } 345 346 static void __start_cpu_write(struct drm_i915_gem_object *obj) 347 { 348 obj->read_domains = I915_GEM_DOMAIN_CPU; 349 obj->write_domain = I915_GEM_DOMAIN_CPU; 350 if (cpu_write_needs_clflush(obj)) 351 obj->cache_dirty = true; 352 } 353 354 static void 355 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 356 struct sg_table *pages, 357 bool needs_clflush) 358 { 359 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 360 361 if (obj->mm.madv == I915_MADV_DONTNEED) 362 obj->mm.dirty = false; 363 364 if (needs_clflush && 365 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 366 
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 367 drm_clflush_sg(pages); 368 369 __start_cpu_write(obj); 370 } 371 372 static void 373 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 374 struct sg_table *pages) 375 { 376 __i915_gem_object_release_shmem(obj, pages, false); 377 378 if (obj->mm.dirty) { 379 struct address_space *mapping = obj->base.filp->f_mapping; 380 char *vaddr = obj->phys_handle->vaddr; 381 int i; 382 383 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 384 struct page *page; 385 char *dst; 386 387 page = shmem_read_mapping_page(mapping, i); 388 if (IS_ERR(page)) 389 continue; 390 391 dst = kmap_atomic(page); 392 drm_clflush_virt_range(vaddr, PAGE_SIZE); 393 memcpy(dst, vaddr, PAGE_SIZE); 394 kunmap_atomic(dst); 395 396 set_page_dirty(page); 397 if (obj->mm.madv == I915_MADV_WILLNEED) 398 mark_page_accessed(page); 399 put_page(page); 400 vaddr += PAGE_SIZE; 401 } 402 obj->mm.dirty = false; 403 } 404 405 sg_free_table(pages); 406 kfree(pages); 407 408 drm_pci_free(obj->base.dev, obj->phys_handle); 409 } 410 411 static void 412 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 413 { 414 i915_gem_object_unpin_pages(obj); 415 } 416 417 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 418 .get_pages = i915_gem_object_get_pages_phys, 419 .put_pages = i915_gem_object_put_pages_phys, 420 .release = i915_gem_object_release_phys, 421 }; 422 423 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 424 425 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 426 { 427 struct i915_vma *vma; 428 LIST_HEAD(still_in_list); 429 int ret; 430 431 lockdep_assert_held(&obj->base.dev->struct_mutex); 432 433 /* Closed vma are removed from the obj->vma_list - but they may 434 * still have an active binding on the object. To remove those we 435 * must wait for all rendering to complete to the object (as unbinding 436 * must anyway), and retire the requests. 437 */ 438 ret = i915_gem_object_set_to_cpu_domain(obj, false); 439 if (ret) 440 return ret; 441 442 spin_lock(&obj->vma.lock); 443 while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, 444 struct i915_vma, 445 obj_link))) { 446 list_move_tail(&vma->obj_link, &still_in_list); 447 spin_unlock(&obj->vma.lock); 448 449 ret = i915_vma_unbind(vma); 450 451 spin_lock(&obj->vma.lock); 452 } 453 list_splice(&still_in_list, &obj->vma.list); 454 spin_unlock(&obj->vma.lock); 455 456 return ret; 457 } 458 459 static long 460 i915_gem_object_wait_fence(struct dma_fence *fence, 461 unsigned int flags, 462 long timeout, 463 struct intel_rps_client *rps_client) 464 { 465 struct i915_request *rq; 466 467 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 468 469 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 470 return timeout; 471 472 if (!dma_fence_is_i915(fence)) 473 return dma_fence_wait_timeout(fence, 474 flags & I915_WAIT_INTERRUPTIBLE, 475 timeout); 476 477 rq = to_request(fence); 478 if (i915_request_completed(rq)) 479 goto out; 480 481 /* 482 * This client is about to stall waiting for the GPU. In many cases 483 * this is undesirable and limits the throughput of the system, as 484 * many clients cannot continue processing user input/output whilst 485 * blocked. RPS autotuning may take tens of milliseconds to respond 486 * to the GPU load and thus incurs additional latency for the client. 487 * We can circumvent that by promoting the GPU frequency to maximum 488 * before we wait. 
This makes the GPU throttle up much more quickly 489 * (good for benchmarks and user experience, e.g. window animations), 490 * but at a cost of spending more power processing the workload 491 * (bad for battery). Not all clients even want their results 492 * immediately and for them we should just let the GPU select its own 493 * frequency to maximise efficiency. To prevent a single client from 494 * forcing the clocks too high for the whole system, we only allow 495 * each client to waitboost once in a busy period. 496 */ 497 if (rps_client && !i915_request_started(rq)) { 498 if (INTEL_GEN(rq->i915) >= 6) 499 gen6_rps_boost(rq, rps_client); 500 } 501 502 timeout = i915_request_wait(rq, flags, timeout); 503 504 out: 505 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 506 i915_request_retire_upto(rq); 507 508 return timeout; 509 } 510 511 static long 512 i915_gem_object_wait_reservation(struct reservation_object *resv, 513 unsigned int flags, 514 long timeout, 515 struct intel_rps_client *rps_client) 516 { 517 unsigned int seq = __read_seqcount_begin(&resv->seq); 518 struct dma_fence *excl; 519 bool prune_fences = false; 520 521 if (flags & I915_WAIT_ALL) { 522 struct dma_fence **shared; 523 unsigned int count, i; 524 int ret; 525 526 ret = reservation_object_get_fences_rcu(resv, 527 &excl, &count, &shared); 528 if (ret) 529 return ret; 530 531 for (i = 0; i < count; i++) { 532 timeout = i915_gem_object_wait_fence(shared[i], 533 flags, timeout, 534 rps_client); 535 if (timeout < 0) 536 break; 537 538 dma_fence_put(shared[i]); 539 } 540 541 for (; i < count; i++) 542 dma_fence_put(shared[i]); 543 kfree(shared); 544 545 /* 546 * If both shared fences and an exclusive fence exist, 547 * then by construction the shared fences must be later 548 * than the exclusive fence. If we successfully wait for 549 * all the shared fences, we know that the exclusive fence 550 * must all be signaled. If all the shared fences are 551 * signaled, we can prune the array and recover the 552 * floating references on the fences/requests. 553 */ 554 prune_fences = count && timeout >= 0; 555 } else { 556 excl = reservation_object_get_excl_rcu(resv); 557 } 558 559 if (excl && timeout >= 0) 560 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 561 rps_client); 562 563 dma_fence_put(excl); 564 565 /* 566 * Opportunistically prune the fences iff we know they have *all* been 567 * signaled and that the reservation object has not been changed (i.e. 568 * no new fences have been added). 
569 */ 570 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 571 if (reservation_object_trylock(resv)) { 572 if (!__read_seqcount_retry(&resv->seq, seq)) 573 reservation_object_add_excl_fence(resv, NULL); 574 reservation_object_unlock(resv); 575 } 576 } 577 578 return timeout; 579 } 580 581 static void __fence_set_priority(struct dma_fence *fence, 582 const struct i915_sched_attr *attr) 583 { 584 struct i915_request *rq; 585 struct intel_engine_cs *engine; 586 587 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 588 return; 589 590 rq = to_request(fence); 591 engine = rq->engine; 592 593 local_bh_disable(); 594 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 595 if (engine->schedule) 596 engine->schedule(rq, attr); 597 rcu_read_unlock(); 598 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 599 } 600 601 static void fence_set_priority(struct dma_fence *fence, 602 const struct i915_sched_attr *attr) 603 { 604 /* Recurse once into a fence-array */ 605 if (dma_fence_is_array(fence)) { 606 struct dma_fence_array *array = to_dma_fence_array(fence); 607 int i; 608 609 for (i = 0; i < array->num_fences; i++) 610 __fence_set_priority(array->fences[i], attr); 611 } else { 612 __fence_set_priority(fence, attr); 613 } 614 } 615 616 int 617 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 618 unsigned int flags, 619 const struct i915_sched_attr *attr) 620 { 621 struct dma_fence *excl; 622 623 if (flags & I915_WAIT_ALL) { 624 struct dma_fence **shared; 625 unsigned int count, i; 626 int ret; 627 628 ret = reservation_object_get_fences_rcu(obj->resv, 629 &excl, &count, &shared); 630 if (ret) 631 return ret; 632 633 for (i = 0; i < count; i++) { 634 fence_set_priority(shared[i], attr); 635 dma_fence_put(shared[i]); 636 } 637 638 kfree(shared); 639 } else { 640 excl = reservation_object_get_excl_rcu(obj->resv); 641 } 642 643 if (excl) { 644 fence_set_priority(excl, attr); 645 dma_fence_put(excl); 646 } 647 return 0; 648 } 649 650 /** 651 * Waits for rendering to the object to be completed 652 * @obj: i915 gem object 653 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 654 * @timeout: how long to wait 655 * @rps_client: client (user process) to charge for any waitboosting 656 */ 657 int 658 i915_gem_object_wait(struct drm_i915_gem_object *obj, 659 unsigned int flags, 660 long timeout, 661 struct intel_rps_client *rps_client) 662 { 663 might_sleep(); 664 GEM_BUG_ON(timeout < 0); 665 666 timeout = i915_gem_object_wait_reservation(obj->resv, 667 flags, timeout, 668 rps_client); 669 return timeout < 0 ? timeout : 0; 670 } 671 672 static struct intel_rps_client *to_rps_client(struct drm_file *file) 673 { 674 struct drm_i915_file_private *fpriv = file->driver_priv; 675 676 return &fpriv->rps_client; 677 } 678 679 static int 680 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 681 struct drm_i915_gem_pwrite *args, 682 struct drm_file *file) 683 { 684 void *vaddr = obj->phys_handle->vaddr + args->offset; 685 char __user *user_data = u64_to_user_ptr(args->data_ptr); 686 687 /* We manually control the domain here and pretend that it 688 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
689 */ 690 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 691 if (copy_from_user(vaddr, user_data, args->size)) 692 return -EFAULT; 693 694 drm_clflush_virt_range(vaddr, args->size); 695 i915_gem_chipset_flush(to_i915(obj->base.dev)); 696 697 intel_fb_obj_flush(obj, ORIGIN_CPU); 698 return 0; 699 } 700 701 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 702 { 703 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 704 } 705 706 void i915_gem_object_free(struct drm_i915_gem_object *obj) 707 { 708 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 709 kmem_cache_free(dev_priv->objects, obj); 710 } 711 712 static int 713 i915_gem_create(struct drm_file *file, 714 struct drm_i915_private *dev_priv, 715 u64 size, 716 u32 *handle_p) 717 { 718 struct drm_i915_gem_object *obj; 719 int ret; 720 u32 handle; 721 722 size = roundup(size, PAGE_SIZE); 723 if (size == 0) 724 return -EINVAL; 725 726 /* Allocate the new object */ 727 obj = i915_gem_object_create(dev_priv, size); 728 if (IS_ERR(obj)) 729 return PTR_ERR(obj); 730 731 ret = drm_gem_handle_create(file, &obj->base, &handle); 732 /* drop reference from allocate - handle holds it now */ 733 i915_gem_object_put(obj); 734 if (ret) 735 return ret; 736 737 *handle_p = handle; 738 return 0; 739 } 740 741 int 742 i915_gem_dumb_create(struct drm_file *file, 743 struct drm_device *dev, 744 struct drm_mode_create_dumb *args) 745 { 746 /* have to work out size/pitch and return them */ 747 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 748 args->size = args->pitch * args->height; 749 return i915_gem_create(file, to_i915(dev), 750 args->size, &args->handle); 751 } 752 753 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 754 { 755 return !(obj->cache_level == I915_CACHE_NONE || 756 obj->cache_level == I915_CACHE_WT); 757 } 758 759 /** 760 * Creates a new mm object and returns a handle to it. 761 * @dev: drm device pointer 762 * @data: ioctl data blob 763 * @file: drm file pointer 764 */ 765 int 766 i915_gem_create_ioctl(struct drm_device *dev, void *data, 767 struct drm_file *file) 768 { 769 struct drm_i915_private *dev_priv = to_i915(dev); 770 struct drm_i915_gem_create *args = data; 771 772 i915_gem_flush_free_objects(dev_priv); 773 774 return i915_gem_create(file, dev_priv, 775 args->size, &args->handle); 776 } 777 778 static inline enum fb_op_origin 779 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 780 { 781 return (domain == I915_GEM_DOMAIN_GTT ? 782 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 783 } 784 785 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 786 { 787 intel_wakeref_t wakeref; 788 789 /* 790 * No actual flushing is required for the GTT write domain for reads 791 * from the GTT domain. Writes to it "immediately" go to main memory 792 * as far as we know, so there's no chipset flush. It also doesn't 793 * land in the GPU render cache. 794 * 795 * However, we do have to enforce the order so that all writes through 796 * the GTT land before any writes to the device, such as updates to 797 * the GATT itself. 798 * 799 * We also have to wait a bit for the writes to land from the GTT. 800 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 801 * timing. This issue has only been observed when switching quickly 802 * between GTT writes and CPU reads from inside the kernel on recent hw, 803 * and it appears to only affect discrete GTT blocks (i.e. 
on LLC 804 * system agents we cannot reproduce this behaviour, until Cannonlake 805 * that was!). 806 */ 807 808 wmb(); 809 810 if (INTEL_INFO(dev_priv)->has_coherent_ggtt) 811 return; 812 813 i915_gem_chipset_flush(dev_priv); 814 815 with_intel_runtime_pm(dev_priv, wakeref) { 816 spin_lock_irq(&dev_priv->uncore.lock); 817 818 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 819 820 spin_unlock_irq(&dev_priv->uncore.lock); 821 } 822 } 823 824 static void 825 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 826 { 827 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 828 struct i915_vma *vma; 829 830 if (!(obj->write_domain & flush_domains)) 831 return; 832 833 switch (obj->write_domain) { 834 case I915_GEM_DOMAIN_GTT: 835 i915_gem_flush_ggtt_writes(dev_priv); 836 837 intel_fb_obj_flush(obj, 838 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 839 840 for_each_ggtt_vma(vma, obj) { 841 if (vma->iomap) 842 continue; 843 844 i915_vma_unset_ggtt_write(vma); 845 } 846 break; 847 848 case I915_GEM_DOMAIN_WC: 849 wmb(); 850 break; 851 852 case I915_GEM_DOMAIN_CPU: 853 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 854 break; 855 856 case I915_GEM_DOMAIN_RENDER: 857 if (gpu_write_needs_clflush(obj)) 858 obj->cache_dirty = true; 859 break; 860 } 861 862 obj->write_domain = 0; 863 } 864 865 /* 866 * Pins the specified object's pages and synchronizes the object with 867 * GPU accesses. Sets needs_clflush to non-zero if the caller should 868 * flush the object from the CPU cache. 869 */ 870 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 871 unsigned int *needs_clflush) 872 { 873 int ret; 874 875 lockdep_assert_held(&obj->base.dev->struct_mutex); 876 877 *needs_clflush = 0; 878 if (!i915_gem_object_has_struct_page(obj)) 879 return -ENODEV; 880 881 ret = i915_gem_object_wait(obj, 882 I915_WAIT_INTERRUPTIBLE | 883 I915_WAIT_LOCKED, 884 MAX_SCHEDULE_TIMEOUT, 885 NULL); 886 if (ret) 887 return ret; 888 889 ret = i915_gem_object_pin_pages(obj); 890 if (ret) 891 return ret; 892 893 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 894 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 895 ret = i915_gem_object_set_to_cpu_domain(obj, false); 896 if (ret) 897 goto err_unpin; 898 else 899 goto out; 900 } 901 902 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 903 904 /* If we're not in the cpu read domain, set ourself into the gtt 905 * read domain and manually flush cachelines (if required). This 906 * optimizes for the case when the gpu will dirty the data 907 * anyway again before the next pread happens. 
908 */ 909 if (!obj->cache_dirty && 910 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 911 *needs_clflush = CLFLUSH_BEFORE; 912 913 out: 914 /* return with the pages pinned */ 915 return 0; 916 917 err_unpin: 918 i915_gem_object_unpin_pages(obj); 919 return ret; 920 } 921 922 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 923 unsigned int *needs_clflush) 924 { 925 int ret; 926 927 lockdep_assert_held(&obj->base.dev->struct_mutex); 928 929 *needs_clflush = 0; 930 if (!i915_gem_object_has_struct_page(obj)) 931 return -ENODEV; 932 933 ret = i915_gem_object_wait(obj, 934 I915_WAIT_INTERRUPTIBLE | 935 I915_WAIT_LOCKED | 936 I915_WAIT_ALL, 937 MAX_SCHEDULE_TIMEOUT, 938 NULL); 939 if (ret) 940 return ret; 941 942 ret = i915_gem_object_pin_pages(obj); 943 if (ret) 944 return ret; 945 946 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 947 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 948 ret = i915_gem_object_set_to_cpu_domain(obj, true); 949 if (ret) 950 goto err_unpin; 951 else 952 goto out; 953 } 954 955 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 956 957 /* If we're not in the cpu write domain, set ourself into the 958 * gtt write domain and manually flush cachelines (as required). 959 * This optimizes for the case when the gpu will use the data 960 * right away and we therefore have to clflush anyway. 961 */ 962 if (!obj->cache_dirty) { 963 *needs_clflush |= CLFLUSH_AFTER; 964 965 /* 966 * Same trick applies to invalidate partially written 967 * cachelines read before writing. 968 */ 969 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 970 *needs_clflush |= CLFLUSH_BEFORE; 971 } 972 973 out: 974 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 975 obj->mm.dirty = true; 976 /* return with the pages pinned */ 977 return 0; 978 979 err_unpin: 980 i915_gem_object_unpin_pages(obj); 981 return ret; 982 } 983 984 static int 985 shmem_pread(struct page *page, int offset, int len, char __user *user_data, 986 bool needs_clflush) 987 { 988 char *vaddr; 989 int ret; 990 991 vaddr = kmap(page); 992 993 if (needs_clflush) 994 drm_clflush_virt_range(vaddr + offset, len); 995 996 ret = __copy_to_user(user_data, vaddr + offset, len); 997 998 kunmap(page); 999 1000 return ret ? 
-EFAULT : 0; 1001 } 1002 1003 static int 1004 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 1005 struct drm_i915_gem_pread *args) 1006 { 1007 char __user *user_data; 1008 u64 remain; 1009 unsigned int needs_clflush; 1010 unsigned int idx, offset; 1011 int ret; 1012 1013 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1014 if (ret) 1015 return ret; 1016 1017 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1018 mutex_unlock(&obj->base.dev->struct_mutex); 1019 if (ret) 1020 return ret; 1021 1022 remain = args->size; 1023 user_data = u64_to_user_ptr(args->data_ptr); 1024 offset = offset_in_page(args->offset); 1025 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1026 struct page *page = i915_gem_object_get_page(obj, idx); 1027 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1028 1029 ret = shmem_pread(page, offset, length, user_data, 1030 needs_clflush); 1031 if (ret) 1032 break; 1033 1034 remain -= length; 1035 user_data += length; 1036 offset = 0; 1037 } 1038 1039 i915_gem_obj_finish_shmem_access(obj); 1040 return ret; 1041 } 1042 1043 static inline bool 1044 gtt_user_read(struct io_mapping *mapping, 1045 loff_t base, int offset, 1046 char __user *user_data, int length) 1047 { 1048 void __iomem *vaddr; 1049 unsigned long unwritten; 1050 1051 /* We can use the cpu mem copy function because this is X86. */ 1052 vaddr = io_mapping_map_atomic_wc(mapping, base); 1053 unwritten = __copy_to_user_inatomic(user_data, 1054 (void __force *)vaddr + offset, 1055 length); 1056 io_mapping_unmap_atomic(vaddr); 1057 if (unwritten) { 1058 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1059 unwritten = copy_to_user(user_data, 1060 (void __force *)vaddr + offset, 1061 length); 1062 io_mapping_unmap(vaddr); 1063 } 1064 return unwritten; 1065 } 1066 1067 static int 1068 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1069 const struct drm_i915_gem_pread *args) 1070 { 1071 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1072 struct i915_ggtt *ggtt = &i915->ggtt; 1073 intel_wakeref_t wakeref; 1074 struct drm_mm_node node; 1075 struct i915_vma *vma; 1076 void __user *user_data; 1077 u64 remain, offset; 1078 int ret; 1079 1080 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1081 if (ret) 1082 return ret; 1083 1084 wakeref = intel_runtime_pm_get(i915); 1085 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1086 PIN_MAPPABLE | 1087 PIN_NONFAULT | 1088 PIN_NONBLOCK); 1089 if (!IS_ERR(vma)) { 1090 node.start = i915_ggtt_offset(vma); 1091 node.allocated = false; 1092 ret = i915_vma_put_fence(vma); 1093 if (ret) { 1094 i915_vma_unpin(vma); 1095 vma = ERR_PTR(ret); 1096 } 1097 } 1098 if (IS_ERR(vma)) { 1099 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1100 if (ret) 1101 goto out_unlock; 1102 GEM_BUG_ON(!node.allocated); 1103 } 1104 1105 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1106 if (ret) 1107 goto out_unpin; 1108 1109 mutex_unlock(&i915->drm.struct_mutex); 1110 1111 user_data = u64_to_user_ptr(args->data_ptr); 1112 remain = args->size; 1113 offset = args->offset; 1114 1115 while (remain > 0) { 1116 /* Operation in this page 1117 * 1118 * page_base = page offset within aperture 1119 * page_offset = offset within page 1120 * page_length = bytes to copy for this page 1121 */ 1122 u32 page_base = node.start; 1123 unsigned page_offset = offset_in_page(offset); 1124 unsigned page_length = PAGE_SIZE - page_offset; 1125 page_length = remain < page_length ? 
remain : page_length; 1126 if (node.allocated) { 1127 wmb(); 1128 ggtt->vm.insert_page(&ggtt->vm, 1129 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1130 node.start, I915_CACHE_NONE, 0); 1131 wmb(); 1132 } else { 1133 page_base += offset & PAGE_MASK; 1134 } 1135 1136 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1137 user_data, page_length)) { 1138 ret = -EFAULT; 1139 break; 1140 } 1141 1142 remain -= page_length; 1143 user_data += page_length; 1144 offset += page_length; 1145 } 1146 1147 mutex_lock(&i915->drm.struct_mutex); 1148 out_unpin: 1149 if (node.allocated) { 1150 wmb(); 1151 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1152 remove_mappable_node(&node); 1153 } else { 1154 i915_vma_unpin(vma); 1155 } 1156 out_unlock: 1157 intel_runtime_pm_put(i915, wakeref); 1158 mutex_unlock(&i915->drm.struct_mutex); 1159 1160 return ret; 1161 } 1162 1163 /** 1164 * Reads data from the object referenced by handle. 1165 * @dev: drm device pointer 1166 * @data: ioctl data blob 1167 * @file: drm file pointer 1168 * 1169 * On error, the contents of *data are undefined. 1170 */ 1171 int 1172 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1173 struct drm_file *file) 1174 { 1175 struct drm_i915_gem_pread *args = data; 1176 struct drm_i915_gem_object *obj; 1177 int ret; 1178 1179 if (args->size == 0) 1180 return 0; 1181 1182 if (!access_ok(u64_to_user_ptr(args->data_ptr), 1183 args->size)) 1184 return -EFAULT; 1185 1186 obj = i915_gem_object_lookup(file, args->handle); 1187 if (!obj) 1188 return -ENOENT; 1189 1190 /* Bounds check source. */ 1191 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1192 ret = -EINVAL; 1193 goto out; 1194 } 1195 1196 trace_i915_gem_object_pread(obj, args->offset, args->size); 1197 1198 ret = i915_gem_object_wait(obj, 1199 I915_WAIT_INTERRUPTIBLE, 1200 MAX_SCHEDULE_TIMEOUT, 1201 to_rps_client(file)); 1202 if (ret) 1203 goto out; 1204 1205 ret = i915_gem_object_pin_pages(obj); 1206 if (ret) 1207 goto out; 1208 1209 ret = i915_gem_shmem_pread(obj, args); 1210 if (ret == -EFAULT || ret == -ENODEV) 1211 ret = i915_gem_gtt_pread(obj, args); 1212 1213 i915_gem_object_unpin_pages(obj); 1214 out: 1215 i915_gem_object_put(obj); 1216 return ret; 1217 } 1218 1219 /* This is the fast write path which cannot handle 1220 * page faults in the source data 1221 */ 1222 1223 static inline bool 1224 ggtt_write(struct io_mapping *mapping, 1225 loff_t base, int offset, 1226 char __user *user_data, int length) 1227 { 1228 void __iomem *vaddr; 1229 unsigned long unwritten; 1230 1231 /* We can use the cpu mem copy function because this is X86. */ 1232 vaddr = io_mapping_map_atomic_wc(mapping, base); 1233 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1234 user_data, length); 1235 io_mapping_unmap_atomic(vaddr); 1236 if (unwritten) { 1237 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1238 unwritten = copy_from_user((void __force *)vaddr + offset, 1239 user_data, length); 1240 io_mapping_unmap(vaddr); 1241 } 1242 1243 return unwritten; 1244 } 1245 1246 /** 1247 * This is the fast pwrite path, where we copy the data directly from the 1248 * user into the GTT, uncached. 
1249 * @obj: i915 GEM object 1250 * @args: pwrite arguments structure 1251 */ 1252 static int 1253 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1254 const struct drm_i915_gem_pwrite *args) 1255 { 1256 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1257 struct i915_ggtt *ggtt = &i915->ggtt; 1258 intel_wakeref_t wakeref; 1259 struct drm_mm_node node; 1260 struct i915_vma *vma; 1261 u64 remain, offset; 1262 void __user *user_data; 1263 int ret; 1264 1265 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1266 if (ret) 1267 return ret; 1268 1269 if (i915_gem_object_has_struct_page(obj)) { 1270 /* 1271 * Avoid waking the device up if we can fallback, as 1272 * waking/resuming is very slow (worst-case 10-100 ms 1273 * depending on PCI sleeps and our own resume time). 1274 * This easily dwarfs any performance advantage from 1275 * using the cache bypass of indirect GGTT access. 1276 */ 1277 wakeref = intel_runtime_pm_get_if_in_use(i915); 1278 if (!wakeref) { 1279 ret = -EFAULT; 1280 goto out_unlock; 1281 } 1282 } else { 1283 /* No backing pages, no fallback, we must force GGTT access */ 1284 wakeref = intel_runtime_pm_get(i915); 1285 } 1286 1287 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1288 PIN_MAPPABLE | 1289 PIN_NONFAULT | 1290 PIN_NONBLOCK); 1291 if (!IS_ERR(vma)) { 1292 node.start = i915_ggtt_offset(vma); 1293 node.allocated = false; 1294 ret = i915_vma_put_fence(vma); 1295 if (ret) { 1296 i915_vma_unpin(vma); 1297 vma = ERR_PTR(ret); 1298 } 1299 } 1300 if (IS_ERR(vma)) { 1301 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1302 if (ret) 1303 goto out_rpm; 1304 GEM_BUG_ON(!node.allocated); 1305 } 1306 1307 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1308 if (ret) 1309 goto out_unpin; 1310 1311 mutex_unlock(&i915->drm.struct_mutex); 1312 1313 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1314 1315 user_data = u64_to_user_ptr(args->data_ptr); 1316 offset = args->offset; 1317 remain = args->size; 1318 while (remain) { 1319 /* Operation in this page 1320 * 1321 * page_base = page offset within aperture 1322 * page_offset = offset within page 1323 * page_length = bytes to copy for this page 1324 */ 1325 u32 page_base = node.start; 1326 unsigned int page_offset = offset_in_page(offset); 1327 unsigned int page_length = PAGE_SIZE - page_offset; 1328 page_length = remain < page_length ? remain : page_length; 1329 if (node.allocated) { 1330 wmb(); /* flush the write before we modify the GGTT */ 1331 ggtt->vm.insert_page(&ggtt->vm, 1332 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1333 node.start, I915_CACHE_NONE, 0); 1334 wmb(); /* flush modifications to the GGTT (insert_page) */ 1335 } else { 1336 page_base += offset & PAGE_MASK; 1337 } 1338 /* If we get a fault while copying data, then (presumably) our 1339 * source page isn't available. Return the error and we'll 1340 * retry in the slow path. 1341 * If the object is non-shmem backed, we retry again with the 1342 * path that handles page fault. 
1343 */ 1344 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1345 user_data, page_length)) { 1346 ret = -EFAULT; 1347 break; 1348 } 1349 1350 remain -= page_length; 1351 user_data += page_length; 1352 offset += page_length; 1353 } 1354 intel_fb_obj_flush(obj, ORIGIN_CPU); 1355 1356 mutex_lock(&i915->drm.struct_mutex); 1357 out_unpin: 1358 if (node.allocated) { 1359 wmb(); 1360 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1361 remove_mappable_node(&node); 1362 } else { 1363 i915_vma_unpin(vma); 1364 } 1365 out_rpm: 1366 intel_runtime_pm_put(i915, wakeref); 1367 out_unlock: 1368 mutex_unlock(&i915->drm.struct_mutex); 1369 return ret; 1370 } 1371 1372 /* Per-page copy function for the shmem pwrite fastpath. 1373 * Flushes invalid cachelines before writing to the target if 1374 * needs_clflush_before is set and flushes out any written cachelines after 1375 * writing if needs_clflush is set. 1376 */ 1377 static int 1378 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1379 bool needs_clflush_before, 1380 bool needs_clflush_after) 1381 { 1382 char *vaddr; 1383 int ret; 1384 1385 vaddr = kmap(page); 1386 1387 if (needs_clflush_before) 1388 drm_clflush_virt_range(vaddr + offset, len); 1389 1390 ret = __copy_from_user(vaddr + offset, user_data, len); 1391 if (!ret && needs_clflush_after) 1392 drm_clflush_virt_range(vaddr + offset, len); 1393 1394 kunmap(page); 1395 1396 return ret ? -EFAULT : 0; 1397 } 1398 1399 static int 1400 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1401 const struct drm_i915_gem_pwrite *args) 1402 { 1403 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1404 void __user *user_data; 1405 u64 remain; 1406 unsigned int partial_cacheline_write; 1407 unsigned int needs_clflush; 1408 unsigned int offset, idx; 1409 int ret; 1410 1411 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1412 if (ret) 1413 return ret; 1414 1415 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1416 mutex_unlock(&i915->drm.struct_mutex); 1417 if (ret) 1418 return ret; 1419 1420 /* If we don't overwrite a cacheline completely we need to be 1421 * careful to have up-to-date data by first clflushing. Don't 1422 * overcomplicate things and flush the entire patch. 1423 */ 1424 partial_cacheline_write = 0; 1425 if (needs_clflush & CLFLUSH_BEFORE) 1426 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1427 1428 user_data = u64_to_user_ptr(args->data_ptr); 1429 remain = args->size; 1430 offset = offset_in_page(args->offset); 1431 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1432 struct page *page = i915_gem_object_get_page(obj, idx); 1433 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1434 1435 ret = shmem_pwrite(page, offset, length, user_data, 1436 (offset | length) & partial_cacheline_write, 1437 needs_clflush & CLFLUSH_AFTER); 1438 if (ret) 1439 break; 1440 1441 remain -= length; 1442 user_data += length; 1443 offset = 0; 1444 } 1445 1446 intel_fb_obj_flush(obj, ORIGIN_CPU); 1447 i915_gem_obj_finish_shmem_access(obj); 1448 return ret; 1449 } 1450 1451 /** 1452 * Writes data to the object referenced by handle. 1453 * @dev: drm device 1454 * @data: ioctl data blob 1455 * @file: drm file 1456 * 1457 * On error, the contents of the buffer that were to be modified are undefined. 
1458 */ 1459 int 1460 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1461 struct drm_file *file) 1462 { 1463 struct drm_i915_gem_pwrite *args = data; 1464 struct drm_i915_gem_object *obj; 1465 int ret; 1466 1467 if (args->size == 0) 1468 return 0; 1469 1470 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) 1471 return -EFAULT; 1472 1473 obj = i915_gem_object_lookup(file, args->handle); 1474 if (!obj) 1475 return -ENOENT; 1476 1477 /* Bounds check destination. */ 1478 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1479 ret = -EINVAL; 1480 goto err; 1481 } 1482 1483 /* Writes not allowed into this read-only object */ 1484 if (i915_gem_object_is_readonly(obj)) { 1485 ret = -EINVAL; 1486 goto err; 1487 } 1488 1489 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1490 1491 ret = -ENODEV; 1492 if (obj->ops->pwrite) 1493 ret = obj->ops->pwrite(obj, args); 1494 if (ret != -ENODEV) 1495 goto err; 1496 1497 ret = i915_gem_object_wait(obj, 1498 I915_WAIT_INTERRUPTIBLE | 1499 I915_WAIT_ALL, 1500 MAX_SCHEDULE_TIMEOUT, 1501 to_rps_client(file)); 1502 if (ret) 1503 goto err; 1504 1505 ret = i915_gem_object_pin_pages(obj); 1506 if (ret) 1507 goto err; 1508 1509 ret = -EFAULT; 1510 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1511 * it would end up going through the fenced access, and we'll get 1512 * different detiling behavior between reading and writing. 1513 * pread/pwrite currently are reading and writing from the CPU 1514 * perspective, requiring manual detiling by the client. 1515 */ 1516 if (!i915_gem_object_has_struct_page(obj) || 1517 cpu_write_needs_clflush(obj)) 1518 /* Note that the gtt paths might fail with non-page-backed user 1519 * pointers (e.g. gtt mappings when moving data between 1520 * textures). Fallback to the shmem path in that case. 1521 */ 1522 ret = i915_gem_gtt_pwrite_fast(obj, args); 1523 1524 if (ret == -EFAULT || ret == -ENOSPC) { 1525 if (obj->phys_handle) 1526 ret = i915_gem_phys_pwrite(obj, args, file); 1527 else 1528 ret = i915_gem_shmem_pwrite(obj, args); 1529 } 1530 1531 i915_gem_object_unpin_pages(obj); 1532 err: 1533 i915_gem_object_put(obj); 1534 return ret; 1535 } 1536 1537 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1538 { 1539 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1540 struct list_head *list; 1541 struct i915_vma *vma; 1542 1543 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1544 1545 mutex_lock(&i915->ggtt.vm.mutex); 1546 for_each_ggtt_vma(vma, obj) { 1547 if (!drm_mm_node_allocated(&vma->node)) 1548 continue; 1549 1550 list_move_tail(&vma->vm_link, &vma->vm->bound_list); 1551 } 1552 mutex_unlock(&i915->ggtt.vm.mutex); 1553 1554 spin_lock(&i915->mm.obj_lock); 1555 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1556 list_move_tail(&obj->mm.link, list); 1557 spin_unlock(&i915->mm.obj_lock); 1558 } 1559 1560 /** 1561 * Called when user space prepares to use an object with the CPU, either 1562 * through the mmap ioctl's mapping or a GTT mapping. 1563 * @dev: drm device 1564 * @data: ioctl data blob 1565 * @file: drm file 1566 */ 1567 int 1568 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1569 struct drm_file *file) 1570 { 1571 struct drm_i915_gem_set_domain *args = data; 1572 struct drm_i915_gem_object *obj; 1573 u32 read_domains = args->read_domains; 1574 u32 write_domain = args->write_domain; 1575 int err; 1576 1577 /* Only handle setting domains to types used by the CPU. 
*/ 1578 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1579 return -EINVAL; 1580 1581 /* Having something in the write domain implies it's in the read 1582 * domain, and only that read domain. Enforce that in the request. 1583 */ 1584 if (write_domain != 0 && read_domains != write_domain) 1585 return -EINVAL; 1586 1587 obj = i915_gem_object_lookup(file, args->handle); 1588 if (!obj) 1589 return -ENOENT; 1590 1591 /* Try to flush the object off the GPU without holding the lock. 1592 * We will repeat the flush holding the lock in the normal manner 1593 * to catch cases where we are gazumped. 1594 */ 1595 err = i915_gem_object_wait(obj, 1596 I915_WAIT_INTERRUPTIBLE | 1597 I915_WAIT_PRIORITY | 1598 (write_domain ? I915_WAIT_ALL : 0), 1599 MAX_SCHEDULE_TIMEOUT, 1600 to_rps_client(file)); 1601 if (err) 1602 goto out; 1603 1604 /* 1605 * Proxy objects do not control access to the backing storage, ergo 1606 * they cannot be used as a means to manipulate the cache domain 1607 * tracking for that backing storage. The proxy object is always 1608 * considered to be outside of any cache domain. 1609 */ 1610 if (i915_gem_object_is_proxy(obj)) { 1611 err = -ENXIO; 1612 goto out; 1613 } 1614 1615 /* 1616 * Flush and acquire obj->pages so that we are coherent through 1617 * direct access in memory with previous cached writes through 1618 * shmemfs and that our cache domain tracking remains valid. 1619 * For example, if the obj->filp was moved to swap without us 1620 * being notified and releasing the pages, we would mistakenly 1621 * continue to assume that the obj remained out of the CPU cached 1622 * domain. 1623 */ 1624 err = i915_gem_object_pin_pages(obj); 1625 if (err) 1626 goto out; 1627 1628 err = i915_mutex_lock_interruptible(dev); 1629 if (err) 1630 goto out_unpin; 1631 1632 if (read_domains & I915_GEM_DOMAIN_WC) 1633 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1634 else if (read_domains & I915_GEM_DOMAIN_GTT) 1635 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1636 else 1637 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1638 1639 /* And bump the LRU for this access */ 1640 i915_gem_object_bump_inactive_ggtt(obj); 1641 1642 mutex_unlock(&dev->struct_mutex); 1643 1644 if (write_domain != 0) 1645 intel_fb_obj_invalidate(obj, 1646 fb_write_origin(obj, write_domain)); 1647 1648 out_unpin: 1649 i915_gem_object_unpin_pages(obj); 1650 out: 1651 i915_gem_object_put(obj); 1652 return err; 1653 } 1654 1655 /** 1656 * Called when user space has done writes to this buffer 1657 * @dev: drm device 1658 * @data: ioctl data blob 1659 * @file: drm file 1660 */ 1661 int 1662 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1663 struct drm_file *file) 1664 { 1665 struct drm_i915_gem_sw_finish *args = data; 1666 struct drm_i915_gem_object *obj; 1667 1668 obj = i915_gem_object_lookup(file, args->handle); 1669 if (!obj) 1670 return -ENOENT; 1671 1672 /* 1673 * Proxy objects are barred from CPU access, so there is no 1674 * need to ban sw_finish as it is a nop. 
1675 */ 1676 1677 /* Pinned buffers may be scanout, so flush the cache */ 1678 i915_gem_object_flush_if_display(obj); 1679 i915_gem_object_put(obj); 1680 1681 return 0; 1682 } 1683 1684 static inline bool 1685 __vma_matches(struct vm_area_struct *vma, struct file *filp, 1686 unsigned long addr, unsigned long size) 1687 { 1688 if (vma->vm_file != filp) 1689 return false; 1690 1691 return vma->vm_start == addr && 1692 (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size); 1693 } 1694 1695 /** 1696 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1697 * it is mapped to. 1698 * @dev: drm device 1699 * @data: ioctl data blob 1700 * @file: drm file 1701 * 1702 * While the mapping holds a reference on the contents of the object, it doesn't 1703 * imply a ref on the object itself. 1704 * 1705 * IMPORTANT: 1706 * 1707 * DRM driver writers who look a this function as an example for how to do GEM 1708 * mmap support, please don't implement mmap support like here. The modern way 1709 * to implement DRM mmap support is with an mmap offset ioctl (like 1710 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1711 * That way debug tooling like valgrind will understand what's going on, hiding 1712 * the mmap call in a driver private ioctl will break that. The i915 driver only 1713 * does cpu mmaps this way because we didn't know better. 1714 */ 1715 int 1716 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1717 struct drm_file *file) 1718 { 1719 struct drm_i915_gem_mmap *args = data; 1720 struct drm_i915_gem_object *obj; 1721 unsigned long addr; 1722 1723 if (args->flags & ~(I915_MMAP_WC)) 1724 return -EINVAL; 1725 1726 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1727 return -ENODEV; 1728 1729 obj = i915_gem_object_lookup(file, args->handle); 1730 if (!obj) 1731 return -ENOENT; 1732 1733 /* prime objects have no backing filp to GEM mmap 1734 * pages from. 1735 */ 1736 if (!obj->base.filp) { 1737 addr = -ENXIO; 1738 goto err; 1739 } 1740 1741 if (range_overflows(args->offset, args->size, (u64)obj->base.size)) { 1742 addr = -EINVAL; 1743 goto err; 1744 } 1745 1746 addr = vm_mmap(obj->base.filp, 0, args->size, 1747 PROT_READ | PROT_WRITE, MAP_SHARED, 1748 args->offset); 1749 if (IS_ERR_VALUE(addr)) 1750 goto err; 1751 1752 if (args->flags & I915_MMAP_WC) { 1753 struct mm_struct *mm = current->mm; 1754 struct vm_area_struct *vma; 1755 1756 if (down_write_killable(&mm->mmap_sem)) { 1757 addr = -EINTR; 1758 goto err; 1759 } 1760 vma = find_vma(mm, addr); 1761 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) 1762 vma->vm_page_prot = 1763 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1764 else 1765 addr = -ENOMEM; 1766 up_write(&mm->mmap_sem); 1767 if (IS_ERR_VALUE(addr)) 1768 goto err; 1769 1770 /* This may race, but that's ok, it only gets set */ 1771 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); 1772 } 1773 i915_gem_object_put(obj); 1774 1775 args->addr_ptr = (u64)addr; 1776 return 0; 1777 1778 err: 1779 i915_gem_object_put(obj); 1780 return addr; 1781 } 1782 1783 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) 1784 { 1785 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; 1786 } 1787 1788 /** 1789 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps 1790 * 1791 * A history of the GTT mmap interface: 1792 * 1793 * 0 - Everything had to fit into the GTT. 
Both parties of a memcpy had to 1794 * aligned and suitable for fencing, and still fit into the available 1795 * mappable space left by the pinned display objects. A classic problem 1796 * we called the page-fault-of-doom where we would ping-pong between 1797 * two objects that could not fit inside the GTT and so the memcpy 1798 * would page one object in at the expense of the other between every 1799 * single byte. 1800 * 1801 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none 1802 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the 1803 * object is too large for the available space (or simply too large 1804 * for the mappable aperture!), a view is created instead and faulted 1805 * into userspace. (This view is aligned and sized appropriately for 1806 * fenced access.) 1807 * 1808 * 2 - Recognise WC as a separate cache domain so that we can flush the 1809 * delayed writes via GTT before performing direct access via WC. 1810 * 1811 * Restrictions: 1812 * 1813 * * snoopable objects cannot be accessed via the GTT. It can cause machine 1814 * hangs on some architectures, corruption on others. An attempt to service 1815 * a GTT page fault from a snoopable object will generate a SIGBUS. 1816 * 1817 * * the object must be able to fit into RAM (physical memory, though no 1818 * limited to the mappable aperture). 1819 * 1820 * 1821 * Caveats: 1822 * 1823 * * a new GTT page fault will synchronize rendering from the GPU and flush 1824 * all data to system memory. Subsequent access will not be synchronized. 1825 * 1826 * * all mappings are revoked on runtime device suspend. 1827 * 1828 * * there are only 8, 16 or 32 fence registers to share between all users 1829 * (older machines require fence register for display and blitter access 1830 * as well). Contention of the fence registers will cause the previous users 1831 * to be unmapped and any new access will generate new page faults. 1832 * 1833 * * running out of memory while servicing a fault may generate a SIGBUS, 1834 * rather than the expected SIGSEGV. 1835 */ 1836 int i915_gem_mmap_gtt_version(void) 1837 { 1838 return 2; 1839 } 1840 1841 static inline struct i915_ggtt_view 1842 compute_partial_view(const struct drm_i915_gem_object *obj, 1843 pgoff_t page_offset, 1844 unsigned int chunk) 1845 { 1846 struct i915_ggtt_view view; 1847 1848 if (i915_gem_object_is_tiled(obj)) 1849 chunk = roundup(chunk, tile_row_pages(obj)); 1850 1851 view.type = I915_GGTT_VIEW_PARTIAL; 1852 view.partial.offset = rounddown(page_offset, chunk); 1853 view.partial.size = 1854 min_t(unsigned int, chunk, 1855 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 1856 1857 /* If the partial covers the entire object, just create a normal VMA. */ 1858 if (chunk >= obj->base.size >> PAGE_SHIFT) 1859 view.type = I915_GGTT_VIEW_NORMAL; 1860 1861 return view; 1862 } 1863 1864 /** 1865 * i915_gem_fault - fault a page into the GTT 1866 * @vmf: fault info 1867 * 1868 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1869 * from userspace. The fault handler takes care of binding the object to 1870 * the GTT (if needed), allocating and programming a fence register (again, 1871 * only if needed based on whether the old reg is still valid or the object 1872 * is tiled) and inserting a new PTE into the faulting process. 1873 * 1874 * Note that the faulting process may involve evicting existing objects 1875 * from the GTT and/or fence registers to make room. 
So performance may 1876 * suffer if the GTT working set is large or there are few fence registers 1877 * left. 1878 * 1879 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 1880 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 1881 */ 1882 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 1883 { 1884 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 1885 struct vm_area_struct *area = vmf->vma; 1886 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 1887 struct drm_device *dev = obj->base.dev; 1888 struct drm_i915_private *dev_priv = to_i915(dev); 1889 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1890 bool write = area->vm_flags & VM_WRITE; 1891 intel_wakeref_t wakeref; 1892 struct i915_vma *vma; 1893 pgoff_t page_offset; 1894 int ret; 1895 1896 /* Sanity check that we allow writing into this object */ 1897 if (i915_gem_object_is_readonly(obj) && write) 1898 return VM_FAULT_SIGBUS; 1899 1900 /* We don't use vmf->pgoff since that has the fake offset */ 1901 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 1902 1903 trace_i915_gem_object_fault(obj, page_offset, true, write); 1904 1905 /* Try to flush the object off the GPU first without holding the lock. 1906 * Upon acquiring the lock, we will perform our sanity checks and then 1907 * repeat the flush holding the lock in the normal manner to catch cases 1908 * where we are gazumped. 1909 */ 1910 ret = i915_gem_object_wait(obj, 1911 I915_WAIT_INTERRUPTIBLE, 1912 MAX_SCHEDULE_TIMEOUT, 1913 NULL); 1914 if (ret) 1915 goto err; 1916 1917 ret = i915_gem_object_pin_pages(obj); 1918 if (ret) 1919 goto err; 1920 1921 wakeref = intel_runtime_pm_get(dev_priv); 1922 1923 ret = i915_mutex_lock_interruptible(dev); 1924 if (ret) 1925 goto err_rpm; 1926 1927 /* Access to snoopable pages through the GTT is incoherent. */ 1928 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 1929 ret = -EFAULT; 1930 goto err_unlock; 1931 } 1932 1933 1934 /* Now pin it into the GTT as needed */ 1935 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1936 PIN_MAPPABLE | 1937 PIN_NONBLOCK | 1938 PIN_NONFAULT); 1939 if (IS_ERR(vma)) { 1940 /* Use a partial view if it is bigger than available space */ 1941 struct i915_ggtt_view view = 1942 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 1943 unsigned int flags; 1944 1945 flags = PIN_MAPPABLE; 1946 if (view.type == I915_GGTT_VIEW_NORMAL) 1947 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 1948 1949 /* 1950 * Userspace is now writing through an untracked VMA, abandon 1951 * all hope that the hardware is able to track future writes. 
1952 */ 1953 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 1954 1955 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 1956 if (IS_ERR(vma) && !view.type) { 1957 flags = PIN_MAPPABLE; 1958 view.type = I915_GGTT_VIEW_PARTIAL; 1959 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 1960 } 1961 } 1962 if (IS_ERR(vma)) { 1963 ret = PTR_ERR(vma); 1964 goto err_unlock; 1965 } 1966 1967 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1968 if (ret) 1969 goto err_unpin; 1970 1971 ret = i915_vma_pin_fence(vma); 1972 if (ret) 1973 goto err_unpin; 1974 1975 /* Finally, remap it using the new GTT offset */ 1976 ret = remap_io_mapping(area, 1977 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 1978 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 1979 min_t(u64, vma->size, area->vm_end - area->vm_start), 1980 &ggtt->iomap); 1981 if (ret) 1982 goto err_fence; 1983 1984 /* Mark as being mmapped into userspace for later revocation */ 1985 assert_rpm_wakelock_held(dev_priv); 1986 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 1987 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 1988 GEM_BUG_ON(!obj->userfault_count); 1989 1990 i915_vma_set_ggtt_write(vma); 1991 1992 err_fence: 1993 i915_vma_unpin_fence(vma); 1994 err_unpin: 1995 __i915_vma_unpin(vma); 1996 err_unlock: 1997 mutex_unlock(&dev->struct_mutex); 1998 err_rpm: 1999 intel_runtime_pm_put(dev_priv, wakeref); 2000 i915_gem_object_unpin_pages(obj); 2001 err: 2002 switch (ret) { 2003 case -EIO: 2004 /* 2005 * We eat errors when the gpu is terminally wedged to avoid 2006 * userspace unduly crashing (gl has no provisions for mmaps to 2007 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2008 * and so needs to be reported. 2009 */ 2010 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2011 return VM_FAULT_SIGBUS; 2012 /* else: fall through */ 2013 case -EAGAIN: 2014 /* 2015 * EAGAIN means the gpu is hung and we'll wait for the error 2016 * handler to reset everything when re-faulting in 2017 * i915_mutex_lock_interruptible. 2018 */ 2019 case 0: 2020 case -ERESTARTSYS: 2021 case -EINTR: 2022 case -EBUSY: 2023 /* 2024 * EBUSY is ok: this just means that another thread 2025 * already did the job. 2026 */ 2027 return VM_FAULT_NOPAGE; 2028 case -ENOMEM: 2029 return VM_FAULT_OOM; 2030 case -ENOSPC: 2031 case -EFAULT: 2032 return VM_FAULT_SIGBUS; 2033 default: 2034 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2035 return VM_FAULT_SIGBUS; 2036 } 2037 } 2038 2039 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2040 { 2041 struct i915_vma *vma; 2042 2043 GEM_BUG_ON(!obj->userfault_count); 2044 2045 obj->userfault_count = 0; 2046 list_del(&obj->userfault_link); 2047 drm_vma_node_unmap(&obj->base.vma_node, 2048 obj->base.dev->anon_inode->i_mapping); 2049 2050 for_each_ggtt_vma(vma, obj) 2051 i915_vma_unset_userfault(vma); 2052 } 2053 2054 /** 2055 * i915_gem_release_mmap - remove physical page mappings 2056 * @obj: obj in question 2057 * 2058 * Preserve the reservation of the mmapping with the DRM core code, but 2059 * relinquish ownership of the pages back to the system. 2060 * 2061 * It is vital that we remove the page mapping if we have mapped a tiled 2062 * object through the GTT and then lose the fence register due to 2063 * resource pressure. Similarly if the object has been moved out of the 2064 * aperture, than pages mapped into userspace must be revoked. 
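 * Revocation is performed by drm_vma_node_unmap(), which zaps every
 * userspace PTE covering the object's fake mmap offset.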
Removing the 2065 * mapping will then trigger a page fault on the next user access, allowing 2066 * fixup by i915_gem_fault(). 2067 */ 2068 void 2069 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2070 { 2071 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2072 intel_wakeref_t wakeref; 2073 2074 /* Serialisation between user GTT access and our code depends upon 2075 * revoking the CPU's PTE whilst the mutex is held. The next user 2076 * pagefault then has to wait until we release the mutex. 2077 * 2078 * Note that RPM complicates somewhat by adding an additional 2079 * requirement that operations to the GGTT be made holding the RPM 2080 * wakeref. 2081 */ 2082 lockdep_assert_held(&i915->drm.struct_mutex); 2083 wakeref = intel_runtime_pm_get(i915); 2084 2085 if (!obj->userfault_count) 2086 goto out; 2087 2088 __i915_gem_object_release_mmap(obj); 2089 2090 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2091 * memory transactions from userspace before we return. The TLB 2092 * flushing implied above by changing the PTE above *should* be 2093 * sufficient, an extra barrier here just provides us with a bit 2094 * of paranoid documentation about our requirement to serialise 2095 * memory writes before touching registers / GSM. 2096 */ 2097 wmb(); 2098 2099 out: 2100 intel_runtime_pm_put(i915, wakeref); 2101 } 2102 2103 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2104 { 2105 struct drm_i915_gem_object *obj, *on; 2106 int i; 2107 2108 /* 2109 * Only called during RPM suspend. All users of the userfault_list 2110 * must be holding an RPM wakeref to ensure that this can not 2111 * run concurrently with themselves (and use the struct_mutex for 2112 * protection between themselves). 2113 */ 2114 2115 list_for_each_entry_safe(obj, on, 2116 &dev_priv->mm.userfault_list, userfault_link) 2117 __i915_gem_object_release_mmap(obj); 2118 2119 /* The fence will be lost when the device powers down. If any were 2120 * in use by hardware (i.e. they are pinned), we should not be powering 2121 * down! All other fences will be reacquired by the user upon waking. 2122 */ 2123 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2124 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2125 2126 /* Ideally we want to assert that the fence register is not 2127 * live at this point (i.e. that no piece of code will be 2128 * trying to write through fence + GTT, as that both violates 2129 * our tracking of activity and associated locking/barriers, 2130 * but also is illegal given that the hw is powered down). 2131 * 2132 * Previously we used reg->pin_count as a "liveness" indicator. 2133 * That is not sufficient, and we need a more fine-grained 2134 * tool if we want to have a sanity check here. 
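 * For now we settle for asserting that no vma holding the fence still has
 * an outstanding userfault, and mark the register dirty so that it is
 * rewritten before its next use after resume.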
2135 */ 2136 2137 if (!reg->vma) 2138 continue; 2139 2140 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2141 reg->dirty = true; 2142 } 2143 } 2144 2145 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2146 { 2147 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2148 int err; 2149 2150 err = drm_gem_create_mmap_offset(&obj->base); 2151 if (likely(!err)) 2152 return 0; 2153 2154 /* Attempt to reap some mmap space from dead objects */ 2155 do { 2156 err = i915_gem_wait_for_idle(dev_priv, 2157 I915_WAIT_INTERRUPTIBLE, 2158 MAX_SCHEDULE_TIMEOUT); 2159 if (err) 2160 break; 2161 2162 i915_gem_drain_freed_objects(dev_priv); 2163 err = drm_gem_create_mmap_offset(&obj->base); 2164 if (!err) 2165 break; 2166 2167 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2168 2169 return err; 2170 } 2171 2172 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2173 { 2174 drm_gem_free_mmap_offset(&obj->base); 2175 } 2176 2177 int 2178 i915_gem_mmap_gtt(struct drm_file *file, 2179 struct drm_device *dev, 2180 u32 handle, 2181 u64 *offset) 2182 { 2183 struct drm_i915_gem_object *obj; 2184 int ret; 2185 2186 obj = i915_gem_object_lookup(file, handle); 2187 if (!obj) 2188 return -ENOENT; 2189 2190 ret = i915_gem_object_create_mmap_offset(obj); 2191 if (ret == 0) 2192 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2193 2194 i915_gem_object_put(obj); 2195 return ret; 2196 } 2197 2198 /** 2199 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2200 * @dev: DRM device 2201 * @data: GTT mapping ioctl data 2202 * @file: GEM object info 2203 * 2204 * Simply returns the fake offset to userspace so it can mmap it. 2205 * The mmap call will end up in drm_gem_mmap(), which will set things 2206 * up so we can get faults in the handler above. 2207 * 2208 * The fault handler will take care of binding the object into the GTT 2209 * (since it may have been evicted to make room for something), allocating 2210 * a fence register, and mapping the appropriate aperture address into 2211 * userspace. 2212 */ 2213 int 2214 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2215 struct drm_file *file) 2216 { 2217 struct drm_i915_gem_mmap_gtt *args = data; 2218 2219 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2220 } 2221 2222 /* Immediately discard the backing storage */ 2223 static void 2224 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2225 { 2226 i915_gem_object_free_mmap_offset(obj); 2227 2228 if (obj->base.filp == NULL) 2229 return; 2230 2231 /* Our goal here is to return as much of the memory as 2232 * is possible back to the system as we are called from OOM. 2233 * To do this we must instruct the shmfs to drop all of its 2234 * backing pages, *now*. 
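 * shmem_truncate_range() below drops every backing page, and marking the
 * object __I915_MADV_PURGED records that the contents are gone for good;
 * any later attempt to get the pages will see ERR_PTR(-EFAULT).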
2235 */ 2236 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2237 obj->mm.madv = __I915_MADV_PURGED; 2238 obj->mm.pages = ERR_PTR(-EFAULT); 2239 } 2240 2241 /* Try to discard unwanted pages */ 2242 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2243 { 2244 struct address_space *mapping; 2245 2246 lockdep_assert_held(&obj->mm.lock); 2247 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2248 2249 switch (obj->mm.madv) { 2250 case I915_MADV_DONTNEED: 2251 i915_gem_object_truncate(obj); 2252 case __I915_MADV_PURGED: 2253 return; 2254 } 2255 2256 if (obj->base.filp == NULL) 2257 return; 2258 2259 mapping = obj->base.filp->f_mapping, 2260 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2261 } 2262 2263 /* 2264 * Move pages to appropriate lru and release the pagevec, decrementing the 2265 * ref count of those pages. 2266 */ 2267 static void check_release_pagevec(struct pagevec *pvec) 2268 { 2269 check_move_unevictable_pages(pvec); 2270 __pagevec_release(pvec); 2271 cond_resched(); 2272 } 2273 2274 static void 2275 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2276 struct sg_table *pages) 2277 { 2278 struct sgt_iter sgt_iter; 2279 struct pagevec pvec; 2280 struct page *page; 2281 2282 __i915_gem_object_release_shmem(obj, pages, true); 2283 2284 i915_gem_gtt_finish_pages(obj, pages); 2285 2286 if (i915_gem_object_needs_bit17_swizzle(obj)) 2287 i915_gem_object_save_bit_17_swizzle(obj, pages); 2288 2289 mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); 2290 2291 pagevec_init(&pvec); 2292 for_each_sgt_page(page, sgt_iter, pages) { 2293 if (obj->mm.dirty) 2294 set_page_dirty(page); 2295 2296 if (obj->mm.madv == I915_MADV_WILLNEED) 2297 mark_page_accessed(page); 2298 2299 if (!pagevec_add(&pvec, page)) 2300 check_release_pagevec(&pvec); 2301 } 2302 if (pagevec_count(&pvec)) 2303 check_release_pagevec(&pvec); 2304 obj->mm.dirty = false; 2305 2306 sg_free_table(pages); 2307 kfree(pages); 2308 } 2309 2310 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2311 { 2312 struct radix_tree_iter iter; 2313 void __rcu **slot; 2314 2315 rcu_read_lock(); 2316 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2317 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2318 rcu_read_unlock(); 2319 } 2320 2321 static struct sg_table * 2322 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2323 { 2324 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2325 struct sg_table *pages; 2326 2327 pages = fetch_and_zero(&obj->mm.pages); 2328 if (IS_ERR_OR_NULL(pages)) 2329 return pages; 2330 2331 spin_lock(&i915->mm.obj_lock); 2332 list_del(&obj->mm.link); 2333 spin_unlock(&i915->mm.obj_lock); 2334 2335 if (obj->mm.mapping) { 2336 void *ptr; 2337 2338 ptr = page_mask_bits(obj->mm.mapping); 2339 if (is_vmalloc_addr(ptr)) 2340 vunmap(ptr); 2341 else 2342 kunmap(kmap_to_page(ptr)); 2343 2344 obj->mm.mapping = NULL; 2345 } 2346 2347 __i915_gem_object_reset_page_iter(obj); 2348 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2349 2350 return pages; 2351 } 2352 2353 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2354 enum i915_mm_subclass subclass) 2355 { 2356 struct sg_table *pages; 2357 int ret; 2358 2359 if (i915_gem_object_has_pinned_pages(obj)) 2360 return -EBUSY; 2361 2362 GEM_BUG_ON(obj->bind_count); 2363 2364 /* May be called by shrinker from within get_pages() (on another bo) */ 2365 mutex_lock_nested(&obj->mm.lock, subclass); 2366 if 
(unlikely(atomic_read(&obj->mm.pages_pin_count))) { 2367 ret = -EBUSY; 2368 goto unlock; 2369 } 2370 2371 /* 2372 * ->put_pages might need to allocate memory for the bit17 swizzle 2373 * array, hence protect them from being reaped by removing them from gtt 2374 * lists early. 2375 */ 2376 pages = __i915_gem_object_unset_pages(obj); 2377 2378 /* 2379 * XXX Temporary hijinx to avoid updating all backends to handle 2380 * NULL pages. In the future, when we have more asynchronous 2381 * get_pages backends we should be better able to handle the 2382 * cancellation of the async task in a more uniform manner. 2383 */ 2384 if (!pages && !i915_gem_object_needs_async_cancel(obj)) 2385 pages = ERR_PTR(-EINVAL); 2386 2387 if (!IS_ERR(pages)) 2388 obj->ops->put_pages(obj, pages); 2389 2390 ret = 0; 2391 unlock: 2392 mutex_unlock(&obj->mm.lock); 2393 2394 return ret; 2395 } 2396 2397 bool i915_sg_trim(struct sg_table *orig_st) 2398 { 2399 struct sg_table new_st; 2400 struct scatterlist *sg, *new_sg; 2401 unsigned int i; 2402 2403 if (orig_st->nents == orig_st->orig_nents) 2404 return false; 2405 2406 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2407 return false; 2408 2409 new_sg = new_st.sgl; 2410 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2411 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2412 sg_dma_address(new_sg) = sg_dma_address(sg); 2413 sg_dma_len(new_sg) = sg_dma_len(sg); 2414 2415 new_sg = sg_next(new_sg); 2416 } 2417 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2418 2419 sg_free_table(orig_st); 2420 2421 *orig_st = new_st; 2422 return true; 2423 } 2424 2425 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2426 { 2427 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2428 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2429 unsigned long i; 2430 struct address_space *mapping; 2431 struct sg_table *st; 2432 struct scatterlist *sg; 2433 struct sgt_iter sgt_iter; 2434 struct page *page; 2435 unsigned long last_pfn = 0; /* suppress gcc warning */ 2436 unsigned int max_segment = i915_sg_segment_size(); 2437 unsigned int sg_page_sizes; 2438 struct pagevec pvec; 2439 gfp_t noreclaim; 2440 int ret; 2441 2442 /* 2443 * Assert that the object is not currently in any GPU domain. As it 2444 * wasn't in the GTT, there shouldn't be any way it could have been in 2445 * a GPU cache 2446 */ 2447 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2448 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2449 2450 /* 2451 * If there's no chance of allocating enough pages for the whole 2452 * object, bail early. 2453 */ 2454 if (page_count > totalram_pages()) 2455 return -ENOMEM; 2456 2457 st = kmalloc(sizeof(*st), GFP_KERNEL); 2458 if (st == NULL) 2459 return -ENOMEM; 2460 2461 rebuild_st: 2462 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2463 kfree(st); 2464 return -ENOMEM; 2465 } 2466 2467 /* 2468 * Get the list of pages out of our struct file. They'll be pinned 2469 * at this point until we release them. 
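 * The first-choice gfp below is the mapping's mask with __GFP_RECLAIM
 * stripped and __GFP_NORETRY | __GFP_NOWARN added; only if that fails do
 * we shrink our own buffers, and as a last resort retry with the full
 * mapping gfp plus __GFP_RETRY_MAYFAIL.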
2470 * 2471 * Fail silently without starting the shrinker 2472 */ 2473 mapping = obj->base.filp->f_mapping; 2474 mapping_set_unevictable(mapping); 2475 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2476 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2477 2478 sg = st->sgl; 2479 st->nents = 0; 2480 sg_page_sizes = 0; 2481 for (i = 0; i < page_count; i++) { 2482 const unsigned int shrink[] = { 2483 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2484 0, 2485 }, *s = shrink; 2486 gfp_t gfp = noreclaim; 2487 2488 do { 2489 cond_resched(); 2490 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2491 if (likely(!IS_ERR(page))) 2492 break; 2493 2494 if (!*s) { 2495 ret = PTR_ERR(page); 2496 goto err_sg; 2497 } 2498 2499 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2500 2501 /* 2502 * We've tried hard to allocate the memory by reaping 2503 * our own buffer, now let the real VM do its job and 2504 * go down in flames if truly OOM. 2505 * 2506 * However, since graphics tend to be disposable, 2507 * defer the oom here by reporting the ENOMEM back 2508 * to userspace. 2509 */ 2510 if (!*s) { 2511 /* reclaim and warn, but no oom */ 2512 gfp = mapping_gfp_mask(mapping); 2513 2514 /* 2515 * Our bo are always dirty and so we require 2516 * kswapd to reclaim our pages (direct reclaim 2517 * does not effectively begin pageout of our 2518 * buffers on its own). However, direct reclaim 2519 * only waits for kswapd when under allocation 2520 * congestion. So as a result __GFP_RECLAIM is 2521 * unreliable and fails to actually reclaim our 2522 * dirty pages -- unless you try over and over 2523 * again with !__GFP_NORETRY. However, we still 2524 * want to fail this allocation rather than 2525 * trigger the out-of-memory killer and for 2526 * this we want __GFP_RETRY_MAYFAIL. 2527 */ 2528 gfp |= __GFP_RETRY_MAYFAIL; 2529 } 2530 } while (1); 2531 2532 if (!i || 2533 sg->length >= max_segment || 2534 page_to_pfn(page) != last_pfn + 1) { 2535 if (i) { 2536 sg_page_sizes |= sg->length; 2537 sg = sg_next(sg); 2538 } 2539 st->nents++; 2540 sg_set_page(sg, page, PAGE_SIZE, 0); 2541 } else { 2542 sg->length += PAGE_SIZE; 2543 } 2544 last_pfn = page_to_pfn(page); 2545 2546 /* Check that the i965g/gm workaround works. */ 2547 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2548 } 2549 if (sg) { /* loop terminated early; short sg table */ 2550 sg_page_sizes |= sg->length; 2551 sg_mark_end(sg); 2552 } 2553 2554 /* Trim unused sg entries to avoid wasting memory. */ 2555 i915_sg_trim(st); 2556 2557 ret = i915_gem_gtt_prepare_pages(obj, st); 2558 if (ret) { 2559 /* 2560 * DMA remapping failed? One possible cause is that 2561 * it could not reserve enough large entries, asking 2562 * for PAGE_SIZE chunks instead may be helpful. 
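 * (max_segment starts out at i915_sg_segment_size(); on failure we release
 * the pages, drop it to PAGE_SIZE and jump back to rebuild_st for one more
 * attempt before giving up.)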
2563 */ 2564 if (max_segment > PAGE_SIZE) { 2565 for_each_sgt_page(page, sgt_iter, st) 2566 put_page(page); 2567 sg_free_table(st); 2568 2569 max_segment = PAGE_SIZE; 2570 goto rebuild_st; 2571 } else { 2572 dev_warn(&dev_priv->drm.pdev->dev, 2573 "Failed to DMA remap %lu pages\n", 2574 page_count); 2575 goto err_pages; 2576 } 2577 } 2578 2579 if (i915_gem_object_needs_bit17_swizzle(obj)) 2580 i915_gem_object_do_bit_17_swizzle(obj, st); 2581 2582 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2583 2584 return 0; 2585 2586 err_sg: 2587 sg_mark_end(sg); 2588 err_pages: 2589 mapping_clear_unevictable(mapping); 2590 pagevec_init(&pvec); 2591 for_each_sgt_page(page, sgt_iter, st) { 2592 if (!pagevec_add(&pvec, page)) 2593 check_release_pagevec(&pvec); 2594 } 2595 if (pagevec_count(&pvec)) 2596 check_release_pagevec(&pvec); 2597 sg_free_table(st); 2598 kfree(st); 2599 2600 /* 2601 * shmemfs first checks if there is enough memory to allocate the page 2602 * and reports ENOSPC should there be insufficient, along with the usual 2603 * ENOMEM for a genuine allocation failure. 2604 * 2605 * We use ENOSPC in our driver to mean that we have run out of aperture 2606 * space and so want to translate the error from shmemfs back to our 2607 * usual understanding of ENOMEM. 2608 */ 2609 if (ret == -ENOSPC) 2610 ret = -ENOMEM; 2611 2612 return ret; 2613 } 2614 2615 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2616 struct sg_table *pages, 2617 unsigned int sg_page_sizes) 2618 { 2619 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2620 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2621 int i; 2622 2623 lockdep_assert_held(&obj->mm.lock); 2624 2625 obj->mm.get_page.sg_pos = pages->sgl; 2626 obj->mm.get_page.sg_idx = 0; 2627 2628 obj->mm.pages = pages; 2629 2630 if (i915_gem_object_is_tiled(obj) && 2631 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2632 GEM_BUG_ON(obj->mm.quirked); 2633 __i915_gem_object_pin_pages(obj); 2634 obj->mm.quirked = true; 2635 } 2636 2637 GEM_BUG_ON(!sg_page_sizes); 2638 obj->mm.page_sizes.phys = sg_page_sizes; 2639 2640 /* 2641 * Calculate the supported page-sizes which fit into the given 2642 * sg_page_sizes. This will give us the page-sizes which we may be able 2643 * to use opportunistically when later inserting into the GTT. For 2644 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2645 * 64K or 4K pages, although in practice this will depend on a number of 2646 * other factors. 2647 */ 2648 obj->mm.page_sizes.sg = 0; 2649 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2650 if (obj->mm.page_sizes.phys & ~0u << i) 2651 obj->mm.page_sizes.sg |= BIT(i); 2652 } 2653 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2654 2655 spin_lock(&i915->mm.obj_lock); 2656 list_add(&obj->mm.link, &i915->mm.unbound_list); 2657 spin_unlock(&i915->mm.obj_lock); 2658 } 2659 2660 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2661 { 2662 int err; 2663 2664 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2665 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2666 return -EFAULT; 2667 } 2668 2669 err = obj->ops->get_pages(obj); 2670 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2671 2672 return err; 2673 } 2674 2675 /* Ensure that the associated pages are gathered from the backing storage 2676 * and pinned into our object. 
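 * A typical caller brackets its access like so (sketch):
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *	... use obj->mm.pages ...
 *	i915_gem_object_unpin_pages(obj);
 *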
i915_gem_object_pin_pages() may be called 2677 * multiple times before they are released by a single call to 2678 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2679 * either as a result of memory pressure (reaping pages under the shrinker) 2680 * or as the object is itself released. 2681 */ 2682 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2683 { 2684 int err; 2685 2686 err = mutex_lock_interruptible(&obj->mm.lock); 2687 if (err) 2688 return err; 2689 2690 if (unlikely(!i915_gem_object_has_pages(obj))) { 2691 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2692 2693 err = ____i915_gem_object_get_pages(obj); 2694 if (err) 2695 goto unlock; 2696 2697 smp_mb__before_atomic(); 2698 } 2699 atomic_inc(&obj->mm.pages_pin_count); 2700 2701 unlock: 2702 mutex_unlock(&obj->mm.lock); 2703 return err; 2704 } 2705 2706 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2707 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2708 enum i915_map_type type) 2709 { 2710 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2711 struct sg_table *sgt = obj->mm.pages; 2712 struct sgt_iter sgt_iter; 2713 struct page *page; 2714 struct page *stack_pages[32]; 2715 struct page **pages = stack_pages; 2716 unsigned long i = 0; 2717 pgprot_t pgprot; 2718 void *addr; 2719 2720 /* A single page can always be kmapped */ 2721 if (n_pages == 1 && type == I915_MAP_WB) 2722 return kmap(sg_page(sgt->sgl)); 2723 2724 if (n_pages > ARRAY_SIZE(stack_pages)) { 2725 /* Too big for stack -- allocate temporary array instead */ 2726 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2727 if (!pages) 2728 return NULL; 2729 } 2730 2731 for_each_sgt_page(page, sgt_iter, sgt) 2732 pages[i++] = page; 2733 2734 /* Check that we have the expected number of pages */ 2735 GEM_BUG_ON(i != n_pages); 2736 2737 switch (type) { 2738 default: 2739 MISSING_CASE(type); 2740 /* fallthrough to use PAGE_KERNEL anyway */ 2741 case I915_MAP_WB: 2742 pgprot = PAGE_KERNEL; 2743 break; 2744 case I915_MAP_WC: 2745 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2746 break; 2747 } 2748 addr = vmap(pages, n_pages, 0, pgprot); 2749 2750 if (pages != stack_pages) 2751 kvfree(pages); 2752 2753 return addr; 2754 } 2755 2756 /* get, pin, and map the pages of the object into kernel space */ 2757 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2758 enum i915_map_type type) 2759 { 2760 enum i915_map_type has_type; 2761 bool pinned; 2762 void *ptr; 2763 int ret; 2764 2765 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2766 return ERR_PTR(-ENXIO); 2767 2768 ret = mutex_lock_interruptible(&obj->mm.lock); 2769 if (ret) 2770 return ERR_PTR(ret); 2771 2772 pinned = !(type & I915_MAP_OVERRIDE); 2773 type &= ~I915_MAP_OVERRIDE; 2774 2775 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2776 if (unlikely(!i915_gem_object_has_pages(obj))) { 2777 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2778 2779 ret = ____i915_gem_object_get_pages(obj); 2780 if (ret) 2781 goto err_unlock; 2782 2783 smp_mb__before_atomic(); 2784 } 2785 atomic_inc(&obj->mm.pages_pin_count); 2786 pinned = false; 2787 } 2788 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2789 2790 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2791 if (ptr && has_type != type) { 2792 if (pinned) { 2793 ret = -EBUSY; 2794 goto err_unpin; 2795 } 2796 2797 if (is_vmalloc_addr(ptr)) 2798 vunmap(ptr); 2799 else 2800 kunmap(kmap_to_page(ptr)); 2801 2802 ptr = obj->mm.mapping = NULL; 2803 } 2804 2805 if (!ptr) { 
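/* No cached mapping of the requested type exists: build one and stash it in obj->mm.mapping with the type packed into the low bits. */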
2806 ptr = i915_gem_object_map(obj, type); 2807 if (!ptr) { 2808 ret = -ENOMEM; 2809 goto err_unpin; 2810 } 2811 2812 obj->mm.mapping = page_pack_bits(ptr, type); 2813 } 2814 2815 out_unlock: 2816 mutex_unlock(&obj->mm.lock); 2817 return ptr; 2818 2819 err_unpin: 2820 atomic_dec(&obj->mm.pages_pin_count); 2821 err_unlock: 2822 ptr = ERR_PTR(ret); 2823 goto out_unlock; 2824 } 2825 2826 static int 2827 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2828 const struct drm_i915_gem_pwrite *arg) 2829 { 2830 struct address_space *mapping = obj->base.filp->f_mapping; 2831 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2832 u64 remain, offset; 2833 unsigned int pg; 2834 2835 /* Before we instantiate/pin the backing store for our use, we 2836 * can prepopulate the shmemfs filp efficiently using a write into 2837 * the pagecache. We avoid the penalty of instantiating all the 2838 * pages, important if the user is just writing to a few and never 2839 * uses the object on the GPU, and using a direct write into shmemfs 2840 * allows it to avoid the cost of retrieving a page (either swapin 2841 * or clearing-before-use) before it is overwritten. 2842 */ 2843 if (i915_gem_object_has_pages(obj)) 2844 return -ENODEV; 2845 2846 if (obj->mm.madv != I915_MADV_WILLNEED) 2847 return -EFAULT; 2848 2849 /* Before the pages are instantiated the object is treated as being 2850 * in the CPU domain. The pages will be clflushed as required before 2851 * use, and we can freely write into the pages directly. If userspace 2852 * races pwrite with any other operation; corruption will ensue - 2853 * that is userspace's prerogative! 2854 */ 2855 2856 remain = arg->size; 2857 offset = arg->offset; 2858 pg = offset_in_page(offset); 2859 2860 do { 2861 unsigned int len, unwritten; 2862 struct page *page; 2863 void *data, *vaddr; 2864 int err; 2865 2866 len = PAGE_SIZE - pg; 2867 if (len > remain) 2868 len = remain; 2869 2870 err = pagecache_write_begin(obj->base.filp, mapping, 2871 offset, len, 0, 2872 &page, &data); 2873 if (err < 0) 2874 return err; 2875 2876 vaddr = kmap(page); 2877 unwritten = copy_from_user(vaddr + pg, user_data, len); 2878 kunmap(page); 2879 2880 err = pagecache_write_end(obj->base.filp, mapping, 2881 offset, len, len - unwritten, 2882 page, data); 2883 if (err < 0) 2884 return err; 2885 2886 if (unwritten) 2887 return -EFAULT; 2888 2889 remain -= len; 2890 user_data += len; 2891 offset += len; 2892 pg = 0; 2893 } while (remain); 2894 2895 return 0; 2896 } 2897 2898 static bool match_ring(struct i915_request *rq) 2899 { 2900 struct drm_i915_private *dev_priv = rq->i915; 2901 u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); 2902 2903 return ring == i915_ggtt_offset(rq->ring->vma); 2904 } 2905 2906 struct i915_request * 2907 i915_gem_find_active_request(struct intel_engine_cs *engine) 2908 { 2909 struct i915_request *request, *active = NULL; 2910 unsigned long flags; 2911 2912 /* 2913 * We are called by the error capture, reset and to dump engine 2914 * state at random points in time. In particular, note that neither is 2915 * crucially ordered with an interrupt. After a hang, the GPU is dead 2916 * and we assume that no more writes can happen (we waited long enough 2917 * for all writes that were in transaction to be flushed) - adding an 2918 * extra delay for a recent interrupt is pointless. Hence, we do 2919 * not need an engine->irq_seqno_barrier() before the seqno reads. 
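 * (In effect: we take the first request that has not yet completed, and
 * report it only if it has already started and its ring matches the
 * hardware's current RING_START; otherwise nothing is reported.)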
2920 * At all other times, we must assume the GPU is still running, but 2921 * we only care about the snapshot of this moment. 2922 */ 2923 spin_lock_irqsave(&engine->timeline.lock, flags); 2924 list_for_each_entry(request, &engine->timeline.requests, link) { 2925 if (i915_request_completed(request)) 2926 continue; 2927 2928 if (!i915_request_started(request)) 2929 break; 2930 2931 /* More than one preemptible request may match! */ 2932 if (!match_ring(request)) 2933 break; 2934 2935 active = request; 2936 break; 2937 } 2938 spin_unlock_irqrestore(&engine->timeline.lock, flags); 2939 2940 return active; 2941 } 2942 2943 static void 2944 i915_gem_retire_work_handler(struct work_struct *work) 2945 { 2946 struct drm_i915_private *dev_priv = 2947 container_of(work, typeof(*dev_priv), gt.retire_work.work); 2948 struct drm_device *dev = &dev_priv->drm; 2949 2950 /* Come back later if the device is busy... */ 2951 if (mutex_trylock(&dev->struct_mutex)) { 2952 i915_retire_requests(dev_priv); 2953 mutex_unlock(&dev->struct_mutex); 2954 } 2955 2956 /* 2957 * Keep the retire handler running until we are finally idle. 2958 * We do not need to do this test under locking as in the worst-case 2959 * we queue the retire worker once too often. 2960 */ 2961 if (READ_ONCE(dev_priv->gt.awake)) 2962 queue_delayed_work(dev_priv->wq, 2963 &dev_priv->gt.retire_work, 2964 round_jiffies_up_relative(HZ)); 2965 } 2966 2967 static void shrink_caches(struct drm_i915_private *i915) 2968 { 2969 /* 2970 * kmem_cache_shrink() discards empty slabs and reorders partially 2971 * filled slabs to prioritise allocating from the mostly full slabs, 2972 * with the aim of reducing fragmentation. 2973 */ 2974 kmem_cache_shrink(i915->priorities); 2975 kmem_cache_shrink(i915->dependencies); 2976 kmem_cache_shrink(i915->requests); 2977 kmem_cache_shrink(i915->luts); 2978 kmem_cache_shrink(i915->vmas); 2979 kmem_cache_shrink(i915->objects); 2980 } 2981 2982 struct sleep_rcu_work { 2983 union { 2984 struct rcu_head rcu; 2985 struct work_struct work; 2986 }; 2987 struct drm_i915_private *i915; 2988 unsigned int epoch; 2989 }; 2990 2991 static inline bool 2992 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 2993 { 2994 /* 2995 * There is a small chance that the epoch wrapped since we started 2996 * sleeping. If we assume that epoch is at least a u32, then it will 2997 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 
2998 */ 2999 return epoch == READ_ONCE(i915->gt.epoch); 3000 } 3001 3002 static void __sleep_work(struct work_struct *work) 3003 { 3004 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3005 struct drm_i915_private *i915 = s->i915; 3006 unsigned int epoch = s->epoch; 3007 3008 kfree(s); 3009 if (same_epoch(i915, epoch)) 3010 shrink_caches(i915); 3011 } 3012 3013 static void __sleep_rcu(struct rcu_head *rcu) 3014 { 3015 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3016 struct drm_i915_private *i915 = s->i915; 3017 3018 destroy_rcu_head(&s->rcu); 3019 3020 if (same_epoch(i915, s->epoch)) { 3021 INIT_WORK(&s->work, __sleep_work); 3022 queue_work(i915->wq, &s->work); 3023 } else { 3024 kfree(s); 3025 } 3026 } 3027 3028 static inline bool 3029 new_requests_since_last_retire(const struct drm_i915_private *i915) 3030 { 3031 return (READ_ONCE(i915->gt.active_requests) || 3032 work_pending(&i915->gt.idle_work.work)); 3033 } 3034 3035 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3036 { 3037 struct intel_engine_cs *engine; 3038 enum intel_engine_id id; 3039 3040 if (i915_terminally_wedged(&i915->gpu_error)) 3041 return; 3042 3043 GEM_BUG_ON(i915->gt.active_requests); 3044 for_each_engine(engine, i915, id) { 3045 GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request)); 3046 GEM_BUG_ON(engine->last_retired_context != 3047 to_intel_context(i915->kernel_context, engine)); 3048 } 3049 } 3050 3051 static void 3052 i915_gem_idle_work_handler(struct work_struct *work) 3053 { 3054 struct drm_i915_private *dev_priv = 3055 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3056 unsigned int epoch = I915_EPOCH_INVALID; 3057 bool rearm_hangcheck; 3058 3059 if (!READ_ONCE(dev_priv->gt.awake)) 3060 return; 3061 3062 if (READ_ONCE(dev_priv->gt.active_requests)) 3063 return; 3064 3065 /* 3066 * Flush out the last user context, leaving only the pinned 3067 * kernel context resident. When we are idling on the kernel_context, 3068 * no more new requests (with a context switch) are emitted and we 3069 * can finally rest. A consequence is that the idle work handler is 3070 * always called at least twice before idling (and if the system is 3071 * idle that implies a round trip through the retire worker). 3072 */ 3073 mutex_lock(&dev_priv->drm.struct_mutex); 3074 i915_gem_switch_to_kernel_context(dev_priv); 3075 mutex_unlock(&dev_priv->drm.struct_mutex); 3076 3077 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3078 READ_ONCE(dev_priv->gt.active_requests)); 3079 3080 /* 3081 * Wait for last execlists context complete, but bail out in case a 3082 * new request is submitted. As we don't trust the hardware, we 3083 * continue on if the wait times out. This is necessary to allow 3084 * the machine to suspend even if the hardware dies, and we will 3085 * try to recover in resume (after depriving the hardware of power, 3086 * it may be in a better mmod). 
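 * (The __wait_for() below polls intel_engines_are_idle() for up to
 * I915_IDLE_ENGINES_TIMEOUT ms, backing off between polls, and bails out
 * straight away if a new request arrives in the meantime.)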
3087 */ 3088 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3089 intel_engines_are_idle(dev_priv), 3090 I915_IDLE_ENGINES_TIMEOUT * 1000, 3091 10, 500); 3092 3093 rearm_hangcheck = 3094 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3095 3096 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3097 /* Currently busy, come back later */ 3098 mod_delayed_work(dev_priv->wq, 3099 &dev_priv->gt.idle_work, 3100 msecs_to_jiffies(50)); 3101 goto out_rearm; 3102 } 3103 3104 /* 3105 * New request retired after this work handler started, extend active 3106 * period until next instance of the work. 3107 */ 3108 if (new_requests_since_last_retire(dev_priv)) 3109 goto out_unlock; 3110 3111 epoch = __i915_gem_park(dev_priv); 3112 3113 assert_kernel_context_is_current(dev_priv); 3114 3115 rearm_hangcheck = false; 3116 out_unlock: 3117 mutex_unlock(&dev_priv->drm.struct_mutex); 3118 3119 out_rearm: 3120 if (rearm_hangcheck) { 3121 GEM_BUG_ON(!dev_priv->gt.awake); 3122 i915_queue_hangcheck(dev_priv); 3123 } 3124 3125 /* 3126 * When we are idle, it is an opportune time to reap our caches. 3127 * However, we have many objects that utilise RCU and the ordered 3128 * i915->wq that this work is executing on. To try and flush any 3129 * pending frees now we are idle, we first wait for an RCU grace 3130 * period, and then queue a task (that will run last on the wq) to 3131 * shrink and re-optimize the caches. 3132 */ 3133 if (same_epoch(dev_priv, epoch)) { 3134 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3135 if (s) { 3136 init_rcu_head(&s->rcu); 3137 s->i915 = dev_priv; 3138 s->epoch = epoch; 3139 call_rcu(&s->rcu, __sleep_rcu); 3140 } 3141 } 3142 } 3143 3144 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3145 { 3146 struct drm_i915_private *i915 = to_i915(gem->dev); 3147 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3148 struct drm_i915_file_private *fpriv = file->driver_priv; 3149 struct i915_lut_handle *lut, *ln; 3150 3151 mutex_lock(&i915->drm.struct_mutex); 3152 3153 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3154 struct i915_gem_context *ctx = lut->ctx; 3155 struct i915_vma *vma; 3156 3157 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3158 if (ctx->file_priv != fpriv) 3159 continue; 3160 3161 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3162 GEM_BUG_ON(vma->obj != obj); 3163 3164 /* We allow the process to have multiple handles to the same 3165 * vma, in the same fd namespace, by virtue of flink/open. 3166 */ 3167 GEM_BUG_ON(!vma->open_count); 3168 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3169 i915_vma_close(vma); 3170 3171 list_del(&lut->obj_link); 3172 list_del(&lut->ctx_link); 3173 3174 kmem_cache_free(i915->luts, lut); 3175 __i915_gem_object_release_unless_active(obj); 3176 } 3177 3178 mutex_unlock(&i915->drm.struct_mutex); 3179 } 3180 3181 static unsigned long to_wait_timeout(s64 timeout_ns) 3182 { 3183 if (timeout_ns < 0) 3184 return MAX_SCHEDULE_TIMEOUT; 3185 3186 if (timeout_ns == 0) 3187 return 0; 3188 3189 return nsecs_to_jiffies_timeout(timeout_ns); 3190 } 3191 3192 /** 3193 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3194 * @dev: drm device pointer 3195 * @data: ioctl data blob 3196 * @file: drm file pointer 3197 * 3198 * Returns 0 if successful, else an error is returned with the remaining time in 3199 * the timeout parameter. 
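 * A minimal userspace sketch (assuming libdrm's drmIoctl(); error handling
 * elided; flags must be zero and a negative timeout means wait forever):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = -1,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * Typical return values: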
3200 * -ETIME: object is still busy after timeout 3201 * -ERESTARTSYS: signal interrupted the wait 3202 * -ENOENT: object doesn't exist 3203 * Also possible, but rare: 3204 * -EAGAIN: incomplete, restart syscall 3205 * -ENOMEM: damn 3206 * -ENODEV: Internal IRQ fail 3207 * -E?: The add request failed 3208 * 3209 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3210 * non-zero timeout parameter the wait ioctl will wait for the given number of 3211 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3212 * without holding struct_mutex the object may become re-busied before this 3213 * function completes. A similar but shorter race condition exists in the busy 3214 * ioctl. 3215 */ 3216 int 3217 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3218 { 3219 struct drm_i915_gem_wait *args = data; 3220 struct drm_i915_gem_object *obj; 3221 ktime_t start; 3222 long ret; 3223 3224 if (args->flags != 0) 3225 return -EINVAL; 3226 3227 obj = i915_gem_object_lookup(file, args->bo_handle); 3228 if (!obj) 3229 return -ENOENT; 3230 3231 start = ktime_get(); 3232 3233 ret = i915_gem_object_wait(obj, 3234 I915_WAIT_INTERRUPTIBLE | 3235 I915_WAIT_PRIORITY | 3236 I915_WAIT_ALL, 3237 to_wait_timeout(args->timeout_ns), 3238 to_rps_client(file)); 3239 3240 if (args->timeout_ns > 0) { 3241 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3242 if (args->timeout_ns < 0) 3243 args->timeout_ns = 0; 3244 3245 /* 3246 * Apparently ktime isn't accurate enough and occasionally has a 3247 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3248 * things up to make the test happy. We allow up to 1 jiffy. 3249 * 3250 * This is a regression from the timespec->ktime conversion. 3251 */ 3252 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3253 args->timeout_ns = 0; 3254 3255 /* Asked to wait beyond the jiffie/scheduler precision? */ 3256 if (ret == -ETIME && args->timeout_ns) 3257 ret = -EAGAIN; 3258 } 3259 3260 i915_gem_object_put(obj); 3261 return ret; 3262 } 3263 3264 static int wait_for_engines(struct drm_i915_private *i915) 3265 { 3266 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3267 dev_err(i915->drm.dev, 3268 "Failed to idle engines, declaring wedged!\n"); 3269 GEM_TRACE_DUMP(); 3270 i915_gem_set_wedged(i915); 3271 return -EIO; 3272 } 3273 3274 return 0; 3275 } 3276 3277 static long 3278 wait_for_timelines(struct drm_i915_private *i915, 3279 unsigned int flags, long timeout) 3280 { 3281 struct i915_gt_timelines *gt = &i915->gt.timelines; 3282 struct i915_timeline *tl; 3283 3284 if (!READ_ONCE(i915->gt.active_requests)) 3285 return timeout; 3286 3287 mutex_lock(&gt->mutex); 3288 list_for_each_entry(tl, &gt->active_list, link) { 3289 struct i915_request *rq; 3290 3291 rq = i915_active_request_get_unlocked(&tl->last_request); 3292 if (!rq) 3293 continue; 3294 3295 mutex_unlock(&gt->mutex); 3296 3297 /* 3298 * "Race-to-idle". 3299 * 3300 * Switching to the kernel context is often used as a synchronous 3301 * step prior to idling, e.g. in suspend for flushing all 3302 * current operations to memory before sleeping. These we 3303 * want to complete as quickly as possible to avoid prolonged 3304 * stalls, so allow the gpu to boost to maximum clocks.
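 * (The boost is only requested when the caller passes
 * I915_WAIT_FOR_IDLE_BOOST, e.g. from the suspend path.)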
3305 */ 3306 if (flags & I915_WAIT_FOR_IDLE_BOOST) 3307 gen6_rps_boost(rq, NULL); 3308 3309 timeout = i915_request_wait(rq, flags, timeout); 3310 i915_request_put(rq); 3311 if (timeout < 0) 3312 return timeout; 3313 3314 /* restart after reacquiring the lock */ 3315 mutex_lock(&gt->mutex); 3316 tl = list_entry(&gt->active_list, typeof(*tl), link); 3317 } 3318 mutex_unlock(&gt->mutex); 3319 3320 return timeout; 3321 } 3322 3323 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 3324 unsigned int flags, long timeout) 3325 { 3326 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 3327 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 3328 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 3329 3330 /* If the device is asleep, we have no requests outstanding */ 3331 if (!READ_ONCE(i915->gt.awake)) 3332 return 0; 3333 3334 timeout = wait_for_timelines(i915, flags, timeout); 3335 if (timeout < 0) 3336 return timeout; 3337 3338 if (flags & I915_WAIT_LOCKED) { 3339 int err; 3340 3341 lockdep_assert_held(&i915->drm.struct_mutex); 3342 3343 if (GEM_SHOW_DEBUG() && !timeout) { 3344 /* Presume that timeout was non-zero to begin with! */ 3345 dev_warn(&i915->drm.pdev->dev, 3346 "Missed idle-completion interrupt!\n"); 3347 GEM_TRACE_DUMP(); 3348 } 3349 3350 err = wait_for_engines(i915); 3351 if (err) 3352 return err; 3353 3354 i915_retire_requests(i915); 3355 GEM_BUG_ON(i915->gt.active_requests); 3356 } 3357 3358 return 0; 3359 } 3360 3361 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3362 { 3363 /* 3364 * We manually flush the CPU domain so that we can override and 3365 * force the flush for the display, and perform it asynchronously. 3366 */ 3367 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3368 if (obj->cache_dirty) 3369 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3370 obj->write_domain = 0; 3371 } 3372 3373 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3374 { 3375 if (!READ_ONCE(obj->pin_global)) 3376 return; 3377 3378 mutex_lock(&obj->base.dev->struct_mutex); 3379 __i915_gem_object_flush_for_display(obj); 3380 mutex_unlock(&obj->base.dev->struct_mutex); 3381 } 3382 3383 /** 3384 * Moves a single object to the WC read, and possibly write domain. 3385 * @obj: object to act on 3386 * @write: ask for write access or read only 3387 * 3388 * This function returns when the move is complete, including waiting on 3389 * flushes to occur. 3390 */ 3391 int 3392 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3393 { 3394 int ret; 3395 3396 lockdep_assert_held(&obj->base.dev->struct_mutex); 3397 3398 ret = i915_gem_object_wait(obj, 3399 I915_WAIT_INTERRUPTIBLE | 3400 I915_WAIT_LOCKED | 3401 (write ? I915_WAIT_ALL : 0), 3402 MAX_SCHEDULE_TIMEOUT, 3403 NULL); 3404 if (ret) 3405 return ret; 3406 3407 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3408 return 0; 3409 3410 /* Flush and acquire obj->pages so that we are coherent through 3411 * direct access in memory with previous cached writes through 3412 * shmemfs and that our cache domain tracking remains valid. 3413 * For example, if the obj->filp was moved to swap without us 3414 * being notified and releasing the pages, we would mistakenly 3415 * continue to assume that the obj remained out of the CPU cached 3416 * domain.
3417 */ 3418 ret = i915_gem_object_pin_pages(obj); 3419 if (ret) 3420 return ret; 3421 3422 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3423 3424 /* Serialise direct access to this object with the barriers for 3425 * coherent writes from the GPU, by effectively invalidating the 3426 * WC domain upon first access. 3427 */ 3428 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3429 mb(); 3430 3431 /* It should now be out of any other write domains, and we can update 3432 * the domain values for our changes. 3433 */ 3434 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3435 obj->read_domains |= I915_GEM_DOMAIN_WC; 3436 if (write) { 3437 obj->read_domains = I915_GEM_DOMAIN_WC; 3438 obj->write_domain = I915_GEM_DOMAIN_WC; 3439 obj->mm.dirty = true; 3440 } 3441 3442 i915_gem_object_unpin_pages(obj); 3443 return 0; 3444 } 3445 3446 /** 3447 * Moves a single object to the GTT read, and possibly write domain. 3448 * @obj: object to act on 3449 * @write: ask for write access or read only 3450 * 3451 * This function returns when the move is complete, including waiting on 3452 * flushes to occur. 3453 */ 3454 int 3455 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3456 { 3457 int ret; 3458 3459 lockdep_assert_held(&obj->base.dev->struct_mutex); 3460 3461 ret = i915_gem_object_wait(obj, 3462 I915_WAIT_INTERRUPTIBLE | 3463 I915_WAIT_LOCKED | 3464 (write ? I915_WAIT_ALL : 0), 3465 MAX_SCHEDULE_TIMEOUT, 3466 NULL); 3467 if (ret) 3468 return ret; 3469 3470 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3471 return 0; 3472 3473 /* Flush and acquire obj->pages so that we are coherent through 3474 * direct access in memory with previous cached writes through 3475 * shmemfs and that our cache domain tracking remains valid. 3476 * For example, if the obj->filp was moved to swap without us 3477 * being notified and releasing the pages, we would mistakenly 3478 * continue to assume that the obj remained out of the CPU cached 3479 * domain. 3480 */ 3481 ret = i915_gem_object_pin_pages(obj); 3482 if (ret) 3483 return ret; 3484 3485 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3486 3487 /* Serialise direct access to this object with the barriers for 3488 * coherent writes from the GPU, by effectively invalidating the 3489 * GTT domain upon first access. 3490 */ 3491 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3492 mb(); 3493 3494 /* It should now be out of any other write domains, and we can update 3495 * the domain values for our changes. 3496 */ 3497 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3498 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3499 if (write) { 3500 obj->read_domains = I915_GEM_DOMAIN_GTT; 3501 obj->write_domain = I915_GEM_DOMAIN_GTT; 3502 obj->mm.dirty = true; 3503 } 3504 3505 i915_gem_object_unpin_pages(obj); 3506 return 0; 3507 } 3508 3509 /** 3510 * Changes the cache-level of an object across all VMA. 3511 * @obj: object to act on 3512 * @cache_level: new cache level to set for the object 3513 * 3514 * After this function returns, the object will be in the new cache-level 3515 * across all GTT and the contents of the backing storage will be coherent, 3516 * with respect to the new cache-level. In order to keep the backing storage 3517 * coherent for all users, we only allow a single cache level to be set 3518 * globally on the object and prevent it from being changed whilst the 3519 * hardware is reading from the object. 
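 * (Userspace selects a mode with DRM_IOCTL_I915_GEM_SET_CACHING, which
 * reaches this function via i915_gem_set_caching_ioctl() further below.)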
That is if the object is currently 3520 * on the scanout it will be set to uncached (or equivalent display 3521 * cache coherency) and all non-MOCS GPU access will also be uncached so 3522 * that all direct access to the scanout remains coherent. 3523 */ 3524 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3525 enum i915_cache_level cache_level) 3526 { 3527 struct i915_vma *vma; 3528 int ret; 3529 3530 lockdep_assert_held(&obj->base.dev->struct_mutex); 3531 3532 if (obj->cache_level == cache_level) 3533 return 0; 3534 3535 /* Inspect the list of currently bound VMA and unbind any that would 3536 * be invalid given the new cache-level. This is principally to 3537 * catch the issue of the CS prefetch crossing page boundaries and 3538 * reading an invalid PTE on older architectures. 3539 */ 3540 restart: 3541 list_for_each_entry(vma, &obj->vma.list, obj_link) { 3542 if (!drm_mm_node_allocated(&vma->node)) 3543 continue; 3544 3545 if (i915_vma_is_pinned(vma)) { 3546 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3547 return -EBUSY; 3548 } 3549 3550 if (!i915_vma_is_closed(vma) && 3551 i915_gem_valid_gtt_space(vma, cache_level)) 3552 continue; 3553 3554 ret = i915_vma_unbind(vma); 3555 if (ret) 3556 return ret; 3557 3558 /* As unbinding may affect other elements in the 3559 * obj->vma_list (due to side-effects from retiring 3560 * an active vma), play safe and restart the iterator. 3561 */ 3562 goto restart; 3563 } 3564 3565 /* We can reuse the existing drm_mm nodes but need to change the 3566 * cache-level on the PTE. We could simply unbind them all and 3567 * rebind with the correct cache-level on next use. However since 3568 * we already have a valid slot, dma mapping, pages etc, we may as 3569 * rewrite the PTE in the belief that doing so tramples upon less 3570 * state and so involves less work. 3571 */ 3572 if (obj->bind_count) { 3573 /* Before we change the PTE, the GPU must not be accessing it. 3574 * If we wait upon the object, we know that all the bound 3575 * VMA are no longer active. 3576 */ 3577 ret = i915_gem_object_wait(obj, 3578 I915_WAIT_INTERRUPTIBLE | 3579 I915_WAIT_LOCKED | 3580 I915_WAIT_ALL, 3581 MAX_SCHEDULE_TIMEOUT, 3582 NULL); 3583 if (ret) 3584 return ret; 3585 3586 if (!HAS_LLC(to_i915(obj->base.dev)) && 3587 cache_level != I915_CACHE_NONE) { 3588 /* Access to snoopable pages through the GTT is 3589 * incoherent and on some machines causes a hard 3590 * lockup. Relinquish the CPU mmaping to force 3591 * userspace to refault in the pages and we can 3592 * then double check if the GTT mapping is still 3593 * valid for that pointer access. 3594 */ 3595 i915_gem_release_mmap(obj); 3596 3597 /* As we no longer need a fence for GTT access, 3598 * we can relinquish it now (and so prevent having 3599 * to steal a fence from someone else on the next 3600 * fence request). Note GPU activity would have 3601 * dropped the fence as all snoopable access is 3602 * supposed to be linear. 3603 */ 3604 for_each_ggtt_vma(vma, obj) { 3605 ret = i915_vma_put_fence(vma); 3606 if (ret) 3607 return ret; 3608 } 3609 } else { 3610 /* We either have incoherent backing store and 3611 * so no GTT access or the architecture is fully 3612 * coherent. In such cases, existing GTT mmaps 3613 * ignore the cache bit in the PTE and we can 3614 * rewrite it without confusing the GPU or having 3615 * to force userspace to fault back in its mmaps. 
3616 */ 3617 } 3618 3619 list_for_each_entry(vma, &obj->vma.list, obj_link) { 3620 if (!drm_mm_node_allocated(&vma->node)) 3621 continue; 3622 3623 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3624 if (ret) 3625 return ret; 3626 } 3627 } 3628 3629 list_for_each_entry(vma, &obj->vma.list, obj_link) 3630 vma->node.color = cache_level; 3631 i915_gem_object_set_cache_coherency(obj, cache_level); 3632 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 3633 3634 return 0; 3635 } 3636 3637 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3638 struct drm_file *file) 3639 { 3640 struct drm_i915_gem_caching *args = data; 3641 struct drm_i915_gem_object *obj; 3642 int err = 0; 3643 3644 rcu_read_lock(); 3645 obj = i915_gem_object_lookup_rcu(file, args->handle); 3646 if (!obj) { 3647 err = -ENOENT; 3648 goto out; 3649 } 3650 3651 switch (obj->cache_level) { 3652 case I915_CACHE_LLC: 3653 case I915_CACHE_L3_LLC: 3654 args->caching = I915_CACHING_CACHED; 3655 break; 3656 3657 case I915_CACHE_WT: 3658 args->caching = I915_CACHING_DISPLAY; 3659 break; 3660 3661 default: 3662 args->caching = I915_CACHING_NONE; 3663 break; 3664 } 3665 out: 3666 rcu_read_unlock(); 3667 return err; 3668 } 3669 3670 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3671 struct drm_file *file) 3672 { 3673 struct drm_i915_private *i915 = to_i915(dev); 3674 struct drm_i915_gem_caching *args = data; 3675 struct drm_i915_gem_object *obj; 3676 enum i915_cache_level level; 3677 int ret = 0; 3678 3679 switch (args->caching) { 3680 case I915_CACHING_NONE: 3681 level = I915_CACHE_NONE; 3682 break; 3683 case I915_CACHING_CACHED: 3684 /* 3685 * Due to a HW issue on BXT A stepping, GPU stores via a 3686 * snooped mapping may leave stale data in a corresponding CPU 3687 * cacheline, whereas normally such cachelines would get 3688 * invalidated. 3689 */ 3690 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 3691 return -ENODEV; 3692 3693 level = I915_CACHE_LLC; 3694 break; 3695 case I915_CACHING_DISPLAY: 3696 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 3697 break; 3698 default: 3699 return -EINVAL; 3700 } 3701 3702 obj = i915_gem_object_lookup(file, args->handle); 3703 if (!obj) 3704 return -ENOENT; 3705 3706 /* 3707 * The caching mode of proxy object is handled by its generator, and 3708 * not allowed to be changed by userspace. 3709 */ 3710 if (i915_gem_object_is_proxy(obj)) { 3711 ret = -ENXIO; 3712 goto out; 3713 } 3714 3715 if (obj->cache_level == level) 3716 goto out; 3717 3718 ret = i915_gem_object_wait(obj, 3719 I915_WAIT_INTERRUPTIBLE, 3720 MAX_SCHEDULE_TIMEOUT, 3721 to_rps_client(file)); 3722 if (ret) 3723 goto out; 3724 3725 ret = i915_mutex_lock_interruptible(dev); 3726 if (ret) 3727 goto out; 3728 3729 ret = i915_gem_object_set_cache_level(obj, level); 3730 mutex_unlock(&dev->struct_mutex); 3731 3732 out: 3733 i915_gem_object_put(obj); 3734 return ret; 3735 } 3736 3737 /* 3738 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 3739 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 3740 * (for pageflips). We only flush the caches while preparing the buffer for 3741 * display, the callers are responsible for frontbuffer flush. 
3742 */ 3743 struct i915_vma * 3744 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3745 u32 alignment, 3746 const struct i915_ggtt_view *view, 3747 unsigned int flags) 3748 { 3749 struct i915_vma *vma; 3750 int ret; 3751 3752 lockdep_assert_held(&obj->base.dev->struct_mutex); 3753 3754 /* Mark the global pin early so that we account for the 3755 * display coherency whilst setting up the cache domains. 3756 */ 3757 obj->pin_global++; 3758 3759 /* The display engine is not coherent with the LLC cache on gen6. As 3760 * a result, we make sure that the pinning that is about to occur is 3761 * done with uncached PTEs. This is lowest common denominator for all 3762 * chipsets. 3763 * 3764 * However for gen6+, we could do better by using the GFDT bit instead 3765 * of uncaching, which would allow us to flush all the LLC-cached data 3766 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3767 */ 3768 ret = i915_gem_object_set_cache_level(obj, 3769 HAS_WT(to_i915(obj->base.dev)) ? 3770 I915_CACHE_WT : I915_CACHE_NONE); 3771 if (ret) { 3772 vma = ERR_PTR(ret); 3773 goto err_unpin_global; 3774 } 3775 3776 /* As the user may map the buffer once pinned in the display plane 3777 * (e.g. libkms for the bootup splash), we have to ensure that we 3778 * always use map_and_fenceable for all scanout buffers. However, 3779 * it may simply be too big to fit into mappable, in which case 3780 * put it anyway and hope that userspace can cope (but always first 3781 * try to preserve the existing ABI). 3782 */ 3783 vma = ERR_PTR(-ENOSPC); 3784 if ((flags & PIN_MAPPABLE) == 0 && 3785 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 3786 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 3787 flags | 3788 PIN_MAPPABLE | 3789 PIN_NONBLOCK); 3790 if (IS_ERR(vma)) 3791 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 3792 if (IS_ERR(vma)) 3793 goto err_unpin_global; 3794 3795 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 3796 3797 __i915_gem_object_flush_for_display(obj); 3798 3799 /* It should now be out of any other write domains, and we can update 3800 * the domain values for our changes. 3801 */ 3802 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3803 3804 return vma; 3805 3806 err_unpin_global: 3807 obj->pin_global--; 3808 return vma; 3809 } 3810 3811 void 3812 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 3813 { 3814 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 3815 3816 if (WARN_ON(vma->obj->pin_global == 0)) 3817 return; 3818 3819 if (--vma->obj->pin_global == 0) 3820 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 3821 3822 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 3823 i915_gem_object_bump_inactive_ggtt(vma->obj); 3824 3825 i915_vma_unpin(vma); 3826 } 3827 3828 /** 3829 * Moves a single object to the CPU read, and possibly write domain. 3830 * @obj: object to act on 3831 * @write: requesting write or read-only access 3832 * 3833 * This function returns when the move is complete, including waiting on 3834 * flushes to occur. 3835 */ 3836 int 3837 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3838 { 3839 int ret; 3840 3841 lockdep_assert_held(&obj->base.dev->struct_mutex); 3842 3843 ret = i915_gem_object_wait(obj, 3844 I915_WAIT_INTERRUPTIBLE | 3845 I915_WAIT_LOCKED | 3846 (write ? 
I915_WAIT_ALL : 0), 3847 MAX_SCHEDULE_TIMEOUT, 3848 NULL); 3849 if (ret) 3850 return ret; 3851 3852 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3853 3854 /* Flush the CPU cache if it's still invalid. */ 3855 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3856 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 3857 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3858 } 3859 3860 /* It should now be out of any other write domains, and we can update 3861 * the domain values for our changes. 3862 */ 3863 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 3864 3865 /* If we're writing through the CPU, then the GPU read domains will 3866 * need to be invalidated at next use. 3867 */ 3868 if (write) 3869 __start_cpu_write(obj); 3870 3871 return 0; 3872 } 3873 3874 /* Throttle our rendering by waiting until the ring has completed our requests 3875 * emitted over 20 msec ago. 3876 * 3877 * Note that if we were to use the current jiffies each time around the loop, 3878 * we wouldn't escape the function with any frames outstanding if the time to 3879 * render a frame was over 20ms. 3880 * 3881 * This should get us reasonable parallelism between CPU and GPU but also 3882 * relatively low latency when blocking on a particular request to finish. 3883 */ 3884 static int 3885 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3886 { 3887 struct drm_i915_private *dev_priv = to_i915(dev); 3888 struct drm_i915_file_private *file_priv = file->driver_priv; 3889 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 3890 struct i915_request *request, *target = NULL; 3891 long ret; 3892 3893 /* ABI: return -EIO if already wedged */ 3894 if (i915_terminally_wedged(&dev_priv->gpu_error)) 3895 return -EIO; 3896 3897 spin_lock(&file_priv->mm.lock); 3898 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 3899 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3900 break; 3901 3902 if (target) { 3903 list_del(&target->client_link); 3904 target->file_priv = NULL; 3905 } 3906 3907 target = request; 3908 } 3909 if (target) 3910 i915_request_get(target); 3911 spin_unlock(&file_priv->mm.lock); 3912 3913 if (target == NULL) 3914 return 0; 3915 3916 ret = i915_request_wait(target, 3917 I915_WAIT_INTERRUPTIBLE, 3918 MAX_SCHEDULE_TIMEOUT); 3919 i915_request_put(target); 3920 3921 return ret < 0 ? ret : 0; 3922 } 3923 3924 struct i915_vma * 3925 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3926 const struct i915_ggtt_view *view, 3927 u64 size, 3928 u64 alignment, 3929 u64 flags) 3930 { 3931 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3932 struct i915_address_space *vm = &dev_priv->ggtt.vm; 3933 struct i915_vma *vma; 3934 int ret; 3935 3936 lockdep_assert_held(&obj->base.dev->struct_mutex); 3937 3938 if (flags & PIN_MAPPABLE && 3939 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 3940 /* If the required space is larger than the available 3941 * aperture, we will not able to find a slot for the 3942 * object and unbinding the object now will be in 3943 * vain. Worse, doing so may cause us to ping-pong 3944 * the object in and out of the Global GTT and 3945 * waste a lot of cycles under the mutex. 3946 */ 3947 if (obj->base.size > dev_priv->ggtt.mappable_end) 3948 return ERR_PTR(-E2BIG); 3949 3950 /* If NONBLOCK is set the caller is optimistically 3951 * trying to cache the full object within the mappable 3952 * aperture, and *must* have a fallback in place for 3953 * situations where we cannot bind the object. 
We 3954 * can be a little more lax here and use the fallback 3955 * more often to avoid costly migrations of ourselves 3956 * and other objects within the aperture. 3957 * 3958 * Half-the-aperture is used as a simple heuristic. 3959 * More interesting would to do search for a free 3960 * block prior to making the commitment to unbind. 3961 * That caters for the self-harm case, and with a 3962 * little more heuristics (e.g. NOFAULT, NOEVICT) 3963 * we could try to minimise harm to others. 3964 */ 3965 if (flags & PIN_NONBLOCK && 3966 obj->base.size > dev_priv->ggtt.mappable_end / 2) 3967 return ERR_PTR(-ENOSPC); 3968 } 3969 3970 vma = i915_vma_instance(obj, vm, view); 3971 if (unlikely(IS_ERR(vma))) 3972 return vma; 3973 3974 if (i915_vma_misplaced(vma, size, alignment, flags)) { 3975 if (flags & PIN_NONBLOCK) { 3976 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 3977 return ERR_PTR(-ENOSPC); 3978 3979 if (flags & PIN_MAPPABLE && 3980 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 3981 return ERR_PTR(-ENOSPC); 3982 } 3983 3984 WARN(i915_vma_is_pinned(vma), 3985 "bo is already pinned in ggtt with incorrect alignment:" 3986 " offset=%08x, req.alignment=%llx," 3987 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 3988 i915_ggtt_offset(vma), alignment, 3989 !!(flags & PIN_MAPPABLE), 3990 i915_vma_is_map_and_fenceable(vma)); 3991 ret = i915_vma_unbind(vma); 3992 if (ret) 3993 return ERR_PTR(ret); 3994 } 3995 3996 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 3997 if (ret) 3998 return ERR_PTR(ret); 3999 4000 return vma; 4001 } 4002 4003 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4004 { 4005 /* Note that we could alias engines in the execbuf API, but 4006 * that would be very unwise as it prevents userspace from 4007 * fine control over engine selection. Ahem. 4008 * 4009 * This should be something like EXEC_MAX_ENGINE instead of 4010 * I915_NUM_ENGINES. 4011 */ 4012 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4013 return 0x10000 << id; 4014 } 4015 4016 static __always_inline unsigned int __busy_write_id(unsigned int id) 4017 { 4018 /* The uABI guarantees an active writer is also amongst the read 4019 * engines. This would be true if we accessed the activity tracking 4020 * under the lock, but as we perform the lookup of the object and 4021 * its activity locklessly we can not guarantee that the last_write 4022 * being active implies that we have set the same engine flag from 4023 * last_read - hence we always set both read and write busy for 4024 * last_write. 4025 */ 4026 return id | __busy_read_flag(id); 4027 } 4028 4029 static __always_inline unsigned int 4030 __busy_set_if_active(const struct dma_fence *fence, 4031 unsigned int (*flag)(unsigned int id)) 4032 { 4033 struct i915_request *rq; 4034 4035 /* We have to check the current hw status of the fence as the uABI 4036 * guarantees forward progress. We could rely on the idle worker 4037 * to eventually flush us, but to minimise latency just ask the 4038 * hardware. 4039 * 4040 * Note we only report on the status of native fences. 
4041 */ 4042 if (!dma_fence_is_i915(fence)) 4043 return 0; 4044 4045 /* opencode to_request() in order to avoid const warnings */ 4046 rq = container_of(fence, struct i915_request, fence); 4047 if (i915_request_completed(rq)) 4048 return 0; 4049 4050 return flag(rq->engine->uabi_id); 4051 } 4052 4053 static __always_inline unsigned int 4054 busy_check_reader(const struct dma_fence *fence) 4055 { 4056 return __busy_set_if_active(fence, __busy_read_flag); 4057 } 4058 4059 static __always_inline unsigned int 4060 busy_check_writer(const struct dma_fence *fence) 4061 { 4062 if (!fence) 4063 return 0; 4064 4065 return __busy_set_if_active(fence, __busy_write_id); 4066 } 4067 4068 int 4069 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4070 struct drm_file *file) 4071 { 4072 struct drm_i915_gem_busy *args = data; 4073 struct drm_i915_gem_object *obj; 4074 struct reservation_object_list *list; 4075 unsigned int seq; 4076 int err; 4077 4078 err = -ENOENT; 4079 rcu_read_lock(); 4080 obj = i915_gem_object_lookup_rcu(file, args->handle); 4081 if (!obj) 4082 goto out; 4083 4084 /* A discrepancy here is that we do not report the status of 4085 * non-i915 fences, i.e. even though we may report the object as idle, 4086 * a call to set-domain may still stall waiting for foreign rendering. 4087 * This also means that wait-ioctl may report an object as busy, 4088 * where busy-ioctl considers it idle. 4089 * 4090 * We trade the ability to warn of foreign fences to report on which 4091 * i915 engines are active for the object. 4092 * 4093 * Alternatively, we can trade that extra information on read/write 4094 * activity with 4095 * args->busy = 4096 * !reservation_object_test_signaled_rcu(obj->resv, true); 4097 * to report the overall busyness. This is what the wait-ioctl does. 
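 * (For reference, the mask assembled below packs the active writer's engine
 * id into the low 16 bits and one read-busy flag per engine into the high
 * 16 bits; see __busy_read_flag() and __busy_write_id() above.)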
4098 * 4099 */ 4100 retry: 4101 seq = raw_read_seqcount(&obj->resv->seq); 4102 4103 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4104 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4105 4106 /* Translate shared fences to READ set of engines */ 4107 list = rcu_dereference(obj->resv->fence); 4108 if (list) { 4109 unsigned int shared_count = list->shared_count, i; 4110 4111 for (i = 0; i < shared_count; ++i) { 4112 struct dma_fence *fence = 4113 rcu_dereference(list->shared[i]); 4114 4115 args->busy |= busy_check_reader(fence); 4116 } 4117 } 4118 4119 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4120 goto retry; 4121 4122 err = 0; 4123 out: 4124 rcu_read_unlock(); 4125 return err; 4126 } 4127 4128 int 4129 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4130 struct drm_file *file_priv) 4131 { 4132 return i915_gem_ring_throttle(dev, file_priv); 4133 } 4134 4135 int 4136 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4137 struct drm_file *file_priv) 4138 { 4139 struct drm_i915_private *dev_priv = to_i915(dev); 4140 struct drm_i915_gem_madvise *args = data; 4141 struct drm_i915_gem_object *obj; 4142 int err; 4143 4144 switch (args->madv) { 4145 case I915_MADV_DONTNEED: 4146 case I915_MADV_WILLNEED: 4147 break; 4148 default: 4149 return -EINVAL; 4150 } 4151 4152 obj = i915_gem_object_lookup(file_priv, args->handle); 4153 if (!obj) 4154 return -ENOENT; 4155 4156 err = mutex_lock_interruptible(&obj->mm.lock); 4157 if (err) 4158 goto out; 4159 4160 if (i915_gem_object_has_pages(obj) && 4161 i915_gem_object_is_tiled(obj) && 4162 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4163 if (obj->mm.madv == I915_MADV_WILLNEED) { 4164 GEM_BUG_ON(!obj->mm.quirked); 4165 __i915_gem_object_unpin_pages(obj); 4166 obj->mm.quirked = false; 4167 } 4168 if (args->madv == I915_MADV_WILLNEED) { 4169 GEM_BUG_ON(obj->mm.quirked); 4170 __i915_gem_object_pin_pages(obj); 4171 obj->mm.quirked = true; 4172 } 4173 } 4174 4175 if (obj->mm.madv != __I915_MADV_PURGED) 4176 obj->mm.madv = args->madv; 4177 4178 /* if the object is no longer attached, discard its backing storage */ 4179 if (obj->mm.madv == I915_MADV_DONTNEED && 4180 !i915_gem_object_has_pages(obj)) 4181 i915_gem_object_truncate(obj); 4182 4183 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4184 mutex_unlock(&obj->mm.lock); 4185 4186 out: 4187 i915_gem_object_put(obj); 4188 return err; 4189 } 4190 4191 static void 4192 frontbuffer_retire(struct i915_active_request *active, 4193 struct i915_request *request) 4194 { 4195 struct drm_i915_gem_object *obj = 4196 container_of(active, typeof(*obj), frontbuffer_write); 4197 4198 intel_fb_obj_flush(obj, ORIGIN_CS); 4199 } 4200 4201 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4202 const struct drm_i915_gem_object_ops *ops) 4203 { 4204 mutex_init(&obj->mm.lock); 4205 4206 spin_lock_init(&obj->vma.lock); 4207 INIT_LIST_HEAD(&obj->vma.list); 4208 4209 INIT_LIST_HEAD(&obj->lut_list); 4210 INIT_LIST_HEAD(&obj->batch_pool_link); 4211 4212 init_rcu_head(&obj->rcu); 4213 4214 obj->ops = ops; 4215 4216 reservation_object_init(&obj->__builtin_resv); 4217 obj->resv = &obj->__builtin_resv; 4218 4219 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4220 i915_active_request_init(&obj->frontbuffer_write, 4221 NULL, frontbuffer_retire); 4222 4223 obj->mm.madv = I915_MADV_WILLNEED; 4224 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4225 mutex_init(&obj->mm.get_page.lock); 4226 4227 
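	/* Account the new object in the device-wide object statistics */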
i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4228 } 4229 4230 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4231 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4232 I915_GEM_OBJECT_IS_SHRINKABLE, 4233 4234 .get_pages = i915_gem_object_get_pages_gtt, 4235 .put_pages = i915_gem_object_put_pages_gtt, 4236 4237 .pwrite = i915_gem_object_pwrite_gtt, 4238 }; 4239 4240 static int i915_gem_object_create_shmem(struct drm_device *dev, 4241 struct drm_gem_object *obj, 4242 size_t size) 4243 { 4244 struct drm_i915_private *i915 = to_i915(dev); 4245 unsigned long flags = VM_NORESERVE; 4246 struct file *filp; 4247 4248 drm_gem_private_object_init(dev, obj, size); 4249 4250 if (i915->mm.gemfs) 4251 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4252 flags); 4253 else 4254 filp = shmem_file_setup("i915", size, flags); 4255 4256 if (IS_ERR(filp)) 4257 return PTR_ERR(filp); 4258 4259 obj->filp = filp; 4260 4261 return 0; 4262 } 4263 4264 struct drm_i915_gem_object * 4265 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4266 { 4267 struct drm_i915_gem_object *obj; 4268 struct address_space *mapping; 4269 unsigned int cache_level; 4270 gfp_t mask; 4271 int ret; 4272 4273 /* There is a prevalence of the assumption that we fit the object's 4274 * page count inside a 32bit _signed_ variable. Let's document this and 4275 * catch if we ever need to fix it. In the meantime, if you do spot 4276 * such a local variable, please consider fixing! 4277 */ 4278 if (size >> PAGE_SHIFT > INT_MAX) 4279 return ERR_PTR(-E2BIG); 4280 4281 if (overflows_type(size, obj->base.size)) 4282 return ERR_PTR(-E2BIG); 4283 4284 obj = i915_gem_object_alloc(dev_priv); 4285 if (obj == NULL) 4286 return ERR_PTR(-ENOMEM); 4287 4288 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4289 if (ret) 4290 goto fail; 4291 4292 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4293 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4294 /* 965gm cannot relocate objects above 4GiB. */ 4295 mask &= ~__GFP_HIGHMEM; 4296 mask |= __GFP_DMA32; 4297 } 4298 4299 mapping = obj->base.filp->f_mapping; 4300 mapping_set_gfp_mask(mapping, mask); 4301 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4302 4303 i915_gem_object_init(obj, &i915_gem_object_ops); 4304 4305 obj->write_domain = I915_GEM_DOMAIN_CPU; 4306 obj->read_domains = I915_GEM_DOMAIN_CPU; 4307 4308 if (HAS_LLC(dev_priv)) 4309 /* On some devices, we can have the GPU use the LLC (the CPU 4310 * cache) for about a 10% performance improvement 4311 * compared to uncached. Graphics requests other than 4312 * display scanout are coherent with the CPU in 4313 * accessing this cache. This means in this mode we 4314 * don't need to clflush on the CPU side, and on the 4315 * GPU side we only need to flush internal caches to 4316 * get data visible to the CPU. 4317 * 4318 * However, we maintain the display planes as UC, and so 4319 * need to rebind when first used as such. 4320 */ 4321 cache_level = I915_CACHE_LLC; 4322 else 4323 cache_level = I915_CACHE_NONE; 4324 4325 i915_gem_object_set_cache_coherency(obj, cache_level); 4326 4327 trace_i915_gem_object_create(obj); 4328 4329 return obj; 4330 4331 fail: 4332 i915_gem_object_free(obj); 4333 return ERR_PTR(ret); 4334 } 4335 4336 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4337 { 4338 /* If we are the last user of the backing storage (be it shmemfs 4339 * pages or stolen etc), we know that the pages are going to be 4340 * immediately released. 
In this case, we can then skip copying 4341 * back the contents from the GPU. 4342 */ 4343 4344 if (obj->mm.madv != I915_MADV_WILLNEED) 4345 return false; 4346 4347 if (obj->base.filp == NULL) 4348 return true; 4349 4350 /* At first glance, this looks racy, but then again so would be 4351 * userspace racing mmap against close. However, the first external 4352 * reference to the filp can only be obtained through the 4353 * i915_gem_mmap_ioctl() which safeguards us against the user 4354 * acquiring such a reference whilst we are in the middle of 4355 * freeing the object. 4356 */ 4357 return atomic_long_read(&obj->base.filp->f_count) == 1; 4358 } 4359 4360 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4361 struct llist_node *freed) 4362 { 4363 struct drm_i915_gem_object *obj, *on; 4364 intel_wakeref_t wakeref; 4365 4366 wakeref = intel_runtime_pm_get(i915); 4367 llist_for_each_entry_safe(obj, on, freed, freed) { 4368 struct i915_vma *vma, *vn; 4369 4370 trace_i915_gem_object_destroy(obj); 4371 4372 mutex_lock(&i915->drm.struct_mutex); 4373 4374 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4375 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { 4376 GEM_BUG_ON(i915_vma_is_active(vma)); 4377 vma->flags &= ~I915_VMA_PIN_MASK; 4378 i915_vma_destroy(vma); 4379 } 4380 GEM_BUG_ON(!list_empty(&obj->vma.list)); 4381 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); 4382 4383 /* This serializes freeing with the shrinker. Since the free 4384 * is delayed, first by RCU then by the workqueue, we want the 4385 * shrinker to be able to free pages of unreferenced objects, 4386 * or else we may oom whilst there are plenty of deferred 4387 * freed objects. 4388 */ 4389 if (i915_gem_object_has_pages(obj)) { 4390 spin_lock(&i915->mm.obj_lock); 4391 list_del_init(&obj->mm.link); 4392 spin_unlock(&i915->mm.obj_lock); 4393 } 4394 4395 mutex_unlock(&i915->drm.struct_mutex); 4396 4397 GEM_BUG_ON(obj->bind_count); 4398 GEM_BUG_ON(obj->userfault_count); 4399 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4400 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4401 4402 if (obj->ops->release) 4403 obj->ops->release(obj); 4404 4405 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4406 atomic_set(&obj->mm.pages_pin_count, 0); 4407 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4408 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4409 4410 if (obj->base.import_attach) 4411 drm_prime_gem_destroy(&obj->base, NULL); 4412 4413 reservation_object_fini(&obj->__builtin_resv); 4414 drm_gem_object_release(&obj->base); 4415 i915_gem_info_remove_obj(i915, obj->base.size); 4416 4417 kfree(obj->bit_17); 4418 i915_gem_object_free(obj); 4419 4420 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4421 atomic_dec(&i915->mm.free_count); 4422 4423 if (on) 4424 cond_resched(); 4425 } 4426 intel_runtime_pm_put(i915, wakeref); 4427 } 4428 4429 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4430 { 4431 struct llist_node *freed; 4432 4433 /* Free the oldest, most stale object to keep the free_list short */ 4434 freed = NULL; 4435 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4436 /* Only one consumer of llist_del_first() allowed */ 4437 spin_lock(&i915->mm.free_lock); 4438 freed = llist_del_first(&i915->mm.free_list); 4439 spin_unlock(&i915->mm.free_lock); 4440 } 4441 if (unlikely(freed)) { 4442 freed->next = NULL; 4443 __i915_gem_free_objects(i915, freed); 4444 } 4445 } 4446 4447 static void __i915_gem_free_work(struct work_struct *work) 4448 { 4449 struct drm_i915_private *i915 = 
4450 container_of(work, struct drm_i915_private, mm.free_work); 4451 struct llist_node *freed; 4452 4453 /* 4454 * All file-owned VMA should have been released by this point through 4455 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4456 * However, the object may also be bound into the global GTT (e.g. 4457 * older GPUs without per-process support, or for direct access through 4458 * the GTT either for the user or for scanout). Those VMA still need to 4459 * be unbound now. 4460 */ 4461 4462 spin_lock(&i915->mm.free_lock); 4463 while ((freed = llist_del_all(&i915->mm.free_list))) { 4464 spin_unlock(&i915->mm.free_lock); 4465 4466 __i915_gem_free_objects(i915, freed); 4467 if (need_resched()) 4468 return; 4469 4470 spin_lock(&i915->mm.free_lock); 4471 } 4472 spin_unlock(&i915->mm.free_lock); 4473 } 4474 4475 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4476 { 4477 struct drm_i915_gem_object *obj = 4478 container_of(head, typeof(*obj), rcu); 4479 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4480 4481 /* 4482 * We reuse obj->rcu for the freed list, so we had better not treat 4483 * it like a rcu_head from this point forwards. And we expect all 4484 * objects to be freed via this path. 4485 */ 4486 destroy_rcu_head(&obj->rcu); 4487 4488 /* 4489 * Since we require blocking on struct_mutex to unbind the freed 4490 * object from the GPU before releasing resources back to the 4491 * system, we cannot do that directly from the RCU callback (which may 4492 * be a softirq context), but must instead defer that work onto a 4493 * worker. We use the RCU callback rather than move the freed object 4494 * directly onto the work queue so that we can mix between using the 4495 * worker and performing frees directly from subsequent allocations for 4496 * crude but effective memory throttling. 4497 */ 4498 if (llist_add(&obj->freed, &i915->mm.free_list)) 4499 queue_work(i915->wq, &i915->mm.free_work); 4500 } 4501 4502 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4503 { 4504 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4505 4506 if (obj->mm.quirked) 4507 __i915_gem_object_unpin_pages(obj); 4508 4509 if (discard_backing_storage(obj)) 4510 obj->mm.madv = I915_MADV_DONTNEED; 4511 4512 /* 4513 * Before we free the object, make sure any pure RCU-only 4514 * read-side critical sections are complete, e.g. 4515 * i915_gem_busy_ioctl(). For the corresponding synchronized 4516 * lookup see i915_gem_object_lookup_rcu(). 4517 */ 4518 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 4519 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4520 } 4521 4522 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4523 { 4524 lockdep_assert_held(&obj->base.dev->struct_mutex); 4525 4526 if (!i915_gem_object_has_active_reference(obj) && 4527 i915_gem_object_is_active(obj)) 4528 i915_gem_object_set_active_reference(obj); 4529 else 4530 i915_gem_object_put(obj); 4531 } 4532 4533 void i915_gem_sanitize(struct drm_i915_private *i915) 4534 { 4535 intel_wakeref_t wakeref; 4536 4537 GEM_TRACE("\n"); 4538 4539 wakeref = intel_runtime_pm_get(i915); 4540 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 4541 4542 /* 4543 * As we have just resumed the machine and woken the device up from 4544 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 4545 * back to defaults, recovering from whatever wedged state we left it 4546 * in and so worth trying to use the device once more.
4547 */ 4548 if (i915_terminally_wedged(&i915->gpu_error)) 4549 i915_gem_unset_wedged(i915); 4550 4551 /* 4552 * If we inherit context state from the BIOS or earlier occupants 4553 * of the GPU, the GPU may be in an inconsistent state when we 4554 * try to take over. The only way to remove the earlier state 4555 * is by resetting. However, resetting on earlier gen is tricky as 4556 * it may impact the display and we are uncertain about the stability 4557 * of the reset, so this could be applied to even earlier gen. 4558 */ 4559 intel_engines_sanitize(i915, false); 4560 4561 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 4562 intel_runtime_pm_put(i915, wakeref); 4563 4564 mutex_lock(&i915->drm.struct_mutex); 4565 i915_gem_contexts_lost(i915); 4566 mutex_unlock(&i915->drm.struct_mutex); 4567 } 4568 4569 int i915_gem_suspend(struct drm_i915_private *i915) 4570 { 4571 intel_wakeref_t wakeref; 4572 int ret; 4573 4574 GEM_TRACE("\n"); 4575 4576 wakeref = intel_runtime_pm_get(i915); 4577 intel_suspend_gt_powersave(i915); 4578 4579 flush_workqueue(i915->wq); 4580 4581 mutex_lock(&i915->drm.struct_mutex); 4582 4583 /* 4584 * We have to flush all the executing contexts to main memory so 4585 * that they can be saved in the hibernation image. To ensure the last 4586 * context image is coherent, we have to switch away from it. That 4587 * leaves the i915->kernel_context still active when 4588 * we actually suspend, and its image in memory may not match the GPU 4589 * state. Fortunately, the kernel_context is disposable and we do 4590 * not rely on its state. 4591 */ 4592 if (!i915_terminally_wedged(&i915->gpu_error)) { 4593 ret = i915_gem_switch_to_kernel_context(i915); 4594 if (ret) 4595 goto err_unlock; 4596 4597 ret = i915_gem_wait_for_idle(i915, 4598 I915_WAIT_INTERRUPTIBLE | 4599 I915_WAIT_LOCKED | 4600 I915_WAIT_FOR_IDLE_BOOST, 4601 MAX_SCHEDULE_TIMEOUT); 4602 if (ret && ret != -EIO) 4603 goto err_unlock; 4604 4605 assert_kernel_context_is_current(i915); 4606 } 4607 i915_retire_requests(i915); /* ensure we flush after wedging */ 4608 4609 mutex_unlock(&i915->drm.struct_mutex); 4610 i915_reset_flush(i915); 4611 4612 drain_delayed_work(&i915->gt.retire_work); 4613 4614 /* 4615 * As the idle_work re-arms itself if it detects a race, play safe and 4616 * repeat the flush until it is definitely idle. 4617 */ 4618 drain_delayed_work(&i915->gt.idle_work); 4619 4620 intel_uc_suspend(i915); 4621 4622 /* 4623 * Assert that we successfully flushed all the work and 4624 * reset the GPU back to its idle, low power state. 4625 */ 4626 WARN_ON(i915->gt.awake); 4627 if (WARN_ON(!intel_engines_are_idle(i915))) 4628 i915_gem_set_wedged(i915); /* no hope, discard everything */ 4629 4630 intel_runtime_pm_put(i915, wakeref); 4631 return 0; 4632 4633 err_unlock: 4634 mutex_unlock(&i915->drm.struct_mutex); 4635 intel_runtime_pm_put(i915, wakeref); 4636 return ret; 4637 } 4638 4639 void i915_gem_suspend_late(struct drm_i915_private *i915) 4640 { 4641 struct drm_i915_gem_object *obj; 4642 struct list_head *phases[] = { 4643 &i915->mm.unbound_list, 4644 &i915->mm.bound_list, 4645 NULL 4646 }, **phase; 4647 4648 /* 4649 * Neither the BIOS, ourselves nor any other kernel 4650 * expects the system to be in execlists mode on startup, 4651 * so we need to reset the GPU back to legacy mode. And the only 4652 * known way to disable logical contexts is through a GPU reset. 4653 * 4654 * So in order to leave the system in a known default configuration, 4655 * always reset the GPU upon unload and suspend.
Afterwards we 4656 * clean up the GEM state tracking, flushing off the requests and 4657 * leaving the system in a known idle state. 4658 * 4659 * Note that it is of the utmost importance that the GPU is idle and 4660 * all stray writes are flushed *before* we dismantle the backing 4661 * storage for the pinned objects. 4662 * 4663 * However, since we are uncertain that resetting the GPU on older 4664 * machines is a good idea, we don't - just in case it leaves the 4665 * machine in an unusable condition. 4666 */ 4667 4668 mutex_lock(&i915->drm.struct_mutex); 4669 for (phase = phases; *phase; phase++) { 4670 list_for_each_entry(obj, *phase, mm.link) 4671 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 4672 } 4673 mutex_unlock(&i915->drm.struct_mutex); 4674 4675 intel_uc_sanitize(i915); 4676 i915_gem_sanitize(i915); 4677 } 4678 4679 void i915_gem_resume(struct drm_i915_private *i915) 4680 { 4681 GEM_TRACE("\n"); 4682 4683 WARN_ON(i915->gt.awake); 4684 4685 mutex_lock(&i915->drm.struct_mutex); 4686 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 4687 4688 i915_gem_restore_gtt_mappings(i915); 4689 i915_gem_restore_fences(i915); 4690 4691 /* 4692 * As we didn't flush the kernel context before suspend, we cannot 4693 * guarantee that the context image is complete. So let's just reset 4694 * it and start again. 4695 */ 4696 i915->gt.resume(i915); 4697 4698 if (i915_gem_init_hw(i915)) 4699 goto err_wedged; 4700 4701 intel_uc_resume(i915); 4702 4703 /* Always reload a context for powersaving. */ 4704 if (i915_gem_switch_to_kernel_context(i915)) 4705 goto err_wedged; 4706 4707 out_unlock: 4708 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 4709 mutex_unlock(&i915->drm.struct_mutex); 4710 return; 4711 4712 err_wedged: 4713 if (!i915_terminally_wedged(&i915->gpu_error)) { 4714 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 4715 i915_gem_set_wedged(i915); 4716 } 4717 goto out_unlock; 4718 } 4719 4720 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 4721 { 4722 if (INTEL_GEN(dev_priv) < 5 || 4723 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4724 return; 4725 4726 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4727 DISP_TILE_SURFACE_SWIZZLING); 4728 4729 if (IS_GEN(dev_priv, 5)) 4730 return; 4731 4732 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4733 if (IS_GEN(dev_priv, 6)) 4734 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4735 else if (IS_GEN(dev_priv, 7)) 4736 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4737 else if (IS_GEN(dev_priv, 8)) 4738 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4739 else 4740 BUG(); 4741 } 4742 4743 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 4744 { 4745 I915_WRITE(RING_CTL(base), 0); 4746 I915_WRITE(RING_HEAD(base), 0); 4747 I915_WRITE(RING_TAIL(base), 0); 4748 I915_WRITE(RING_START(base), 0); 4749 } 4750 4751 static void init_unused_rings(struct drm_i915_private *dev_priv) 4752 { 4753 if (IS_I830(dev_priv)) { 4754 init_unused_ring(dev_priv, PRB1_BASE); 4755 init_unused_ring(dev_priv, SRB0_BASE); 4756 init_unused_ring(dev_priv, SRB1_BASE); 4757 init_unused_ring(dev_priv, SRB2_BASE); 4758 init_unused_ring(dev_priv, SRB3_BASE); 4759 } else if (IS_GEN(dev_priv, 2)) { 4760 init_unused_ring(dev_priv, SRB0_BASE); 4761 init_unused_ring(dev_priv, SRB1_BASE); 4762 } else if (IS_GEN(dev_priv, 3)) { 4763 init_unused_ring(dev_priv, PRB1_BASE); 4764 init_unused_ring(dev_priv, PRB2_BASE); 4765 } 4766 } 4767 4768 static int
__i915_gem_restart_engines(void *data) 4769 { 4770 struct drm_i915_private *i915 = data; 4771 struct intel_engine_cs *engine; 4772 enum intel_engine_id id; 4773 int err; 4774 4775 for_each_engine(engine, i915, id) { 4776 err = engine->init_hw(engine); 4777 if (err) { 4778 DRM_ERROR("Failed to restart %s (%d)\n", 4779 engine->name, err); 4780 return err; 4781 } 4782 } 4783 4784 return 0; 4785 } 4786 4787 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 4788 { 4789 int ret; 4790 4791 dev_priv->gt.last_init_time = ktime_get(); 4792 4793 /* Double layer security blanket, see i915_gem_init() */ 4794 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4795 4796 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 4797 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4798 4799 if (IS_HASWELL(dev_priv)) 4800 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 4801 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4802 4803 /* Apply the GT workarounds... */ 4804 intel_gt_apply_workarounds(dev_priv); 4805 /* ...and determine whether they are sticking. */ 4806 intel_gt_verify_workarounds(dev_priv, "init"); 4807 4808 i915_gem_init_swizzling(dev_priv); 4809 4810 /* 4811 * At least 830 can leave some of the unused rings 4812 * "active" (i.e. head != tail) after resume which 4813 * will prevent c3 entry. Make sure all unused rings 4814 * are totally idle. 4815 */ 4816 init_unused_rings(dev_priv); 4817 4818 BUG_ON(!dev_priv->kernel_context); 4819 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 4820 ret = -EIO; 4821 goto out; 4822 } 4823 4824 ret = i915_ppgtt_init_hw(dev_priv); 4825 if (ret) { 4826 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 4827 goto out; 4828 } 4829 4830 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 4831 if (ret) { 4832 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 4833 goto out; 4834 } 4835 4836 /* We can't enable contexts until all firmware is loaded */ 4837 ret = intel_uc_init_hw(dev_priv); 4838 if (ret) { 4839 DRM_ERROR("Enabling uc failed (%d)\n", ret); 4840 goto out; 4841 } 4842 4843 intel_mocs_init_l3cc_table(dev_priv); 4844 4845 /* Only when the HW is re-initialised can we replay the requests */ 4846 ret = __i915_gem_restart_engines(dev_priv); 4847 if (ret) 4848 goto cleanup_uc; 4849 4850 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4851 4852 return 0; 4853 4854 cleanup_uc: 4855 intel_uc_fini_hw(dev_priv); 4856 out: 4857 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4858 4859 return ret; 4860 } 4861 4862 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 4863 { 4864 struct i915_gem_context *ctx; 4865 struct intel_engine_cs *engine; 4866 enum intel_engine_id id; 4867 int err; 4868 4869 /* 4870 * As we reset the gpu during very early sanitisation, the current 4871 * register state on the GPU should reflect its default values. 4872 * We load a context onto the hw (with restore-inhibit), then switch 4873 * over to a second context to save that default register state. We 4874 * can then prime every new context with that state so they all start 4875 * from the same default HW values.
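 * (Concretely: we create a kernel context below, submit a request on each
 * engine so that engine->init_context() runs, wait for the GPU to idle and
 * then stash each engine's context image as engine->default_state.)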
4876 */ 4877 4878 ctx = i915_gem_context_create_kernel(i915, 0); 4879 if (IS_ERR(ctx)) 4880 return PTR_ERR(ctx); 4881 4882 for_each_engine(engine, i915, id) { 4883 struct i915_request *rq; 4884 4885 rq = i915_request_alloc(engine, ctx); 4886 if (IS_ERR(rq)) { 4887 err = PTR_ERR(rq); 4888 goto out_ctx; 4889 } 4890 4891 err = 0; 4892 if (engine->init_context) 4893 err = engine->init_context(rq); 4894 4895 i915_request_add(rq); 4896 if (err) 4897 goto err_active; 4898 } 4899 4900 err = i915_gem_switch_to_kernel_context(i915); 4901 if (err) 4902 goto err_active; 4903 4904 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 4905 i915_gem_set_wedged(i915); 4906 err = -EIO; /* Caller will declare us wedged */ 4907 goto err_active; 4908 } 4909 4910 assert_kernel_context_is_current(i915); 4911 4912 /* 4913 * Immediately park the GPU so that we enable powersaving and 4914 * treat it as idle. The next time we issue a request, we will 4915 * unpark and start using the engine->pinned_default_state, otherwise 4916 * it is in limbo and an early reset may fail. 4917 */ 4918 __i915_gem_park(i915); 4919 4920 for_each_engine(engine, i915, id) { 4921 struct i915_vma *state; 4922 void *vaddr; 4923 4924 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); 4925 4926 state = to_intel_context(ctx, engine)->state; 4927 if (!state) 4928 continue; 4929 4930 /* 4931 * As we will hold a reference to the logical state, it will 4932 * not be torn down with the context, and importantly the 4933 * object will hold onto its vma (making it possible for a 4934 * stray GTT write to corrupt our defaults). Unmap the vma 4935 * from the GTT to prevent such accidents and reclaim the 4936 * space. 4937 */ 4938 err = i915_vma_unbind(state); 4939 if (err) 4940 goto err_active; 4941 4942 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 4943 if (err) 4944 goto err_active; 4945 4946 engine->default_state = i915_gem_object_get(state->obj); 4947 4948 /* Check we can acquire the image of the context state */ 4949 vaddr = i915_gem_object_pin_map(engine->default_state, 4950 I915_MAP_FORCE_WB); 4951 if (IS_ERR(vaddr)) { 4952 err = PTR_ERR(vaddr); 4953 goto err_active; 4954 } 4955 4956 i915_gem_object_unpin_map(engine->default_state); 4957 } 4958 4959 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 4960 unsigned int found = intel_engines_has_context_isolation(i915); 4961 4962 /* 4963 * Make sure that classes with multiple engine instances all 4964 * share the same basic configuration. 4965 */ 4966 for_each_engine(engine, i915, id) { 4967 unsigned int bit = BIT(engine->uabi_class); 4968 unsigned int expected = engine->default_state ? bit : 0; 4969 4970 if ((found & bit) != expected) { 4971 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 4972 engine->uabi_class, engine->name); 4973 } 4974 } 4975 } 4976 4977 out_ctx: 4978 i915_gem_context_set_closed(ctx); 4979 i915_gem_context_put(ctx); 4980 return err; 4981 4982 err_active: 4983 /* 4984 * If we have to abandon now, we expect the engines to be idle 4985 * and ready to be torn-down. First try to flush any remaining 4986 * request, ensure we are pointing at the kernel context and 4987 * then remove it. 
4988 */ 4989 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 4990 goto out_ctx; 4991 4992 if (WARN_ON(i915_gem_wait_for_idle(i915, 4993 I915_WAIT_LOCKED, 4994 MAX_SCHEDULE_TIMEOUT))) 4995 goto out_ctx; 4996 4997 i915_gem_contexts_lost(i915); 4998 goto out_ctx; 4999 } 5000 5001 static int 5002 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 5003 { 5004 struct drm_i915_gem_object *obj; 5005 struct i915_vma *vma; 5006 int ret; 5007 5008 obj = i915_gem_object_create_stolen(i915, size); 5009 if (!obj) 5010 obj = i915_gem_object_create_internal(i915, size); 5011 if (IS_ERR(obj)) { 5012 DRM_ERROR("Failed to allocate scratch page\n"); 5013 return PTR_ERR(obj); 5014 } 5015 5016 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 5017 if (IS_ERR(vma)) { 5018 ret = PTR_ERR(vma); 5019 goto err_unref; 5020 } 5021 5022 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 5023 if (ret) 5024 goto err_unref; 5025 5026 i915->gt.scratch = vma; 5027 return 0; 5028 5029 err_unref: 5030 i915_gem_object_put(obj); 5031 return ret; 5032 } 5033 5034 static void i915_gem_fini_scratch(struct drm_i915_private *i915) 5035 { 5036 i915_vma_unpin_and_release(&i915->gt.scratch, 0); 5037 } 5038 5039 int i915_gem_init(struct drm_i915_private *dev_priv) 5040 { 5041 int ret; 5042 5043 /* We need to fallback to 4K pages if host doesn't support huge gtt. */ 5044 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5045 mkwrite_device_info(dev_priv)->page_sizes = 5046 I915_GTT_PAGE_SIZE_4K; 5047 5048 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5049 5050 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5051 dev_priv->gt.resume = intel_lr_context_resume; 5052 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5053 } else { 5054 dev_priv->gt.resume = intel_legacy_submission_resume; 5055 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5056 } 5057 5058 i915_timelines_init(dev_priv); 5059 5060 ret = i915_gem_init_userptr(dev_priv); 5061 if (ret) 5062 return ret; 5063 5064 ret = intel_uc_init_misc(dev_priv); 5065 if (ret) 5066 return ret; 5067 5068 ret = intel_wopcm_init(&dev_priv->wopcm); 5069 if (ret) 5070 goto err_uc_misc; 5071 5072 /* This is just a security blanket to placate dragons. 5073 * On some systems, we very sporadically observe that the first TLBs 5074 * used by the CS may be stale, despite us poking the TLB reset. If 5075 * we hold the forcewake during initialisation these problems 5076 * just magically go away. 5077 */ 5078 mutex_lock(&dev_priv->drm.struct_mutex); 5079 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5080 5081 ret = i915_gem_init_ggtt(dev_priv); 5082 if (ret) { 5083 GEM_BUG_ON(ret == -EIO); 5084 goto err_unlock; 5085 } 5086 5087 ret = i915_gem_init_scratch(dev_priv, 5088 IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE); 5089 if (ret) { 5090 GEM_BUG_ON(ret == -EIO); 5091 goto err_ggtt; 5092 } 5093 5094 ret = i915_gem_contexts_init(dev_priv); 5095 if (ret) { 5096 GEM_BUG_ON(ret == -EIO); 5097 goto err_scratch; 5098 } 5099 5100 ret = intel_engines_init(dev_priv); 5101 if (ret) { 5102 GEM_BUG_ON(ret == -EIO); 5103 goto err_context; 5104 } 5105 5106 intel_init_gt_powersave(dev_priv); 5107 5108 ret = intel_uc_init(dev_priv); 5109 if (ret) 5110 goto err_pm; 5111 5112 ret = i915_gem_init_hw(dev_priv); 5113 if (ret) 5114 goto err_uc_init; 5115 5116 /* 5117 * Despite its name intel_init_clock_gating applies both display 5118 * clock gating workarounds; GT mmio workarounds and the occasional 5119 * GT power context workaround. 
Worse, sometimes it includes a context 5120 * register workaround which we need to apply before we record the 5121 * default HW state for all contexts. 5122 * 5123 * FIXME: break up the workarounds and apply them at the right time! 5124 */ 5125 intel_init_clock_gating(dev_priv); 5126 5127 ret = __intel_engines_record_defaults(dev_priv); 5128 if (ret) 5129 goto err_init_hw; 5130 5131 if (i915_inject_load_failure()) { 5132 ret = -ENODEV; 5133 goto err_init_hw; 5134 } 5135 5136 if (i915_inject_load_failure()) { 5137 ret = -EIO; 5138 goto err_init_hw; 5139 } 5140 5141 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5142 mutex_unlock(&dev_priv->drm.struct_mutex); 5143 5144 return 0; 5145 5146 /* 5147 * Unwinding is complicated by that we want to handle -EIO to mean 5148 * disable GPU submission but keep KMS alive. We want to mark the 5149 * HW as irrevisibly wedged, but keep enough state around that the 5150 * driver doesn't explode during runtime. 5151 */ 5152 err_init_hw: 5153 mutex_unlock(&dev_priv->drm.struct_mutex); 5154 5155 WARN_ON(i915_gem_suspend(dev_priv)); 5156 i915_gem_suspend_late(dev_priv); 5157 5158 i915_gem_drain_workqueue(dev_priv); 5159 5160 mutex_lock(&dev_priv->drm.struct_mutex); 5161 intel_uc_fini_hw(dev_priv); 5162 err_uc_init: 5163 intel_uc_fini(dev_priv); 5164 err_pm: 5165 if (ret != -EIO) { 5166 intel_cleanup_gt_powersave(dev_priv); 5167 i915_gem_cleanup_engines(dev_priv); 5168 } 5169 err_context: 5170 if (ret != -EIO) 5171 i915_gem_contexts_fini(dev_priv); 5172 err_scratch: 5173 i915_gem_fini_scratch(dev_priv); 5174 err_ggtt: 5175 err_unlock: 5176 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5177 mutex_unlock(&dev_priv->drm.struct_mutex); 5178 5179 err_uc_misc: 5180 intel_uc_fini_misc(dev_priv); 5181 5182 if (ret != -EIO) { 5183 i915_gem_cleanup_userptr(dev_priv); 5184 i915_timelines_fini(dev_priv); 5185 } 5186 5187 if (ret == -EIO) { 5188 mutex_lock(&dev_priv->drm.struct_mutex); 5189 5190 /* 5191 * Allow engine initialisation to fail by marking the GPU as 5192 * wedged. But we only want to do this where the GPU is angry, 5193 * for all other failure, such as an allocation failure, bail. 5194 */ 5195 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5196 i915_load_error(dev_priv, 5197 "Failed to initialize GPU, declaring it wedged!\n"); 5198 i915_gem_set_wedged(dev_priv); 5199 } 5200 5201 /* Minimal basic recovery for KMS */ 5202 ret = i915_ggtt_enable_hw(dev_priv); 5203 i915_gem_restore_gtt_mappings(dev_priv); 5204 i915_gem_restore_fences(dev_priv); 5205 intel_init_clock_gating(dev_priv); 5206 5207 mutex_unlock(&dev_priv->drm.struct_mutex); 5208 } 5209 5210 i915_gem_drain_freed_objects(dev_priv); 5211 return ret; 5212 } 5213 5214 void i915_gem_fini(struct drm_i915_private *dev_priv) 5215 { 5216 i915_gem_suspend_late(dev_priv); 5217 intel_disable_gt_powersave(dev_priv); 5218 5219 /* Flush any outstanding unpin_work. 
*/ 5220 i915_gem_drain_workqueue(dev_priv); 5221 5222 mutex_lock(&dev_priv->drm.struct_mutex); 5223 intel_uc_fini_hw(dev_priv); 5224 intel_uc_fini(dev_priv); 5225 i915_gem_cleanup_engines(dev_priv); 5226 i915_gem_contexts_fini(dev_priv); 5227 i915_gem_fini_scratch(dev_priv); 5228 mutex_unlock(&dev_priv->drm.struct_mutex); 5229 5230 intel_wa_list_free(&dev_priv->gt_wa_list); 5231 5232 intel_cleanup_gt_powersave(dev_priv); 5233 5234 intel_uc_fini_misc(dev_priv); 5235 i915_gem_cleanup_userptr(dev_priv); 5236 i915_timelines_fini(dev_priv); 5237 5238 i915_gem_drain_freed_objects(dev_priv); 5239 5240 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5241 } 5242 5243 void i915_gem_init_mmio(struct drm_i915_private *i915) 5244 { 5245 i915_gem_sanitize(i915); 5246 } 5247 5248 void 5249 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5250 { 5251 struct intel_engine_cs *engine; 5252 enum intel_engine_id id; 5253 5254 for_each_engine(engine, dev_priv, id) 5255 dev_priv->gt.cleanup_engine(engine); 5256 } 5257 5258 void 5259 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5260 { 5261 int i; 5262 5263 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5264 !IS_CHERRYVIEW(dev_priv)) 5265 dev_priv->num_fence_regs = 32; 5266 else if (INTEL_GEN(dev_priv) >= 4 || 5267 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5268 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5269 dev_priv->num_fence_regs = 16; 5270 else 5271 dev_priv->num_fence_regs = 8; 5272 5273 if (intel_vgpu_active(dev_priv)) 5274 dev_priv->num_fence_regs = 5275 I915_READ(vgtif_reg(avail_rs.fence_num)); 5276 5277 /* Initialize fence registers to zero */ 5278 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5279 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5280 5281 fence->i915 = dev_priv; 5282 fence->id = i; 5283 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5284 } 5285 i915_gem_restore_fences(dev_priv); 5286 5287 i915_gem_detect_bit_6_swizzle(dev_priv); 5288 } 5289 5290 static void i915_gem_init__mm(struct drm_i915_private *i915) 5291 { 5292 spin_lock_init(&i915->mm.object_stat_lock); 5293 spin_lock_init(&i915->mm.obj_lock); 5294 spin_lock_init(&i915->mm.free_lock); 5295 5296 init_llist_head(&i915->mm.free_list); 5297 5298 INIT_LIST_HEAD(&i915->mm.unbound_list); 5299 INIT_LIST_HEAD(&i915->mm.bound_list); 5300 INIT_LIST_HEAD(&i915->mm.fence_list); 5301 INIT_LIST_HEAD(&i915->mm.userfault_list); 5302 5303 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5304 } 5305 5306 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5307 { 5308 int err = -ENOMEM; 5309 5310 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5311 if (!dev_priv->objects) 5312 goto err_out; 5313 5314 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5315 if (!dev_priv->vmas) 5316 goto err_objects; 5317 5318 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5319 if (!dev_priv->luts) 5320 goto err_vmas; 5321 5322 dev_priv->requests = KMEM_CACHE(i915_request, 5323 SLAB_HWCACHE_ALIGN | 5324 SLAB_RECLAIM_ACCOUNT | 5325 SLAB_TYPESAFE_BY_RCU); 5326 if (!dev_priv->requests) 5327 goto err_luts; 5328 5329 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5330 SLAB_HWCACHE_ALIGN | 5331 SLAB_RECLAIM_ACCOUNT); 5332 if (!dev_priv->dependencies) 5333 goto err_requests; 5334 5335 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5336 if (!dev_priv->priorities) 5337 goto err_dependencies; 5338 5339 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5340 
INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5341 5342 i915_gem_init__mm(dev_priv); 5343 5344 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5345 i915_gem_retire_work_handler); 5346 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5347 i915_gem_idle_work_handler); 5348 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5349 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5350 mutex_init(&dev_priv->gpu_error.wedge_mutex); 5351 5352 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5353 5354 spin_lock_init(&dev_priv->fb_tracking.lock); 5355 5356 err = i915_gemfs_init(dev_priv); 5357 if (err) 5358 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5359 5360 return 0; 5361 5362 err_dependencies: 5363 kmem_cache_destroy(dev_priv->dependencies); 5364 err_requests: 5365 kmem_cache_destroy(dev_priv->requests); 5366 err_luts: 5367 kmem_cache_destroy(dev_priv->luts); 5368 err_vmas: 5369 kmem_cache_destroy(dev_priv->vmas); 5370 err_objects: 5371 kmem_cache_destroy(dev_priv->objects); 5372 err_out: 5373 return err; 5374 } 5375 5376 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 5377 { 5378 i915_gem_drain_freed_objects(dev_priv); 5379 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 5380 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 5381 WARN_ON(dev_priv->mm.object_count); 5382 5383 kmem_cache_destroy(dev_priv->priorities); 5384 kmem_cache_destroy(dev_priv->dependencies); 5385 kmem_cache_destroy(dev_priv->requests); 5386 kmem_cache_destroy(dev_priv->luts); 5387 kmem_cache_destroy(dev_priv->vmas); 5388 kmem_cache_destroy(dev_priv->objects); 5389 5390 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5391 rcu_barrier(); 5392 5393 i915_gemfs_fini(dev_priv); 5394 } 5395 5396 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5397 { 5398 /* Discard all purgeable objects, let userspace recover those as 5399 * required after resuming. 5400 */ 5401 i915_gem_shrink_all(dev_priv); 5402 5403 return 0; 5404 } 5405 5406 int i915_gem_freeze_late(struct drm_i915_private *i915) 5407 { 5408 struct drm_i915_gem_object *obj; 5409 struct list_head *phases[] = { 5410 &i915->mm.unbound_list, 5411 &i915->mm.bound_list, 5412 NULL 5413 }, **phase; 5414 5415 /* 5416 * Called just before we write the hibernation image. 5417 * 5418 * We need to update the domain tracking to reflect that the CPU 5419 * will be accessing all the pages to create and restore from the 5420 * hibernation, and so upon restoration those pages will be in the 5421 * CPU domain. 5422 * 5423 * To make sure the hibernation image contains the latest state, 5424 * we update that state just before writing out the image. 
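 * (That is what the loop below does: it moves every object on the unbound
 * and bound lists to the CPU write domain.)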
5425 * 5426 * To try and reduce the hibernation image, we manually shrink 5427 * the objects as well, see i915_gem_freeze() 5428 */ 5429 5430 i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND); 5431 i915_gem_drain_freed_objects(i915); 5432 5433 mutex_lock(&i915->drm.struct_mutex); 5434 for (phase = phases; *phase; phase++) { 5435 list_for_each_entry(obj, *phase, mm.link) 5436 WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); 5437 } 5438 mutex_unlock(&i915->drm.struct_mutex); 5439 5440 return 0; 5441 } 5442 5443 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5444 { 5445 struct drm_i915_file_private *file_priv = file->driver_priv; 5446 struct i915_request *request; 5447 5448 /* Clean up our request list when the client is going away, so that 5449 * later retire_requests won't dereference our soon-to-be-gone 5450 * file_priv. 5451 */ 5452 spin_lock(&file_priv->mm.lock); 5453 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 5454 request->file_priv = NULL; 5455 spin_unlock(&file_priv->mm.lock); 5456 } 5457 5458 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 5459 { 5460 struct drm_i915_file_private *file_priv; 5461 int ret; 5462 5463 DRM_DEBUG("\n"); 5464 5465 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5466 if (!file_priv) 5467 return -ENOMEM; 5468 5469 file->driver_priv = file_priv; 5470 file_priv->dev_priv = i915; 5471 file_priv->file = file; 5472 5473 spin_lock_init(&file_priv->mm.lock); 5474 INIT_LIST_HEAD(&file_priv->mm.request_list); 5475 5476 file_priv->bsd_engine = -1; 5477 file_priv->hang_timestamp = jiffies; 5478 5479 ret = i915_gem_context_open(i915, file); 5480 if (ret) 5481 kfree(file_priv); 5482 5483 return ret; 5484 } 5485 5486 /** 5487 * i915_gem_track_fb - update frontbuffer tracking 5488 * @old: current GEM buffer for the frontbuffer slots 5489 * @new: new GEM buffer for the frontbuffer slots 5490 * @frontbuffer_bits: bitmask of frontbuffer slots 5491 * 5492 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5493 * from @old and setting them in @new. Both @old and @new can be NULL. 5494 */ 5495 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5496 struct drm_i915_gem_object *new, 5497 unsigned frontbuffer_bits) 5498 { 5499 /* Control of individual bits within the mask are guarded by 5500 * the owning plane->mutex, i.e. we can never see concurrent 5501 * manipulation of individual bits. But since the bitfield as a whole 5502 * is updated using RMW, we need to use atomics in order to update 5503 * the bits. 
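 * (The BUILD_BUG_ON below checks that the frontbuffer bits for all pipes
 * fit within the atomic_t used for this tracking.)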
5504 */ 5505 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 5506 BITS_PER_TYPE(atomic_t)); 5507 5508 if (old) { 5509 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 5510 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 5511 } 5512 5513 if (new) { 5514 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 5515 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 5516 } 5517 } 5518 5519 /* Allocate a new GEM object and fill it with the supplied data */ 5520 struct drm_i915_gem_object * 5521 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, 5522 const void *data, size_t size) 5523 { 5524 struct drm_i915_gem_object *obj; 5525 struct file *file; 5526 size_t offset; 5527 int err; 5528 5529 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); 5530 if (IS_ERR(obj)) 5531 return obj; 5532 5533 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 5534 5535 file = obj->base.filp; 5536 offset = 0; 5537 do { 5538 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 5539 struct page *page; 5540 void *pgdata, *vaddr; 5541 5542 err = pagecache_write_begin(file, file->f_mapping, 5543 offset, len, 0, 5544 &page, &pgdata); 5545 if (err < 0) 5546 goto fail; 5547 5548 vaddr = kmap(page); 5549 memcpy(vaddr, data, len); 5550 kunmap(page); 5551 5552 err = pagecache_write_end(file, file->f_mapping, 5553 offset, len, len, 5554 page, pgdata); 5555 if (err < 0) 5556 goto fail; 5557 5558 size -= len; 5559 data += len; 5560 offset += len; 5561 } while (size); 5562 5563 return obj; 5564 5565 fail: 5566 i915_gem_object_put(obj); 5567 return ERR_PTR(err); 5568 } 5569 5570 struct scatterlist * 5571 i915_gem_object_get_sg(struct drm_i915_gem_object *obj, 5572 unsigned int n, 5573 unsigned int *offset) 5574 { 5575 struct i915_gem_object_page_iter *iter = &obj->mm.get_page; 5576 struct scatterlist *sg; 5577 unsigned int idx, count; 5578 5579 might_sleep(); 5580 GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); 5581 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 5582 5583 /* As we iterate forward through the sg, we record each entry in a 5584 * radixtree for quick repeated (backwards) lookups. If we have seen 5585 * this index previously, we will have an entry for it. 5586 * 5587 * Initial lookup is O(N), but this is amortized to O(1) for 5588 * sequential page access (where each new request is consecutive 5589 * to the previous one). Repeated lookups are O(lg(obj->base.size)), 5590 * i.e. O(1) with a large constant! 5591 */ 5592 if (n < READ_ONCE(iter->sg_idx)) 5593 goto lookup; 5594 5595 mutex_lock(&iter->lock); 5596 5597 /* We prefer to reuse the last sg so that repeated lookup of this 5598 * (or the subsequent) sg are fast - comparing against the last 5599 * sg is faster than going through the radixtree. 5600 */ 5601 5602 sg = iter->sg_pos; 5603 idx = iter->sg_idx; 5604 count = __sg_page_count(sg); 5605 5606 while (idx + count <= n) { 5607 void *entry; 5608 unsigned long i; 5609 int ret; 5610 5611 /* If we cannot allocate and insert this entry, or the 5612 * individual pages from this range, cancel updating the 5613 * sg_idx so that on this lookup we are forced to linearly 5614 * scan onwards, but on future lookups we will try the 5615 * insertion again (in which case we need to be careful of 5616 * the error return reporting that we have already inserted 5617 * this index). 
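 * (The layout being built is: the scatterlist pointer is stored at its
 * first page index, and each following page of that entry stores a value
 * entry pointing back at that first index; the lookup path below unpacks
 * this with xa_is_value()/xa_to_value().)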
5618 */ 5619 ret = radix_tree_insert(&iter->radix, idx, sg); 5620 if (ret && ret != -EEXIST) 5621 goto scan; 5622 5623 entry = xa_mk_value(idx); 5624 for (i = 1; i < count; i++) { 5625 ret = radix_tree_insert(&iter->radix, idx + i, entry); 5626 if (ret && ret != -EEXIST) 5627 goto scan; 5628 } 5629 5630 idx += count; 5631 sg = ____sg_next(sg); 5632 count = __sg_page_count(sg); 5633 } 5634 5635 scan: 5636 iter->sg_pos = sg; 5637 iter->sg_idx = idx; 5638 5639 mutex_unlock(&iter->lock); 5640 5641 if (unlikely(n < idx)) /* insertion completed by another thread */ 5642 goto lookup; 5643 5644 /* In case we failed to insert the entry into the radixtree, we need 5645 * to look beyond the current sg. 5646 */ 5647 while (idx + count <= n) { 5648 idx += count; 5649 sg = ____sg_next(sg); 5650 count = __sg_page_count(sg); 5651 } 5652 5653 *offset = n - idx; 5654 return sg; 5655 5656 lookup: 5657 rcu_read_lock(); 5658 5659 sg = radix_tree_lookup(&iter->radix, n); 5660 GEM_BUG_ON(!sg); 5661 5662 /* If this index is in the middle of multi-page sg entry, 5663 * the radix tree will contain a value entry that points 5664 * to the start of that range. We will return the pointer to 5665 * the base page and the offset of this page within the 5666 * sg entry's range. 5667 */ 5668 *offset = 0; 5669 if (unlikely(xa_is_value(sg))) { 5670 unsigned long base = xa_to_value(sg); 5671 5672 sg = radix_tree_lookup(&iter->radix, base); 5673 GEM_BUG_ON(!sg); 5674 5675 *offset = n - base; 5676 } 5677 5678 rcu_read_unlock(); 5679 5680 return sg; 5681 } 5682 5683 struct page * 5684 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) 5685 { 5686 struct scatterlist *sg; 5687 unsigned int offset; 5688 5689 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); 5690 5691 sg = i915_gem_object_get_sg(obj, n, &offset); 5692 return nth_page(sg_page(sg), offset); 5693 } 5694 5695 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5696 struct page * 5697 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, 5698 unsigned int n) 5699 { 5700 struct page *page; 5701 5702 page = i915_gem_object_get_page(obj, n); 5703 if (!obj->mm.dirty) 5704 set_page_dirty(page); 5705 5706 return page; 5707 } 5708 5709 dma_addr_t 5710 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, 5711 unsigned long n) 5712 { 5713 struct scatterlist *sg; 5714 unsigned int offset; 5715 5716 sg = i915_gem_object_get_sg(obj, n, &offset); 5717 return sg_dma_address(sg) + (offset << PAGE_SHIFT); 5718 } 5719 5720 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) 5721 { 5722 struct sg_table *pages; 5723 int err; 5724 5725 if (align > obj->base.size) 5726 return -EINVAL; 5727 5728 if (obj->ops == &i915_gem_phys_ops) 5729 return 0; 5730 5731 if (obj->ops != &i915_gem_object_ops) 5732 return -EINVAL; 5733 5734 err = i915_gem_object_unbind(obj); 5735 if (err) 5736 return err; 5737 5738 mutex_lock(&obj->mm.lock); 5739 5740 if (obj->mm.madv != I915_MADV_WILLNEED) { 5741 err = -EFAULT; 5742 goto err_unlock; 5743 } 5744 5745 if (obj->mm.quirked) { 5746 err = -EFAULT; 5747 goto err_unlock; 5748 } 5749 5750 if (obj->mm.mapping) { 5751 err = -EBUSY; 5752 goto err_unlock; 5753 } 5754 5755 pages = __i915_gem_object_unset_pages(obj); 5756 5757 obj->ops = &i915_gem_phys_ops; 5758 5759 err = ____i915_gem_object_get_pages(obj); 5760 if (err) 5761 goto err_xfer; 5762 5763 /* Perma-pin (until release) the physical set of pages */ 5764 __i915_gem_object_pin_pages(obj); 5765 5766 if (!IS_ERR_OR_NULL(pages)) 
5767 i915_gem_object_ops.put_pages(obj, pages); 5768 mutex_unlock(&obj->mm.lock); 5769 return 0; 5770 5771 err_xfer: 5772 obj->ops = &i915_gem_object_ops; 5773 if (!IS_ERR_OR_NULL(pages)) { 5774 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl); 5775 5776 __i915_gem_object_set_pages(obj, pages, sg_page_sizes); 5777 } 5778 err_unlock: 5779 mutex_unlock(&obj->mm.lock); 5780 return err; 5781 } 5782 5783 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5784 #include "selftests/scatterlist.c" 5785 #include "selftests/mock_gem_device.c" 5786 #include "selftests/huge_gem_object.c" 5787 #include "selftests/huge_pages.c" 5788 #include "selftests/i915_gem_object.c" 5789 #include "selftests/i915_gem_coherency.c" 5790 #include "selftests/i915_gem.c" 5791 #endif 5792