1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drm_vma_manager.h> 29 #include <drm/drm_pci.h> 30 #include <drm/i915_drm.h> 31 #include <linux/dma-fence-array.h> 32 #include <linux/kthread.h> 33 #include <linux/reservation.h> 34 #include <linux/shmem_fs.h> 35 #include <linux/slab.h> 36 #include <linux/stop_machine.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 #include <linux/dma-buf.h> 40 #include <linux/mman.h> 41 42 #include "i915_drv.h" 43 #include "i915_gem_clflush.h" 44 #include "i915_gemfs.h" 45 #include "i915_reset.h" 46 #include "i915_trace.h" 47 #include "i915_vgpu.h" 48 49 #include "intel_drv.h" 50 #include "intel_frontbuffer.h" 51 #include "intel_mocs.h" 52 #include "intel_workarounds.h" 53 54 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 55 56 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 57 { 58 if (obj->cache_dirty) 59 return false; 60 61 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 62 return true; 63 64 return obj->pin_global; /* currently in use by HW, keep flushed */ 65 } 66 67 static int 68 insert_mappable_node(struct i915_ggtt *ggtt, 69 struct drm_mm_node *node, u32 size) 70 { 71 memset(node, 0, sizeof(*node)); 72 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 73 size, 0, I915_COLOR_UNEVICTABLE, 74 0, ggtt->mappable_end, 75 DRM_MM_INSERT_LOW); 76 } 77 78 static void 79 remove_mappable_node(struct drm_mm_node *node) 80 { 81 drm_mm_remove_node(node); 82 } 83 84 /* some bookkeeping */ 85 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 86 u64 size) 87 { 88 spin_lock(&dev_priv->mm.object_stat_lock); 89 dev_priv->mm.object_count++; 90 dev_priv->mm.object_memory += size; 91 spin_unlock(&dev_priv->mm.object_stat_lock); 92 } 93 94 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 95 u64 size) 96 { 97 spin_lock(&dev_priv->mm.object_stat_lock); 98 dev_priv->mm.object_count--; 99 dev_priv->mm.object_memory -= size; 100 spin_unlock(&dev_priv->mm.object_stat_lock); 101 } 102 103 static int 104 i915_gem_wait_for_error(struct i915_gpu_error *error) 105 { 106 int ret; 107 108 might_sleep(); 109 110 /* 111 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 112 * userspace. 
If it takes that long something really bad is going on and 113 * we should simply try to bail out and fail as gracefully as possible. 114 */ 115 ret = wait_event_interruptible_timeout(error->reset_queue, 116 !i915_reset_backoff(error), 117 I915_RESET_TIMEOUT); 118 if (ret == 0) { 119 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 120 return -EIO; 121 } else if (ret < 0) { 122 return ret; 123 } else { 124 return 0; 125 } 126 } 127 128 int i915_mutex_lock_interruptible(struct drm_device *dev) 129 { 130 struct drm_i915_private *dev_priv = to_i915(dev); 131 int ret; 132 133 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 134 if (ret) 135 return ret; 136 137 ret = mutex_lock_interruptible(&dev->struct_mutex); 138 if (ret) 139 return ret; 140 141 return 0; 142 } 143 144 static u32 __i915_gem_park(struct drm_i915_private *i915) 145 { 146 intel_wakeref_t wakeref; 147 148 GEM_TRACE("\n"); 149 150 lockdep_assert_held(&i915->drm.struct_mutex); 151 GEM_BUG_ON(i915->gt.active_requests); 152 GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 153 154 if (!i915->gt.awake) 155 return I915_EPOCH_INVALID; 156 157 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); 158 159 /* 160 * Be paranoid and flush a concurrent interrupt to make sure 161 * we don't reactivate any irq tasklets after parking. 162 * 163 * FIXME: Note that even though we have waited for execlists to be idle, 164 * there may still be an in-flight interrupt even though the CSB 165 * is now empty. synchronize_irq() makes sure that a residual interrupt 166 * is completed before we continue, but it doesn't prevent the HW from 167 * raising a spurious interrupt later. To complete the shield we should 168 * coordinate disabling the CS irq with flushing the interrupts. 169 */ 170 synchronize_irq(i915->drm.irq); 171 172 intel_engines_park(i915); 173 i915_timelines_park(i915); 174 175 i915_pmu_gt_parked(i915); 176 i915_vma_parked(i915); 177 178 wakeref = fetch_and_zero(&i915->gt.awake); 179 GEM_BUG_ON(!wakeref); 180 181 if (INTEL_GEN(i915) >= 6) 182 gen6_rps_idle(i915); 183 184 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); 185 186 return i915->gt.epoch; 187 } 188 189 void i915_gem_park(struct drm_i915_private *i915) 190 { 191 GEM_TRACE("\n"); 192 193 lockdep_assert_held(&i915->drm.struct_mutex); 194 GEM_BUG_ON(i915->gt.active_requests); 195 196 if (!i915->gt.awake) 197 return; 198 199 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ 200 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); 201 } 202 203 void i915_gem_unpark(struct drm_i915_private *i915) 204 { 205 GEM_TRACE("\n"); 206 207 lockdep_assert_held(&i915->drm.struct_mutex); 208 GEM_BUG_ON(!i915->gt.active_requests); 209 assert_rpm_wakelock_held(i915); 210 211 if (i915->gt.awake) 212 return; 213 214 /* 215 * It seems that the DMC likes to transition between the DC states a lot 216 * when there are no connected displays (no active power domains) during 217 * command submission. 218 * 219 * This activity has negative impact on the performance of the chip with 220 * huge latencies observed in the interrupt handler and elsewhere. 221 * 222 * Work around it by grabbing a GT IRQ power domain whilst there is any 223 * GT activity, preventing any DC state transitions. 
224 */ 225 i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); 226 GEM_BUG_ON(!i915->gt.awake); 227 228 if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ 229 i915->gt.epoch = 1; 230 231 intel_enable_gt_powersave(i915); 232 i915_update_gfx_val(i915); 233 if (INTEL_GEN(i915) >= 6) 234 gen6_rps_busy(i915); 235 i915_pmu_gt_unparked(i915); 236 237 intel_engines_unpark(i915); 238 239 i915_queue_hangcheck(i915); 240 241 queue_delayed_work(i915->wq, 242 &i915->gt.retire_work, 243 round_jiffies_up_relative(HZ)); 244 } 245 246 int 247 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 248 struct drm_file *file) 249 { 250 struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; 251 struct drm_i915_gem_get_aperture *args = data; 252 struct i915_vma *vma; 253 u64 pinned; 254 255 mutex_lock(&ggtt->vm.mutex); 256 257 pinned = ggtt->vm.reserved; 258 list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) 259 if (i915_vma_is_pinned(vma)) 260 pinned += vma->node.size; 261 262 mutex_unlock(&ggtt->vm.mutex); 263 264 args->aper_size = ggtt->vm.total; 265 args->aper_available_size = args->aper_size - pinned; 266 267 return 0; 268 } 269 270 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 271 { 272 struct address_space *mapping = obj->base.filp->f_mapping; 273 drm_dma_handle_t *phys; 274 struct sg_table *st; 275 struct scatterlist *sg; 276 char *vaddr; 277 int i; 278 int err; 279 280 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 281 return -EINVAL; 282 283 /* Always aligning to the object size, allows a single allocation 284 * to handle all possible callers, and given typical object sizes, 285 * the alignment of the buddy allocation will naturally match. 286 */ 287 phys = drm_pci_alloc(obj->base.dev, 288 roundup_pow_of_two(obj->base.size), 289 roundup_pow_of_two(obj->base.size)); 290 if (!phys) 291 return -ENOMEM; 292 293 vaddr = phys->vaddr; 294 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 295 struct page *page; 296 char *src; 297 298 page = shmem_read_mapping_page(mapping, i); 299 if (IS_ERR(page)) { 300 err = PTR_ERR(page); 301 goto err_phys; 302 } 303 304 src = kmap_atomic(page); 305 memcpy(vaddr, src, PAGE_SIZE); 306 drm_clflush_virt_range(vaddr, PAGE_SIZE); 307 kunmap_atomic(src); 308 309 put_page(page); 310 vaddr += PAGE_SIZE; 311 } 312 313 i915_gem_chipset_flush(to_i915(obj->base.dev)); 314 315 st = kmalloc(sizeof(*st), GFP_KERNEL); 316 if (!st) { 317 err = -ENOMEM; 318 goto err_phys; 319 } 320 321 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 322 kfree(st); 323 err = -ENOMEM; 324 goto err_phys; 325 } 326 327 sg = st->sgl; 328 sg->offset = 0; 329 sg->length = obj->base.size; 330 331 sg_dma_address(sg) = phys->busaddr; 332 sg_dma_len(sg) = obj->base.size; 333 334 obj->phys_handle = phys; 335 336 __i915_gem_object_set_pages(obj, st, sg->length); 337 338 return 0; 339 340 err_phys: 341 drm_pci_free(obj->base.dev, phys); 342 343 return err; 344 } 345 346 static void __start_cpu_write(struct drm_i915_gem_object *obj) 347 { 348 obj->read_domains = I915_GEM_DOMAIN_CPU; 349 obj->write_domain = I915_GEM_DOMAIN_CPU; 350 if (cpu_write_needs_clflush(obj)) 351 obj->cache_dirty = true; 352 } 353 354 static void 355 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 356 struct sg_table *pages, 357 bool needs_clflush) 358 { 359 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 360 361 if (obj->mm.madv == I915_MADV_DONTNEED) 362 obj->mm.dirty = false; 363 364 if (needs_clflush && 365 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 366 
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 367 drm_clflush_sg(pages); 368 369 __start_cpu_write(obj); 370 } 371 372 static void 373 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 374 struct sg_table *pages) 375 { 376 __i915_gem_object_release_shmem(obj, pages, false); 377 378 if (obj->mm.dirty) { 379 struct address_space *mapping = obj->base.filp->f_mapping; 380 char *vaddr = obj->phys_handle->vaddr; 381 int i; 382 383 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 384 struct page *page; 385 char *dst; 386 387 page = shmem_read_mapping_page(mapping, i); 388 if (IS_ERR(page)) 389 continue; 390 391 dst = kmap_atomic(page); 392 drm_clflush_virt_range(vaddr, PAGE_SIZE); 393 memcpy(dst, vaddr, PAGE_SIZE); 394 kunmap_atomic(dst); 395 396 set_page_dirty(page); 397 if (obj->mm.madv == I915_MADV_WILLNEED) 398 mark_page_accessed(page); 399 put_page(page); 400 vaddr += PAGE_SIZE; 401 } 402 obj->mm.dirty = false; 403 } 404 405 sg_free_table(pages); 406 kfree(pages); 407 408 drm_pci_free(obj->base.dev, obj->phys_handle); 409 } 410 411 static void 412 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 413 { 414 i915_gem_object_unpin_pages(obj); 415 } 416 417 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 418 .get_pages = i915_gem_object_get_pages_phys, 419 .put_pages = i915_gem_object_put_pages_phys, 420 .release = i915_gem_object_release_phys, 421 }; 422 423 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 424 425 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 426 { 427 struct i915_vma *vma; 428 LIST_HEAD(still_in_list); 429 int ret; 430 431 lockdep_assert_held(&obj->base.dev->struct_mutex); 432 433 /* Closed vma are removed from the obj->vma_list - but they may 434 * still have an active binding on the object. To remove those we 435 * must wait for all rendering to complete to the object (as unbinding 436 * must anyway), and retire the requests. 437 */ 438 ret = i915_gem_object_set_to_cpu_domain(obj, false); 439 if (ret) 440 return ret; 441 442 spin_lock(&obj->vma.lock); 443 while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, 444 struct i915_vma, 445 obj_link))) { 446 list_move_tail(&vma->obj_link, &still_in_list); 447 spin_unlock(&obj->vma.lock); 448 449 ret = i915_vma_unbind(vma); 450 451 spin_lock(&obj->vma.lock); 452 } 453 list_splice(&still_in_list, &obj->vma.list); 454 spin_unlock(&obj->vma.lock); 455 456 return ret; 457 } 458 459 static long 460 i915_gem_object_wait_fence(struct dma_fence *fence, 461 unsigned int flags, 462 long timeout, 463 struct intel_rps_client *rps_client) 464 { 465 struct i915_request *rq; 466 467 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 468 469 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 470 return timeout; 471 472 if (!dma_fence_is_i915(fence)) 473 return dma_fence_wait_timeout(fence, 474 flags & I915_WAIT_INTERRUPTIBLE, 475 timeout); 476 477 rq = to_request(fence); 478 if (i915_request_completed(rq)) 479 goto out; 480 481 /* 482 * This client is about to stall waiting for the GPU. In many cases 483 * this is undesirable and limits the throughput of the system, as 484 * many clients cannot continue processing user input/output whilst 485 * blocked. RPS autotuning may take tens of milliseconds to respond 486 * to the GPU load and thus incurs additional latency for the client. 487 * We can circumvent that by promoting the GPU frequency to maximum 488 * before we wait. 
This makes the GPU throttle up much more quickly 489 * (good for benchmarks and user experience, e.g. window animations), 490 * but at a cost of spending more power processing the workload 491 * (bad for battery). Not all clients even want their results 492 * immediately and for them we should just let the GPU select its own 493 * frequency to maximise efficiency. To prevent a single client from 494 * forcing the clocks too high for the whole system, we only allow 495 * each client to waitboost once in a busy period. 496 */ 497 if (rps_client && !i915_request_started(rq)) { 498 if (INTEL_GEN(rq->i915) >= 6) 499 gen6_rps_boost(rq, rps_client); 500 } 501 502 timeout = i915_request_wait(rq, flags, timeout); 503 504 out: 505 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 506 i915_request_retire_upto(rq); 507 508 return timeout; 509 } 510 511 static long 512 i915_gem_object_wait_reservation(struct reservation_object *resv, 513 unsigned int flags, 514 long timeout, 515 struct intel_rps_client *rps_client) 516 { 517 unsigned int seq = __read_seqcount_begin(&resv->seq); 518 struct dma_fence *excl; 519 bool prune_fences = false; 520 521 if (flags & I915_WAIT_ALL) { 522 struct dma_fence **shared; 523 unsigned int count, i; 524 int ret; 525 526 ret = reservation_object_get_fences_rcu(resv, 527 &excl, &count, &shared); 528 if (ret) 529 return ret; 530 531 for (i = 0; i < count; i++) { 532 timeout = i915_gem_object_wait_fence(shared[i], 533 flags, timeout, 534 rps_client); 535 if (timeout < 0) 536 break; 537 538 dma_fence_put(shared[i]); 539 } 540 541 for (; i < count; i++) 542 dma_fence_put(shared[i]); 543 kfree(shared); 544 545 /* 546 * If both shared fences and an exclusive fence exist, 547 * then by construction the shared fences must be later 548 * than the exclusive fence. If we successfully wait for 549 * all the shared fences, we know that the exclusive fence 550 * must all be signaled. If all the shared fences are 551 * signaled, we can prune the array and recover the 552 * floating references on the fences/requests. 553 */ 554 prune_fences = count && timeout >= 0; 555 } else { 556 excl = reservation_object_get_excl_rcu(resv); 557 } 558 559 if (excl && timeout >= 0) 560 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 561 rps_client); 562 563 dma_fence_put(excl); 564 565 /* 566 * Opportunistically prune the fences iff we know they have *all* been 567 * signaled and that the reservation object has not been changed (i.e. 568 * no new fences have been added). 
569 */ 570 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 571 if (reservation_object_trylock(resv)) { 572 if (!__read_seqcount_retry(&resv->seq, seq)) 573 reservation_object_add_excl_fence(resv, NULL); 574 reservation_object_unlock(resv); 575 } 576 } 577 578 return timeout; 579 } 580 581 static void __fence_set_priority(struct dma_fence *fence, 582 const struct i915_sched_attr *attr) 583 { 584 struct i915_request *rq; 585 struct intel_engine_cs *engine; 586 587 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 588 return; 589 590 rq = to_request(fence); 591 engine = rq->engine; 592 593 local_bh_disable(); 594 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 595 if (engine->schedule) 596 engine->schedule(rq, attr); 597 rcu_read_unlock(); 598 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 599 } 600 601 static void fence_set_priority(struct dma_fence *fence, 602 const struct i915_sched_attr *attr) 603 { 604 /* Recurse once into a fence-array */ 605 if (dma_fence_is_array(fence)) { 606 struct dma_fence_array *array = to_dma_fence_array(fence); 607 int i; 608 609 for (i = 0; i < array->num_fences; i++) 610 __fence_set_priority(array->fences[i], attr); 611 } else { 612 __fence_set_priority(fence, attr); 613 } 614 } 615 616 int 617 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 618 unsigned int flags, 619 const struct i915_sched_attr *attr) 620 { 621 struct dma_fence *excl; 622 623 if (flags & I915_WAIT_ALL) { 624 struct dma_fence **shared; 625 unsigned int count, i; 626 int ret; 627 628 ret = reservation_object_get_fences_rcu(obj->resv, 629 &excl, &count, &shared); 630 if (ret) 631 return ret; 632 633 for (i = 0; i < count; i++) { 634 fence_set_priority(shared[i], attr); 635 dma_fence_put(shared[i]); 636 } 637 638 kfree(shared); 639 } else { 640 excl = reservation_object_get_excl_rcu(obj->resv); 641 } 642 643 if (excl) { 644 fence_set_priority(excl, attr); 645 dma_fence_put(excl); 646 } 647 return 0; 648 } 649 650 /** 651 * Waits for rendering to the object to be completed 652 * @obj: i915 gem object 653 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 654 * @timeout: how long to wait 655 * @rps_client: client (user process) to charge for any waitboosting 656 */ 657 int 658 i915_gem_object_wait(struct drm_i915_gem_object *obj, 659 unsigned int flags, 660 long timeout, 661 struct intel_rps_client *rps_client) 662 { 663 might_sleep(); 664 GEM_BUG_ON(timeout < 0); 665 666 timeout = i915_gem_object_wait_reservation(obj->resv, 667 flags, timeout, 668 rps_client); 669 return timeout < 0 ? timeout : 0; 670 } 671 672 static struct intel_rps_client *to_rps_client(struct drm_file *file) 673 { 674 struct drm_i915_file_private *fpriv = file->driver_priv; 675 676 return &fpriv->rps_client; 677 } 678 679 static int 680 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 681 struct drm_i915_gem_pwrite *args, 682 struct drm_file *file) 683 { 684 void *vaddr = obj->phys_handle->vaddr + args->offset; 685 char __user *user_data = u64_to_user_ptr(args->data_ptr); 686 687 /* We manually control the domain here and pretend that it 688 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
689 */ 690 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 691 if (copy_from_user(vaddr, user_data, args->size)) 692 return -EFAULT; 693 694 drm_clflush_virt_range(vaddr, args->size); 695 i915_gem_chipset_flush(to_i915(obj->base.dev)); 696 697 intel_fb_obj_flush(obj, ORIGIN_CPU); 698 return 0; 699 } 700 701 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 702 { 703 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 704 } 705 706 void i915_gem_object_free(struct drm_i915_gem_object *obj) 707 { 708 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 709 kmem_cache_free(dev_priv->objects, obj); 710 } 711 712 static int 713 i915_gem_create(struct drm_file *file, 714 struct drm_i915_private *dev_priv, 715 u64 size, 716 u32 *handle_p) 717 { 718 struct drm_i915_gem_object *obj; 719 int ret; 720 u32 handle; 721 722 size = roundup(size, PAGE_SIZE); 723 if (size == 0) 724 return -EINVAL; 725 726 /* Allocate the new object */ 727 obj = i915_gem_object_create(dev_priv, size); 728 if (IS_ERR(obj)) 729 return PTR_ERR(obj); 730 731 ret = drm_gem_handle_create(file, &obj->base, &handle); 732 /* drop reference from allocate - handle holds it now */ 733 i915_gem_object_put(obj); 734 if (ret) 735 return ret; 736 737 *handle_p = handle; 738 return 0; 739 } 740 741 int 742 i915_gem_dumb_create(struct drm_file *file, 743 struct drm_device *dev, 744 struct drm_mode_create_dumb *args) 745 { 746 /* have to work out size/pitch and return them */ 747 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 748 args->size = args->pitch * args->height; 749 return i915_gem_create(file, to_i915(dev), 750 args->size, &args->handle); 751 } 752 753 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 754 { 755 return !(obj->cache_level == I915_CACHE_NONE || 756 obj->cache_level == I915_CACHE_WT); 757 } 758 759 /** 760 * Creates a new mm object and returns a handle to it. 761 * @dev: drm device pointer 762 * @data: ioctl data blob 763 * @file: drm file pointer 764 */ 765 int 766 i915_gem_create_ioctl(struct drm_device *dev, void *data, 767 struct drm_file *file) 768 { 769 struct drm_i915_private *dev_priv = to_i915(dev); 770 struct drm_i915_gem_create *args = data; 771 772 i915_gem_flush_free_objects(dev_priv); 773 774 return i915_gem_create(file, dev_priv, 775 args->size, &args->handle); 776 } 777 778 static inline enum fb_op_origin 779 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 780 { 781 return (domain == I915_GEM_DOMAIN_GTT ? 782 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 783 } 784 785 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 786 { 787 intel_wakeref_t wakeref; 788 789 /* 790 * No actual flushing is required for the GTT write domain for reads 791 * from the GTT domain. Writes to it "immediately" go to main memory 792 * as far as we know, so there's no chipset flush. It also doesn't 793 * land in the GPU render cache. 794 * 795 * However, we do have to enforce the order so that all writes through 796 * the GTT land before any writes to the device, such as updates to 797 * the GATT itself. 798 * 799 * We also have to wait a bit for the writes to land from the GTT. 800 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 801 * timing. This issue has only been observed when switching quickly 802 * between GTT writes and CPU reads from inside the kernel on recent hw, 803 * and it appears to only affect discrete GTT blocks (i.e. 
on LLC 804 * system agents we cannot reproduce this behaviour, until Cannonlake 805 * that was!). 806 */ 807 808 wmb(); 809 810 if (INTEL_INFO(dev_priv)->has_coherent_ggtt) 811 return; 812 813 i915_gem_chipset_flush(dev_priv); 814 815 with_intel_runtime_pm(dev_priv, wakeref) { 816 spin_lock_irq(&dev_priv->uncore.lock); 817 818 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 819 820 spin_unlock_irq(&dev_priv->uncore.lock); 821 } 822 } 823 824 static void 825 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 826 { 827 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 828 struct i915_vma *vma; 829 830 if (!(obj->write_domain & flush_domains)) 831 return; 832 833 switch (obj->write_domain) { 834 case I915_GEM_DOMAIN_GTT: 835 i915_gem_flush_ggtt_writes(dev_priv); 836 837 intel_fb_obj_flush(obj, 838 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 839 840 for_each_ggtt_vma(vma, obj) { 841 if (vma->iomap) 842 continue; 843 844 i915_vma_unset_ggtt_write(vma); 845 } 846 break; 847 848 case I915_GEM_DOMAIN_WC: 849 wmb(); 850 break; 851 852 case I915_GEM_DOMAIN_CPU: 853 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 854 break; 855 856 case I915_GEM_DOMAIN_RENDER: 857 if (gpu_write_needs_clflush(obj)) 858 obj->cache_dirty = true; 859 break; 860 } 861 862 obj->write_domain = 0; 863 } 864 865 /* 866 * Pins the specified object's pages and synchronizes the object with 867 * GPU accesses. Sets needs_clflush to non-zero if the caller should 868 * flush the object from the CPU cache. 869 */ 870 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 871 unsigned int *needs_clflush) 872 { 873 int ret; 874 875 lockdep_assert_held(&obj->base.dev->struct_mutex); 876 877 *needs_clflush = 0; 878 if (!i915_gem_object_has_struct_page(obj)) 879 return -ENODEV; 880 881 ret = i915_gem_object_wait(obj, 882 I915_WAIT_INTERRUPTIBLE | 883 I915_WAIT_LOCKED, 884 MAX_SCHEDULE_TIMEOUT, 885 NULL); 886 if (ret) 887 return ret; 888 889 ret = i915_gem_object_pin_pages(obj); 890 if (ret) 891 return ret; 892 893 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 894 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 895 ret = i915_gem_object_set_to_cpu_domain(obj, false); 896 if (ret) 897 goto err_unpin; 898 else 899 goto out; 900 } 901 902 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 903 904 /* If we're not in the cpu read domain, set ourself into the gtt 905 * read domain and manually flush cachelines (if required). This 906 * optimizes for the case when the gpu will dirty the data 907 * anyway again before the next pread happens. 
908 */ 909 if (!obj->cache_dirty && 910 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 911 *needs_clflush = CLFLUSH_BEFORE; 912 913 out: 914 /* return with the pages pinned */ 915 return 0; 916 917 err_unpin: 918 i915_gem_object_unpin_pages(obj); 919 return ret; 920 } 921 922 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 923 unsigned int *needs_clflush) 924 { 925 int ret; 926 927 lockdep_assert_held(&obj->base.dev->struct_mutex); 928 929 *needs_clflush = 0; 930 if (!i915_gem_object_has_struct_page(obj)) 931 return -ENODEV; 932 933 ret = i915_gem_object_wait(obj, 934 I915_WAIT_INTERRUPTIBLE | 935 I915_WAIT_LOCKED | 936 I915_WAIT_ALL, 937 MAX_SCHEDULE_TIMEOUT, 938 NULL); 939 if (ret) 940 return ret; 941 942 ret = i915_gem_object_pin_pages(obj); 943 if (ret) 944 return ret; 945 946 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 947 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 948 ret = i915_gem_object_set_to_cpu_domain(obj, true); 949 if (ret) 950 goto err_unpin; 951 else 952 goto out; 953 } 954 955 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 956 957 /* If we're not in the cpu write domain, set ourself into the 958 * gtt write domain and manually flush cachelines (as required). 959 * This optimizes for the case when the gpu will use the data 960 * right away and we therefore have to clflush anyway. 961 */ 962 if (!obj->cache_dirty) { 963 *needs_clflush |= CLFLUSH_AFTER; 964 965 /* 966 * Same trick applies to invalidate partially written 967 * cachelines read before writing. 968 */ 969 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 970 *needs_clflush |= CLFLUSH_BEFORE; 971 } 972 973 out: 974 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 975 obj->mm.dirty = true; 976 /* return with the pages pinned */ 977 return 0; 978 979 err_unpin: 980 i915_gem_object_unpin_pages(obj); 981 return ret; 982 } 983 984 static int 985 shmem_pread(struct page *page, int offset, int len, char __user *user_data, 986 bool needs_clflush) 987 { 988 char *vaddr; 989 int ret; 990 991 vaddr = kmap(page); 992 993 if (needs_clflush) 994 drm_clflush_virt_range(vaddr + offset, len); 995 996 ret = __copy_to_user(user_data, vaddr + offset, len); 997 998 kunmap(page); 999 1000 return ret ? 
-EFAULT : 0; 1001 } 1002 1003 static int 1004 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 1005 struct drm_i915_gem_pread *args) 1006 { 1007 char __user *user_data; 1008 u64 remain; 1009 unsigned int needs_clflush; 1010 unsigned int idx, offset; 1011 int ret; 1012 1013 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1014 if (ret) 1015 return ret; 1016 1017 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1018 mutex_unlock(&obj->base.dev->struct_mutex); 1019 if (ret) 1020 return ret; 1021 1022 remain = args->size; 1023 user_data = u64_to_user_ptr(args->data_ptr); 1024 offset = offset_in_page(args->offset); 1025 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1026 struct page *page = i915_gem_object_get_page(obj, idx); 1027 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1028 1029 ret = shmem_pread(page, offset, length, user_data, 1030 needs_clflush); 1031 if (ret) 1032 break; 1033 1034 remain -= length; 1035 user_data += length; 1036 offset = 0; 1037 } 1038 1039 i915_gem_obj_finish_shmem_access(obj); 1040 return ret; 1041 } 1042 1043 static inline bool 1044 gtt_user_read(struct io_mapping *mapping, 1045 loff_t base, int offset, 1046 char __user *user_data, int length) 1047 { 1048 void __iomem *vaddr; 1049 unsigned long unwritten; 1050 1051 /* We can use the cpu mem copy function because this is X86. */ 1052 vaddr = io_mapping_map_atomic_wc(mapping, base); 1053 unwritten = __copy_to_user_inatomic(user_data, 1054 (void __force *)vaddr + offset, 1055 length); 1056 io_mapping_unmap_atomic(vaddr); 1057 if (unwritten) { 1058 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1059 unwritten = copy_to_user(user_data, 1060 (void __force *)vaddr + offset, 1061 length); 1062 io_mapping_unmap(vaddr); 1063 } 1064 return unwritten; 1065 } 1066 1067 static int 1068 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1069 const struct drm_i915_gem_pread *args) 1070 { 1071 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1072 struct i915_ggtt *ggtt = &i915->ggtt; 1073 intel_wakeref_t wakeref; 1074 struct drm_mm_node node; 1075 struct i915_vma *vma; 1076 void __user *user_data; 1077 u64 remain, offset; 1078 int ret; 1079 1080 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1081 if (ret) 1082 return ret; 1083 1084 wakeref = intel_runtime_pm_get(i915); 1085 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1086 PIN_MAPPABLE | 1087 PIN_NONFAULT | 1088 PIN_NONBLOCK); 1089 if (!IS_ERR(vma)) { 1090 node.start = i915_ggtt_offset(vma); 1091 node.allocated = false; 1092 ret = i915_vma_put_fence(vma); 1093 if (ret) { 1094 i915_vma_unpin(vma); 1095 vma = ERR_PTR(ret); 1096 } 1097 } 1098 if (IS_ERR(vma)) { 1099 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1100 if (ret) 1101 goto out_unlock; 1102 GEM_BUG_ON(!node.allocated); 1103 } 1104 1105 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1106 if (ret) 1107 goto out_unpin; 1108 1109 mutex_unlock(&i915->drm.struct_mutex); 1110 1111 user_data = u64_to_user_ptr(args->data_ptr); 1112 remain = args->size; 1113 offset = args->offset; 1114 1115 while (remain > 0) { 1116 /* Operation in this page 1117 * 1118 * page_base = page offset within aperture 1119 * page_offset = offset within page 1120 * page_length = bytes to copy for this page 1121 */ 1122 u32 page_base = node.start; 1123 unsigned page_offset = offset_in_page(offset); 1124 unsigned page_length = PAGE_SIZE - page_offset; 1125 page_length = remain < page_length ? 
remain : page_length; 1126 if (node.allocated) { 1127 wmb(); 1128 ggtt->vm.insert_page(&ggtt->vm, 1129 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1130 node.start, I915_CACHE_NONE, 0); 1131 wmb(); 1132 } else { 1133 page_base += offset & PAGE_MASK; 1134 } 1135 1136 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1137 user_data, page_length)) { 1138 ret = -EFAULT; 1139 break; 1140 } 1141 1142 remain -= page_length; 1143 user_data += page_length; 1144 offset += page_length; 1145 } 1146 1147 mutex_lock(&i915->drm.struct_mutex); 1148 out_unpin: 1149 if (node.allocated) { 1150 wmb(); 1151 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1152 remove_mappable_node(&node); 1153 } else { 1154 i915_vma_unpin(vma); 1155 } 1156 out_unlock: 1157 intel_runtime_pm_put(i915, wakeref); 1158 mutex_unlock(&i915->drm.struct_mutex); 1159 1160 return ret; 1161 } 1162 1163 /** 1164 * Reads data from the object referenced by handle. 1165 * @dev: drm device pointer 1166 * @data: ioctl data blob 1167 * @file: drm file pointer 1168 * 1169 * On error, the contents of *data are undefined. 1170 */ 1171 int 1172 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1173 struct drm_file *file) 1174 { 1175 struct drm_i915_gem_pread *args = data; 1176 struct drm_i915_gem_object *obj; 1177 int ret; 1178 1179 if (args->size == 0) 1180 return 0; 1181 1182 if (!access_ok(u64_to_user_ptr(args->data_ptr), 1183 args->size)) 1184 return -EFAULT; 1185 1186 obj = i915_gem_object_lookup(file, args->handle); 1187 if (!obj) 1188 return -ENOENT; 1189 1190 /* Bounds check source. */ 1191 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1192 ret = -EINVAL; 1193 goto out; 1194 } 1195 1196 trace_i915_gem_object_pread(obj, args->offset, args->size); 1197 1198 ret = i915_gem_object_wait(obj, 1199 I915_WAIT_INTERRUPTIBLE, 1200 MAX_SCHEDULE_TIMEOUT, 1201 to_rps_client(file)); 1202 if (ret) 1203 goto out; 1204 1205 ret = i915_gem_object_pin_pages(obj); 1206 if (ret) 1207 goto out; 1208 1209 ret = i915_gem_shmem_pread(obj, args); 1210 if (ret == -EFAULT || ret == -ENODEV) 1211 ret = i915_gem_gtt_pread(obj, args); 1212 1213 i915_gem_object_unpin_pages(obj); 1214 out: 1215 i915_gem_object_put(obj); 1216 return ret; 1217 } 1218 1219 /* This is the fast write path which cannot handle 1220 * page faults in the source data 1221 */ 1222 1223 static inline bool 1224 ggtt_write(struct io_mapping *mapping, 1225 loff_t base, int offset, 1226 char __user *user_data, int length) 1227 { 1228 void __iomem *vaddr; 1229 unsigned long unwritten; 1230 1231 /* We can use the cpu mem copy function because this is X86. */ 1232 vaddr = io_mapping_map_atomic_wc(mapping, base); 1233 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1234 user_data, length); 1235 io_mapping_unmap_atomic(vaddr); 1236 if (unwritten) { 1237 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1238 unwritten = copy_from_user((void __force *)vaddr + offset, 1239 user_data, length); 1240 io_mapping_unmap(vaddr); 1241 } 1242 1243 return unwritten; 1244 } 1245 1246 /** 1247 * This is the fast pwrite path, where we copy the data directly from the 1248 * user into the GTT, uncached. 
1249 * @obj: i915 GEM object 1250 * @args: pwrite arguments structure 1251 */ 1252 static int 1253 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1254 const struct drm_i915_gem_pwrite *args) 1255 { 1256 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1257 struct i915_ggtt *ggtt = &i915->ggtt; 1258 intel_wakeref_t wakeref; 1259 struct drm_mm_node node; 1260 struct i915_vma *vma; 1261 u64 remain, offset; 1262 void __user *user_data; 1263 int ret; 1264 1265 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1266 if (ret) 1267 return ret; 1268 1269 if (i915_gem_object_has_struct_page(obj)) { 1270 /* 1271 * Avoid waking the device up if we can fallback, as 1272 * waking/resuming is very slow (worst-case 10-100 ms 1273 * depending on PCI sleeps and our own resume time). 1274 * This easily dwarfs any performance advantage from 1275 * using the cache bypass of indirect GGTT access. 1276 */ 1277 wakeref = intel_runtime_pm_get_if_in_use(i915); 1278 if (!wakeref) { 1279 ret = -EFAULT; 1280 goto out_unlock; 1281 } 1282 } else { 1283 /* No backing pages, no fallback, we must force GGTT access */ 1284 wakeref = intel_runtime_pm_get(i915); 1285 } 1286 1287 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1288 PIN_MAPPABLE | 1289 PIN_NONFAULT | 1290 PIN_NONBLOCK); 1291 if (!IS_ERR(vma)) { 1292 node.start = i915_ggtt_offset(vma); 1293 node.allocated = false; 1294 ret = i915_vma_put_fence(vma); 1295 if (ret) { 1296 i915_vma_unpin(vma); 1297 vma = ERR_PTR(ret); 1298 } 1299 } 1300 if (IS_ERR(vma)) { 1301 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1302 if (ret) 1303 goto out_rpm; 1304 GEM_BUG_ON(!node.allocated); 1305 } 1306 1307 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1308 if (ret) 1309 goto out_unpin; 1310 1311 mutex_unlock(&i915->drm.struct_mutex); 1312 1313 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1314 1315 user_data = u64_to_user_ptr(args->data_ptr); 1316 offset = args->offset; 1317 remain = args->size; 1318 while (remain) { 1319 /* Operation in this page 1320 * 1321 * page_base = page offset within aperture 1322 * page_offset = offset within page 1323 * page_length = bytes to copy for this page 1324 */ 1325 u32 page_base = node.start; 1326 unsigned int page_offset = offset_in_page(offset); 1327 unsigned int page_length = PAGE_SIZE - page_offset; 1328 page_length = remain < page_length ? remain : page_length; 1329 if (node.allocated) { 1330 wmb(); /* flush the write before we modify the GGTT */ 1331 ggtt->vm.insert_page(&ggtt->vm, 1332 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1333 node.start, I915_CACHE_NONE, 0); 1334 wmb(); /* flush modifications to the GGTT (insert_page) */ 1335 } else { 1336 page_base += offset & PAGE_MASK; 1337 } 1338 /* If we get a fault while copying data, then (presumably) our 1339 * source page isn't available. Return the error and we'll 1340 * retry in the slow path. 1341 * If the object is non-shmem backed, we retry again with the 1342 * path that handles page fault. 
1343 */ 1344 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1345 user_data, page_length)) { 1346 ret = -EFAULT; 1347 break; 1348 } 1349 1350 remain -= page_length; 1351 user_data += page_length; 1352 offset += page_length; 1353 } 1354 intel_fb_obj_flush(obj, ORIGIN_CPU); 1355 1356 mutex_lock(&i915->drm.struct_mutex); 1357 out_unpin: 1358 if (node.allocated) { 1359 wmb(); 1360 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1361 remove_mappable_node(&node); 1362 } else { 1363 i915_vma_unpin(vma); 1364 } 1365 out_rpm: 1366 intel_runtime_pm_put(i915, wakeref); 1367 out_unlock: 1368 mutex_unlock(&i915->drm.struct_mutex); 1369 return ret; 1370 } 1371 1372 /* Per-page copy function for the shmem pwrite fastpath. 1373 * Flushes invalid cachelines before writing to the target if 1374 * needs_clflush_before is set and flushes out any written cachelines after 1375 * writing if needs_clflush is set. 1376 */ 1377 static int 1378 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1379 bool needs_clflush_before, 1380 bool needs_clflush_after) 1381 { 1382 char *vaddr; 1383 int ret; 1384 1385 vaddr = kmap(page); 1386 1387 if (needs_clflush_before) 1388 drm_clflush_virt_range(vaddr + offset, len); 1389 1390 ret = __copy_from_user(vaddr + offset, user_data, len); 1391 if (!ret && needs_clflush_after) 1392 drm_clflush_virt_range(vaddr + offset, len); 1393 1394 kunmap(page); 1395 1396 return ret ? -EFAULT : 0; 1397 } 1398 1399 static int 1400 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1401 const struct drm_i915_gem_pwrite *args) 1402 { 1403 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1404 void __user *user_data; 1405 u64 remain; 1406 unsigned int partial_cacheline_write; 1407 unsigned int needs_clflush; 1408 unsigned int offset, idx; 1409 int ret; 1410 1411 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1412 if (ret) 1413 return ret; 1414 1415 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1416 mutex_unlock(&i915->drm.struct_mutex); 1417 if (ret) 1418 return ret; 1419 1420 /* If we don't overwrite a cacheline completely we need to be 1421 * careful to have up-to-date data by first clflushing. Don't 1422 * overcomplicate things and flush the entire patch. 1423 */ 1424 partial_cacheline_write = 0; 1425 if (needs_clflush & CLFLUSH_BEFORE) 1426 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1427 1428 user_data = u64_to_user_ptr(args->data_ptr); 1429 remain = args->size; 1430 offset = offset_in_page(args->offset); 1431 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1432 struct page *page = i915_gem_object_get_page(obj, idx); 1433 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1434 1435 ret = shmem_pwrite(page, offset, length, user_data, 1436 (offset | length) & partial_cacheline_write, 1437 needs_clflush & CLFLUSH_AFTER); 1438 if (ret) 1439 break; 1440 1441 remain -= length; 1442 user_data += length; 1443 offset = 0; 1444 } 1445 1446 intel_fb_obj_flush(obj, ORIGIN_CPU); 1447 i915_gem_obj_finish_shmem_access(obj); 1448 return ret; 1449 } 1450 1451 /** 1452 * Writes data to the object referenced by handle. 1453 * @dev: drm device 1454 * @data: ioctl data blob 1455 * @file: drm file 1456 * 1457 * On error, the contents of the buffer that were to be modified are undefined. 
1458 */ 1459 int 1460 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1461 struct drm_file *file) 1462 { 1463 struct drm_i915_gem_pwrite *args = data; 1464 struct drm_i915_gem_object *obj; 1465 int ret; 1466 1467 if (args->size == 0) 1468 return 0; 1469 1470 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size)) 1471 return -EFAULT; 1472 1473 obj = i915_gem_object_lookup(file, args->handle); 1474 if (!obj) 1475 return -ENOENT; 1476 1477 /* Bounds check destination. */ 1478 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1479 ret = -EINVAL; 1480 goto err; 1481 } 1482 1483 /* Writes not allowed into this read-only object */ 1484 if (i915_gem_object_is_readonly(obj)) { 1485 ret = -EINVAL; 1486 goto err; 1487 } 1488 1489 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1490 1491 ret = -ENODEV; 1492 if (obj->ops->pwrite) 1493 ret = obj->ops->pwrite(obj, args); 1494 if (ret != -ENODEV) 1495 goto err; 1496 1497 ret = i915_gem_object_wait(obj, 1498 I915_WAIT_INTERRUPTIBLE | 1499 I915_WAIT_ALL, 1500 MAX_SCHEDULE_TIMEOUT, 1501 to_rps_client(file)); 1502 if (ret) 1503 goto err; 1504 1505 ret = i915_gem_object_pin_pages(obj); 1506 if (ret) 1507 goto err; 1508 1509 ret = -EFAULT; 1510 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1511 * it would end up going through the fenced access, and we'll get 1512 * different detiling behavior between reading and writing. 1513 * pread/pwrite currently are reading and writing from the CPU 1514 * perspective, requiring manual detiling by the client. 1515 */ 1516 if (!i915_gem_object_has_struct_page(obj) || 1517 cpu_write_needs_clflush(obj)) 1518 /* Note that the gtt paths might fail with non-page-backed user 1519 * pointers (e.g. gtt mappings when moving data between 1520 * textures). Fallback to the shmem path in that case. 1521 */ 1522 ret = i915_gem_gtt_pwrite_fast(obj, args); 1523 1524 if (ret == -EFAULT || ret == -ENOSPC) { 1525 if (obj->phys_handle) 1526 ret = i915_gem_phys_pwrite(obj, args, file); 1527 else 1528 ret = i915_gem_shmem_pwrite(obj, args); 1529 } 1530 1531 i915_gem_object_unpin_pages(obj); 1532 err: 1533 i915_gem_object_put(obj); 1534 return ret; 1535 } 1536 1537 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1538 { 1539 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1540 struct list_head *list; 1541 struct i915_vma *vma; 1542 1543 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1544 1545 mutex_lock(&i915->ggtt.vm.mutex); 1546 for_each_ggtt_vma(vma, obj) { 1547 if (!drm_mm_node_allocated(&vma->node)) 1548 continue; 1549 1550 list_move_tail(&vma->vm_link, &vma->vm->bound_list); 1551 } 1552 mutex_unlock(&i915->ggtt.vm.mutex); 1553 1554 spin_lock(&i915->mm.obj_lock); 1555 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1556 list_move_tail(&obj->mm.link, list); 1557 spin_unlock(&i915->mm.obj_lock); 1558 } 1559 1560 /** 1561 * Called when user space prepares to use an object with the CPU, either 1562 * through the mmap ioctl's mapping or a GTT mapping. 1563 * @dev: drm device 1564 * @data: ioctl data blob 1565 * @file: drm file 1566 */ 1567 int 1568 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1569 struct drm_file *file) 1570 { 1571 struct drm_i915_gem_set_domain *args = data; 1572 struct drm_i915_gem_object *obj; 1573 u32 read_domains = args->read_domains; 1574 u32 write_domain = args->write_domain; 1575 int err; 1576 1577 /* Only handle setting domains to types used by the CPU. 
*/ 1578 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1579 return -EINVAL; 1580 1581 /* Having something in the write domain implies it's in the read 1582 * domain, and only that read domain. Enforce that in the request. 1583 */ 1584 if (write_domain != 0 && read_domains != write_domain) 1585 return -EINVAL; 1586 1587 obj = i915_gem_object_lookup(file, args->handle); 1588 if (!obj) 1589 return -ENOENT; 1590 1591 /* Try to flush the object off the GPU without holding the lock. 1592 * We will repeat the flush holding the lock in the normal manner 1593 * to catch cases where we are gazumped. 1594 */ 1595 err = i915_gem_object_wait(obj, 1596 I915_WAIT_INTERRUPTIBLE | 1597 I915_WAIT_PRIORITY | 1598 (write_domain ? I915_WAIT_ALL : 0), 1599 MAX_SCHEDULE_TIMEOUT, 1600 to_rps_client(file)); 1601 if (err) 1602 goto out; 1603 1604 /* 1605 * Proxy objects do not control access to the backing storage, ergo 1606 * they cannot be used as a means to manipulate the cache domain 1607 * tracking for that backing storage. The proxy object is always 1608 * considered to be outside of any cache domain. 1609 */ 1610 if (i915_gem_object_is_proxy(obj)) { 1611 err = -ENXIO; 1612 goto out; 1613 } 1614 1615 /* 1616 * Flush and acquire obj->pages so that we are coherent through 1617 * direct access in memory with previous cached writes through 1618 * shmemfs and that our cache domain tracking remains valid. 1619 * For example, if the obj->filp was moved to swap without us 1620 * being notified and releasing the pages, we would mistakenly 1621 * continue to assume that the obj remained out of the CPU cached 1622 * domain. 1623 */ 1624 err = i915_gem_object_pin_pages(obj); 1625 if (err) 1626 goto out; 1627 1628 err = i915_mutex_lock_interruptible(dev); 1629 if (err) 1630 goto out_unpin; 1631 1632 if (read_domains & I915_GEM_DOMAIN_WC) 1633 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1634 else if (read_domains & I915_GEM_DOMAIN_GTT) 1635 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1636 else 1637 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1638 1639 /* And bump the LRU for this access */ 1640 i915_gem_object_bump_inactive_ggtt(obj); 1641 1642 mutex_unlock(&dev->struct_mutex); 1643 1644 if (write_domain != 0) 1645 intel_fb_obj_invalidate(obj, 1646 fb_write_origin(obj, write_domain)); 1647 1648 out_unpin: 1649 i915_gem_object_unpin_pages(obj); 1650 out: 1651 i915_gem_object_put(obj); 1652 return err; 1653 } 1654 1655 /** 1656 * Called when user space has done writes to this buffer 1657 * @dev: drm device 1658 * @data: ioctl data blob 1659 * @file: drm file 1660 */ 1661 int 1662 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1663 struct drm_file *file) 1664 { 1665 struct drm_i915_gem_sw_finish *args = data; 1666 struct drm_i915_gem_object *obj; 1667 1668 obj = i915_gem_object_lookup(file, args->handle); 1669 if (!obj) 1670 return -ENOENT; 1671 1672 /* 1673 * Proxy objects are barred from CPU access, so there is no 1674 * need to ban sw_finish as it is a nop. 
1675 */ 1676 1677 /* Pinned buffers may be scanout, so flush the cache */ 1678 i915_gem_object_flush_if_display(obj); 1679 i915_gem_object_put(obj); 1680 1681 return 0; 1682 } 1683 1684 static inline bool 1685 __vma_matches(struct vm_area_struct *vma, struct file *filp, 1686 unsigned long addr, unsigned long size) 1687 { 1688 if (vma->vm_file != filp) 1689 return false; 1690 1691 return vma->vm_start == addr && 1692 (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size); 1693 } 1694 1695 /** 1696 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1697 * it is mapped to. 1698 * @dev: drm device 1699 * @data: ioctl data blob 1700 * @file: drm file 1701 * 1702 * While the mapping holds a reference on the contents of the object, it doesn't 1703 * imply a ref on the object itself. 1704 * 1705 * IMPORTANT: 1706 * 1707 * DRM driver writers who look a this function as an example for how to do GEM 1708 * mmap support, please don't implement mmap support like here. The modern way 1709 * to implement DRM mmap support is with an mmap offset ioctl (like 1710 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1711 * That way debug tooling like valgrind will understand what's going on, hiding 1712 * the mmap call in a driver private ioctl will break that. The i915 driver only 1713 * does cpu mmaps this way because we didn't know better. 1714 */ 1715 int 1716 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1717 struct drm_file *file) 1718 { 1719 struct drm_i915_gem_mmap *args = data; 1720 struct drm_i915_gem_object *obj; 1721 unsigned long addr; 1722 1723 if (args->flags & ~(I915_MMAP_WC)) 1724 return -EINVAL; 1725 1726 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1727 return -ENODEV; 1728 1729 obj = i915_gem_object_lookup(file, args->handle); 1730 if (!obj) 1731 return -ENOENT; 1732 1733 /* prime objects have no backing filp to GEM mmap 1734 * pages from. 1735 */ 1736 if (!obj->base.filp) { 1737 i915_gem_object_put(obj); 1738 return -ENXIO; 1739 } 1740 1741 addr = vm_mmap(obj->base.filp, 0, args->size, 1742 PROT_READ | PROT_WRITE, MAP_SHARED, 1743 args->offset); 1744 if (IS_ERR_VALUE(addr)) 1745 goto err; 1746 1747 if (args->flags & I915_MMAP_WC) { 1748 struct mm_struct *mm = current->mm; 1749 struct vm_area_struct *vma; 1750 1751 if (down_write_killable(&mm->mmap_sem)) { 1752 i915_gem_object_put(obj); 1753 return -EINTR; 1754 } 1755 vma = find_vma(mm, addr); 1756 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) 1757 vma->vm_page_prot = 1758 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1759 else 1760 addr = -ENOMEM; 1761 up_write(&mm->mmap_sem); 1762 if (IS_ERR_VALUE(addr)) 1763 goto err; 1764 1765 /* This may race, but that's ok, it only gets set */ 1766 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); 1767 } 1768 i915_gem_object_put(obj); 1769 1770 args->addr_ptr = (u64)addr; 1771 1772 return 0; 1773 1774 err: 1775 i915_gem_object_put(obj); 1776 1777 return addr; 1778 } 1779 1780 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) 1781 { 1782 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; 1783 } 1784 1785 /** 1786 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps 1787 * 1788 * A history of the GTT mmap interface: 1789 * 1790 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to 1791 * aligned and suitable for fencing, and still fit into the available 1792 * mappable space left by the pinned display objects. 
A classic problem 1793 * we called the page-fault-of-doom where we would ping-pong between 1794 * two objects that could not fit inside the GTT and so the memcpy 1795 * would page one object in at the expense of the other between every 1796 * single byte. 1797 * 1798 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none 1799 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the 1800 * object is too large for the available space (or simply too large 1801 * for the mappable aperture!), a view is created instead and faulted 1802 * into userspace. (This view is aligned and sized appropriately for 1803 * fenced access.) 1804 * 1805 * 2 - Recognise WC as a separate cache domain so that we can flush the 1806 * delayed writes via GTT before performing direct access via WC. 1807 * 1808 * Restrictions: 1809 * 1810 * * snoopable objects cannot be accessed via the GTT. It can cause machine 1811 * hangs on some architectures, corruption on others. An attempt to service 1812 * a GTT page fault from a snoopable object will generate a SIGBUS. 1813 * 1814 * * the object must be able to fit into RAM (physical memory, though no 1815 * limited to the mappable aperture). 1816 * 1817 * 1818 * Caveats: 1819 * 1820 * * a new GTT page fault will synchronize rendering from the GPU and flush 1821 * all data to system memory. Subsequent access will not be synchronized. 1822 * 1823 * * all mappings are revoked on runtime device suspend. 1824 * 1825 * * there are only 8, 16 or 32 fence registers to share between all users 1826 * (older machines require fence register for display and blitter access 1827 * as well). Contention of the fence registers will cause the previous users 1828 * to be unmapped and any new access will generate new page faults. 1829 * 1830 * * running out of memory while servicing a fault may generate a SIGBUS, 1831 * rather than the expected SIGSEGV. 1832 */ 1833 int i915_gem_mmap_gtt_version(void) 1834 { 1835 return 2; 1836 } 1837 1838 static inline struct i915_ggtt_view 1839 compute_partial_view(const struct drm_i915_gem_object *obj, 1840 pgoff_t page_offset, 1841 unsigned int chunk) 1842 { 1843 struct i915_ggtt_view view; 1844 1845 if (i915_gem_object_is_tiled(obj)) 1846 chunk = roundup(chunk, tile_row_pages(obj)); 1847 1848 view.type = I915_GGTT_VIEW_PARTIAL; 1849 view.partial.offset = rounddown(page_offset, chunk); 1850 view.partial.size = 1851 min_t(unsigned int, chunk, 1852 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 1853 1854 /* If the partial covers the entire object, just create a normal VMA. */ 1855 if (chunk >= obj->base.size >> PAGE_SHIFT) 1856 view.type = I915_GGTT_VIEW_NORMAL; 1857 1858 return view; 1859 } 1860 1861 /** 1862 * i915_gem_fault - fault a page into the GTT 1863 * @vmf: fault info 1864 * 1865 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1866 * from userspace. The fault handler takes care of binding the object to 1867 * the GTT (if needed), allocating and programming a fence register (again, 1868 * only if needed based on whether the old reg is still valid or the object 1869 * is tiled) and inserting a new PTE into the faulting process. 1870 * 1871 * Note that the faulting process may involve evicting existing objects 1872 * from the GTT and/or fence registers to make room. So performance may 1873 * suffer if the GTT working set is large or there are few fence registers 1874 * left. 
1875 * 1876 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 1877 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 1878 */ 1879 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 1880 { 1881 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 1882 struct vm_area_struct *area = vmf->vma; 1883 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 1884 struct drm_device *dev = obj->base.dev; 1885 struct drm_i915_private *dev_priv = to_i915(dev); 1886 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1887 bool write = area->vm_flags & VM_WRITE; 1888 intel_wakeref_t wakeref; 1889 struct i915_vma *vma; 1890 pgoff_t page_offset; 1891 int ret; 1892 1893 /* Sanity check that we allow writing into this object */ 1894 if (i915_gem_object_is_readonly(obj) && write) 1895 return VM_FAULT_SIGBUS; 1896 1897 /* We don't use vmf->pgoff since that has the fake offset */ 1898 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 1899 1900 trace_i915_gem_object_fault(obj, page_offset, true, write); 1901 1902 /* Try to flush the object off the GPU first without holding the lock. 1903 * Upon acquiring the lock, we will perform our sanity checks and then 1904 * repeat the flush holding the lock in the normal manner to catch cases 1905 * where we are gazumped. 1906 */ 1907 ret = i915_gem_object_wait(obj, 1908 I915_WAIT_INTERRUPTIBLE, 1909 MAX_SCHEDULE_TIMEOUT, 1910 NULL); 1911 if (ret) 1912 goto err; 1913 1914 ret = i915_gem_object_pin_pages(obj); 1915 if (ret) 1916 goto err; 1917 1918 wakeref = intel_runtime_pm_get(dev_priv); 1919 1920 ret = i915_mutex_lock_interruptible(dev); 1921 if (ret) 1922 goto err_rpm; 1923 1924 /* Access to snoopable pages through the GTT is incoherent. */ 1925 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 1926 ret = -EFAULT; 1927 goto err_unlock; 1928 } 1929 1930 1931 /* Now pin it into the GTT as needed */ 1932 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1933 PIN_MAPPABLE | 1934 PIN_NONBLOCK | 1935 PIN_NONFAULT); 1936 if (IS_ERR(vma)) { 1937 /* Use a partial view if it is bigger than available space */ 1938 struct i915_ggtt_view view = 1939 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 1940 unsigned int flags; 1941 1942 flags = PIN_MAPPABLE; 1943 if (view.type == I915_GGTT_VIEW_NORMAL) 1944 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 1945 1946 /* 1947 * Userspace is now writing through an untracked VMA, abandon 1948 * all hope that the hardware is able to track future writes. 
1949 */ 1950 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 1951 1952 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 1953 if (IS_ERR(vma) && !view.type) { 1954 flags = PIN_MAPPABLE; 1955 view.type = I915_GGTT_VIEW_PARTIAL; 1956 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 1957 } 1958 } 1959 if (IS_ERR(vma)) { 1960 ret = PTR_ERR(vma); 1961 goto err_unlock; 1962 } 1963 1964 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1965 if (ret) 1966 goto err_unpin; 1967 1968 ret = i915_vma_pin_fence(vma); 1969 if (ret) 1970 goto err_unpin; 1971 1972 /* Finally, remap it using the new GTT offset */ 1973 ret = remap_io_mapping(area, 1974 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 1975 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 1976 min_t(u64, vma->size, area->vm_end - area->vm_start), 1977 &ggtt->iomap); 1978 if (ret) 1979 goto err_fence; 1980 1981 /* Mark as being mmapped into userspace for later revocation */ 1982 assert_rpm_wakelock_held(dev_priv); 1983 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 1984 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 1985 GEM_BUG_ON(!obj->userfault_count); 1986 1987 i915_vma_set_ggtt_write(vma); 1988 1989 err_fence: 1990 i915_vma_unpin_fence(vma); 1991 err_unpin: 1992 __i915_vma_unpin(vma); 1993 err_unlock: 1994 mutex_unlock(&dev->struct_mutex); 1995 err_rpm: 1996 intel_runtime_pm_put(dev_priv, wakeref); 1997 i915_gem_object_unpin_pages(obj); 1998 err: 1999 switch (ret) { 2000 case -EIO: 2001 /* 2002 * We eat errors when the gpu is terminally wedged to avoid 2003 * userspace unduly crashing (gl has no provisions for mmaps to 2004 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2005 * and so needs to be reported. 2006 */ 2007 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2008 return VM_FAULT_SIGBUS; 2009 /* else: fall through */ 2010 case -EAGAIN: 2011 /* 2012 * EAGAIN means the gpu is hung and we'll wait for the error 2013 * handler to reset everything when re-faulting in 2014 * i915_mutex_lock_interruptible. 2015 */ 2016 case 0: 2017 case -ERESTARTSYS: 2018 case -EINTR: 2019 case -EBUSY: 2020 /* 2021 * EBUSY is ok: this just means that another thread 2022 * already did the job. 2023 */ 2024 return VM_FAULT_NOPAGE; 2025 case -ENOMEM: 2026 return VM_FAULT_OOM; 2027 case -ENOSPC: 2028 case -EFAULT: 2029 return VM_FAULT_SIGBUS; 2030 default: 2031 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2032 return VM_FAULT_SIGBUS; 2033 } 2034 } 2035 2036 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2037 { 2038 struct i915_vma *vma; 2039 2040 GEM_BUG_ON(!obj->userfault_count); 2041 2042 obj->userfault_count = 0; 2043 list_del(&obj->userfault_link); 2044 drm_vma_node_unmap(&obj->base.vma_node, 2045 obj->base.dev->anon_inode->i_mapping); 2046 2047 for_each_ggtt_vma(vma, obj) 2048 i915_vma_unset_userfault(vma); 2049 } 2050 2051 /** 2052 * i915_gem_release_mmap - remove physical page mappings 2053 * @obj: obj in question 2054 * 2055 * Preserve the reservation of the mmapping with the DRM core code, but 2056 * relinquish ownership of the pages back to the system. 2057 * 2058 * It is vital that we remove the page mapping if we have mapped a tiled 2059 * object through the GTT and then lose the fence register due to 2060 * resource pressure. Similarly if the object has been moved out of the 2061 * aperture, than pages mapped into userspace must be revoked. 
Removing the 2062 * mapping will then trigger a page fault on the next user access, allowing 2063 * fixup by i915_gem_fault(). 2064 */ 2065 void 2066 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2067 { 2068 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2069 intel_wakeref_t wakeref; 2070 2071 /* Serialisation between user GTT access and our code depends upon 2072 * revoking the CPU's PTE whilst the mutex is held. The next user 2073 * pagefault then has to wait until we release the mutex. 2074 * 2075 * Note that RPM complicates somewhat by adding an additional 2076 * requirement that operations to the GGTT be made holding the RPM 2077 * wakeref. 2078 */ 2079 lockdep_assert_held(&i915->drm.struct_mutex); 2080 wakeref = intel_runtime_pm_get(i915); 2081 2082 if (!obj->userfault_count) 2083 goto out; 2084 2085 __i915_gem_object_release_mmap(obj); 2086 2087 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2088 * memory transactions from userspace before we return. The TLB 2089 * flushing implied above by changing the PTE above *should* be 2090 * sufficient, an extra barrier here just provides us with a bit 2091 * of paranoid documentation about our requirement to serialise 2092 * memory writes before touching registers / GSM. 2093 */ 2094 wmb(); 2095 2096 out: 2097 intel_runtime_pm_put(i915, wakeref); 2098 } 2099 2100 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2101 { 2102 struct drm_i915_gem_object *obj, *on; 2103 int i; 2104 2105 /* 2106 * Only called during RPM suspend. All users of the userfault_list 2107 * must be holding an RPM wakeref to ensure that this can not 2108 * run concurrently with themselves (and use the struct_mutex for 2109 * protection between themselves). 2110 */ 2111 2112 list_for_each_entry_safe(obj, on, 2113 &dev_priv->mm.userfault_list, userfault_link) 2114 __i915_gem_object_release_mmap(obj); 2115 2116 /* The fence will be lost when the device powers down. If any were 2117 * in use by hardware (i.e. they are pinned), we should not be powering 2118 * down! All other fences will be reacquired by the user upon waking. 2119 */ 2120 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2121 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2122 2123 /* Ideally we want to assert that the fence register is not 2124 * live at this point (i.e. that no piece of code will be 2125 * trying to write through fence + GTT, as that both violates 2126 * our tracking of activity and associated locking/barriers, 2127 * but also is illegal given that the hw is powered down). 2128 * 2129 * Previously we used reg->pin_count as a "liveness" indicator. 2130 * That is not sufficient, and we need a more fine-grained 2131 * tool if we want to have a sanity check here. 
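		 *
		 * In the meantime, marking the register dirty below ensures
		 * its contents are rewritten before the fence is next used
		 * once the device wakes up again.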
2132 */ 2133 2134 if (!reg->vma) 2135 continue; 2136 2137 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2138 reg->dirty = true; 2139 } 2140 } 2141 2142 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2143 { 2144 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2145 int err; 2146 2147 err = drm_gem_create_mmap_offset(&obj->base); 2148 if (likely(!err)) 2149 return 0; 2150 2151 /* Attempt to reap some mmap space from dead objects */ 2152 do { 2153 err = i915_gem_wait_for_idle(dev_priv, 2154 I915_WAIT_INTERRUPTIBLE, 2155 MAX_SCHEDULE_TIMEOUT); 2156 if (err) 2157 break; 2158 2159 i915_gem_drain_freed_objects(dev_priv); 2160 err = drm_gem_create_mmap_offset(&obj->base); 2161 if (!err) 2162 break; 2163 2164 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2165 2166 return err; 2167 } 2168 2169 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2170 { 2171 drm_gem_free_mmap_offset(&obj->base); 2172 } 2173 2174 int 2175 i915_gem_mmap_gtt(struct drm_file *file, 2176 struct drm_device *dev, 2177 u32 handle, 2178 u64 *offset) 2179 { 2180 struct drm_i915_gem_object *obj; 2181 int ret; 2182 2183 obj = i915_gem_object_lookup(file, handle); 2184 if (!obj) 2185 return -ENOENT; 2186 2187 ret = i915_gem_object_create_mmap_offset(obj); 2188 if (ret == 0) 2189 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2190 2191 i915_gem_object_put(obj); 2192 return ret; 2193 } 2194 2195 /** 2196 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2197 * @dev: DRM device 2198 * @data: GTT mapping ioctl data 2199 * @file: GEM object info 2200 * 2201 * Simply returns the fake offset to userspace so it can mmap it. 2202 * The mmap call will end up in drm_gem_mmap(), which will set things 2203 * up so we can get faults in the handler above. 2204 * 2205 * The fault handler will take care of binding the object into the GTT 2206 * (since it may have been evicted to make room for something), allocating 2207 * a fence register, and mapping the appropriate aperture address into 2208 * userspace. 2209 */ 2210 int 2211 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2212 struct drm_file *file) 2213 { 2214 struct drm_i915_gem_mmap_gtt *args = data; 2215 2216 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2217 } 2218 2219 /* Immediately discard the backing storage */ 2220 static void 2221 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2222 { 2223 i915_gem_object_free_mmap_offset(obj); 2224 2225 if (obj->base.filp == NULL) 2226 return; 2227 2228 /* Our goal here is to return as much of the memory as 2229 * is possible back to the system as we are called from OOM. 2230 * To do this we must instruct the shmfs to drop all of its 2231 * backing pages, *now*. 
2232 */ 2233 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2234 obj->mm.madv = __I915_MADV_PURGED; 2235 obj->mm.pages = ERR_PTR(-EFAULT); 2236 } 2237 2238 /* Try to discard unwanted pages */ 2239 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2240 { 2241 struct address_space *mapping; 2242 2243 lockdep_assert_held(&obj->mm.lock); 2244 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2245 2246 switch (obj->mm.madv) { 2247 case I915_MADV_DONTNEED: 2248 i915_gem_object_truncate(obj); 2249 case __I915_MADV_PURGED: 2250 return; 2251 } 2252 2253 if (obj->base.filp == NULL) 2254 return; 2255 2256 mapping = obj->base.filp->f_mapping, 2257 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2258 } 2259 2260 /* 2261 * Move pages to appropriate lru and release the pagevec, decrementing the 2262 * ref count of those pages. 2263 */ 2264 static void check_release_pagevec(struct pagevec *pvec) 2265 { 2266 check_move_unevictable_pages(pvec); 2267 __pagevec_release(pvec); 2268 cond_resched(); 2269 } 2270 2271 static void 2272 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2273 struct sg_table *pages) 2274 { 2275 struct sgt_iter sgt_iter; 2276 struct pagevec pvec; 2277 struct page *page; 2278 2279 __i915_gem_object_release_shmem(obj, pages, true); 2280 2281 i915_gem_gtt_finish_pages(obj, pages); 2282 2283 if (i915_gem_object_needs_bit17_swizzle(obj)) 2284 i915_gem_object_save_bit_17_swizzle(obj, pages); 2285 2286 mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); 2287 2288 pagevec_init(&pvec); 2289 for_each_sgt_page(page, sgt_iter, pages) { 2290 if (obj->mm.dirty) 2291 set_page_dirty(page); 2292 2293 if (obj->mm.madv == I915_MADV_WILLNEED) 2294 mark_page_accessed(page); 2295 2296 if (!pagevec_add(&pvec, page)) 2297 check_release_pagevec(&pvec); 2298 } 2299 if (pagevec_count(&pvec)) 2300 check_release_pagevec(&pvec); 2301 obj->mm.dirty = false; 2302 2303 sg_free_table(pages); 2304 kfree(pages); 2305 } 2306 2307 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2308 { 2309 struct radix_tree_iter iter; 2310 void __rcu **slot; 2311 2312 rcu_read_lock(); 2313 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2314 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2315 rcu_read_unlock(); 2316 } 2317 2318 static struct sg_table * 2319 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2320 { 2321 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2322 struct sg_table *pages; 2323 2324 pages = fetch_and_zero(&obj->mm.pages); 2325 if (IS_ERR_OR_NULL(pages)) 2326 return pages; 2327 2328 spin_lock(&i915->mm.obj_lock); 2329 list_del(&obj->mm.link); 2330 spin_unlock(&i915->mm.obj_lock); 2331 2332 if (obj->mm.mapping) { 2333 void *ptr; 2334 2335 ptr = page_mask_bits(obj->mm.mapping); 2336 if (is_vmalloc_addr(ptr)) 2337 vunmap(ptr); 2338 else 2339 kunmap(kmap_to_page(ptr)); 2340 2341 obj->mm.mapping = NULL; 2342 } 2343 2344 __i915_gem_object_reset_page_iter(obj); 2345 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2346 2347 return pages; 2348 } 2349 2350 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2351 enum i915_mm_subclass subclass) 2352 { 2353 struct sg_table *pages; 2354 int ret; 2355 2356 if (i915_gem_object_has_pinned_pages(obj)) 2357 return -EBUSY; 2358 2359 GEM_BUG_ON(obj->bind_count); 2360 2361 /* May be called by shrinker from within get_pages() (on another bo) */ 2362 mutex_lock_nested(&obj->mm.lock, subclass); 2363 if 
(unlikely(atomic_read(&obj->mm.pages_pin_count))) { 2364 ret = -EBUSY; 2365 goto unlock; 2366 } 2367 2368 /* 2369 * ->put_pages might need to allocate memory for the bit17 swizzle 2370 * array, hence protect them from being reaped by removing them from gtt 2371 * lists early. 2372 */ 2373 pages = __i915_gem_object_unset_pages(obj); 2374 2375 /* 2376 * XXX Temporary hijinx to avoid updating all backends to handle 2377 * NULL pages. In the future, when we have more asynchronous 2378 * get_pages backends we should be better able to handle the 2379 * cancellation of the async task in a more uniform manner. 2380 */ 2381 if (!pages && !i915_gem_object_needs_async_cancel(obj)) 2382 pages = ERR_PTR(-EINVAL); 2383 2384 if (!IS_ERR(pages)) 2385 obj->ops->put_pages(obj, pages); 2386 2387 ret = 0; 2388 unlock: 2389 mutex_unlock(&obj->mm.lock); 2390 2391 return ret; 2392 } 2393 2394 bool i915_sg_trim(struct sg_table *orig_st) 2395 { 2396 struct sg_table new_st; 2397 struct scatterlist *sg, *new_sg; 2398 unsigned int i; 2399 2400 if (orig_st->nents == orig_st->orig_nents) 2401 return false; 2402 2403 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2404 return false; 2405 2406 new_sg = new_st.sgl; 2407 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2408 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2409 sg_dma_address(new_sg) = sg_dma_address(sg); 2410 sg_dma_len(new_sg) = sg_dma_len(sg); 2411 2412 new_sg = sg_next(new_sg); 2413 } 2414 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2415 2416 sg_free_table(orig_st); 2417 2418 *orig_st = new_st; 2419 return true; 2420 } 2421 2422 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2423 { 2424 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2425 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2426 unsigned long i; 2427 struct address_space *mapping; 2428 struct sg_table *st; 2429 struct scatterlist *sg; 2430 struct sgt_iter sgt_iter; 2431 struct page *page; 2432 unsigned long last_pfn = 0; /* suppress gcc warning */ 2433 unsigned int max_segment = i915_sg_segment_size(); 2434 unsigned int sg_page_sizes; 2435 struct pagevec pvec; 2436 gfp_t noreclaim; 2437 int ret; 2438 2439 /* 2440 * Assert that the object is not currently in any GPU domain. As it 2441 * wasn't in the GTT, there shouldn't be any way it could have been in 2442 * a GPU cache 2443 */ 2444 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2445 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2446 2447 /* 2448 * If there's no chance of allocating enough pages for the whole 2449 * object, bail early. 2450 */ 2451 if (page_count > totalram_pages()) 2452 return -ENOMEM; 2453 2454 st = kmalloc(sizeof(*st), GFP_KERNEL); 2455 if (st == NULL) 2456 return -ENOMEM; 2457 2458 rebuild_st: 2459 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2460 kfree(st); 2461 return -ENOMEM; 2462 } 2463 2464 /* 2465 * Get the list of pages out of our struct file. They'll be pinned 2466 * at this point until we release them. 
2467 * 2468 * Fail silently without starting the shrinker 2469 */ 2470 mapping = obj->base.filp->f_mapping; 2471 mapping_set_unevictable(mapping); 2472 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2473 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2474 2475 sg = st->sgl; 2476 st->nents = 0; 2477 sg_page_sizes = 0; 2478 for (i = 0; i < page_count; i++) { 2479 const unsigned int shrink[] = { 2480 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2481 0, 2482 }, *s = shrink; 2483 gfp_t gfp = noreclaim; 2484 2485 do { 2486 cond_resched(); 2487 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2488 if (likely(!IS_ERR(page))) 2489 break; 2490 2491 if (!*s) { 2492 ret = PTR_ERR(page); 2493 goto err_sg; 2494 } 2495 2496 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2497 2498 /* 2499 * We've tried hard to allocate the memory by reaping 2500 * our own buffer, now let the real VM do its job and 2501 * go down in flames if truly OOM. 2502 * 2503 * However, since graphics tend to be disposable, 2504 * defer the oom here by reporting the ENOMEM back 2505 * to userspace. 2506 */ 2507 if (!*s) { 2508 /* reclaim and warn, but no oom */ 2509 gfp = mapping_gfp_mask(mapping); 2510 2511 /* 2512 * Our bo are always dirty and so we require 2513 * kswapd to reclaim our pages (direct reclaim 2514 * does not effectively begin pageout of our 2515 * buffers on its own). However, direct reclaim 2516 * only waits for kswapd when under allocation 2517 * congestion. So as a result __GFP_RECLAIM is 2518 * unreliable and fails to actually reclaim our 2519 * dirty pages -- unless you try over and over 2520 * again with !__GFP_NORETRY. However, we still 2521 * want to fail this allocation rather than 2522 * trigger the out-of-memory killer and for 2523 * this we want __GFP_RETRY_MAYFAIL. 2524 */ 2525 gfp |= __GFP_RETRY_MAYFAIL; 2526 } 2527 } while (1); 2528 2529 if (!i || 2530 sg->length >= max_segment || 2531 page_to_pfn(page) != last_pfn + 1) { 2532 if (i) { 2533 sg_page_sizes |= sg->length; 2534 sg = sg_next(sg); 2535 } 2536 st->nents++; 2537 sg_set_page(sg, page, PAGE_SIZE, 0); 2538 } else { 2539 sg->length += PAGE_SIZE; 2540 } 2541 last_pfn = page_to_pfn(page); 2542 2543 /* Check that the i965g/gm workaround works. */ 2544 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2545 } 2546 if (sg) { /* loop terminated early; short sg table */ 2547 sg_page_sizes |= sg->length; 2548 sg_mark_end(sg); 2549 } 2550 2551 /* Trim unused sg entries to avoid wasting memory. */ 2552 i915_sg_trim(st); 2553 2554 ret = i915_gem_gtt_prepare_pages(obj, st); 2555 if (ret) { 2556 /* 2557 * DMA remapping failed? One possible cause is that 2558 * it could not reserve enough large entries, asking 2559 * for PAGE_SIZE chunks instead may be helpful. 
2560 */ 2561 if (max_segment > PAGE_SIZE) { 2562 for_each_sgt_page(page, sgt_iter, st) 2563 put_page(page); 2564 sg_free_table(st); 2565 2566 max_segment = PAGE_SIZE; 2567 goto rebuild_st; 2568 } else { 2569 dev_warn(&dev_priv->drm.pdev->dev, 2570 "Failed to DMA remap %lu pages\n", 2571 page_count); 2572 goto err_pages; 2573 } 2574 } 2575 2576 if (i915_gem_object_needs_bit17_swizzle(obj)) 2577 i915_gem_object_do_bit_17_swizzle(obj, st); 2578 2579 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2580 2581 return 0; 2582 2583 err_sg: 2584 sg_mark_end(sg); 2585 err_pages: 2586 mapping_clear_unevictable(mapping); 2587 pagevec_init(&pvec); 2588 for_each_sgt_page(page, sgt_iter, st) { 2589 if (!pagevec_add(&pvec, page)) 2590 check_release_pagevec(&pvec); 2591 } 2592 if (pagevec_count(&pvec)) 2593 check_release_pagevec(&pvec); 2594 sg_free_table(st); 2595 kfree(st); 2596 2597 /* 2598 * shmemfs first checks if there is enough memory to allocate the page 2599 * and reports ENOSPC should there be insufficient, along with the usual 2600 * ENOMEM for a genuine allocation failure. 2601 * 2602 * We use ENOSPC in our driver to mean that we have run out of aperture 2603 * space and so want to translate the error from shmemfs back to our 2604 * usual understanding of ENOMEM. 2605 */ 2606 if (ret == -ENOSPC) 2607 ret = -ENOMEM; 2608 2609 return ret; 2610 } 2611 2612 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2613 struct sg_table *pages, 2614 unsigned int sg_page_sizes) 2615 { 2616 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2617 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2618 int i; 2619 2620 lockdep_assert_held(&obj->mm.lock); 2621 2622 obj->mm.get_page.sg_pos = pages->sgl; 2623 obj->mm.get_page.sg_idx = 0; 2624 2625 obj->mm.pages = pages; 2626 2627 if (i915_gem_object_is_tiled(obj) && 2628 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2629 GEM_BUG_ON(obj->mm.quirked); 2630 __i915_gem_object_pin_pages(obj); 2631 obj->mm.quirked = true; 2632 } 2633 2634 GEM_BUG_ON(!sg_page_sizes); 2635 obj->mm.page_sizes.phys = sg_page_sizes; 2636 2637 /* 2638 * Calculate the supported page-sizes which fit into the given 2639 * sg_page_sizes. This will give us the page-sizes which we may be able 2640 * to use opportunistically when later inserting into the GTT. For 2641 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2642 * 64K or 4K pages, although in practice this will depend on a number of 2643 * other factors. 2644 */ 2645 obj->mm.page_sizes.sg = 0; 2646 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2647 if (obj->mm.page_sizes.phys & ~0u << i) 2648 obj->mm.page_sizes.sg |= BIT(i); 2649 } 2650 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2651 2652 spin_lock(&i915->mm.obj_lock); 2653 list_add(&obj->mm.link, &i915->mm.unbound_list); 2654 spin_unlock(&i915->mm.obj_lock); 2655 } 2656 2657 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2658 { 2659 int err; 2660 2661 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2662 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2663 return -EFAULT; 2664 } 2665 2666 err = obj->ops->get_pages(obj); 2667 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2668 2669 return err; 2670 } 2671 2672 /* Ensure that the associated pages are gathered from the backing storage 2673 * and pinned into our object. 
i915_gem_object_pin_pages() may be called 2674 * multiple times before they are released by a single call to 2675 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2676 * either as a result of memory pressure (reaping pages under the shrinker) 2677 * or as the object is itself released. 2678 */ 2679 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2680 { 2681 int err; 2682 2683 err = mutex_lock_interruptible(&obj->mm.lock); 2684 if (err) 2685 return err; 2686 2687 if (unlikely(!i915_gem_object_has_pages(obj))) { 2688 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2689 2690 err = ____i915_gem_object_get_pages(obj); 2691 if (err) 2692 goto unlock; 2693 2694 smp_mb__before_atomic(); 2695 } 2696 atomic_inc(&obj->mm.pages_pin_count); 2697 2698 unlock: 2699 mutex_unlock(&obj->mm.lock); 2700 return err; 2701 } 2702 2703 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2704 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2705 enum i915_map_type type) 2706 { 2707 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2708 struct sg_table *sgt = obj->mm.pages; 2709 struct sgt_iter sgt_iter; 2710 struct page *page; 2711 struct page *stack_pages[32]; 2712 struct page **pages = stack_pages; 2713 unsigned long i = 0; 2714 pgprot_t pgprot; 2715 void *addr; 2716 2717 /* A single page can always be kmapped */ 2718 if (n_pages == 1 && type == I915_MAP_WB) 2719 return kmap(sg_page(sgt->sgl)); 2720 2721 if (n_pages > ARRAY_SIZE(stack_pages)) { 2722 /* Too big for stack -- allocate temporary array instead */ 2723 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2724 if (!pages) 2725 return NULL; 2726 } 2727 2728 for_each_sgt_page(page, sgt_iter, sgt) 2729 pages[i++] = page; 2730 2731 /* Check that we have the expected number of pages */ 2732 GEM_BUG_ON(i != n_pages); 2733 2734 switch (type) { 2735 default: 2736 MISSING_CASE(type); 2737 /* fallthrough to use PAGE_KERNEL anyway */ 2738 case I915_MAP_WB: 2739 pgprot = PAGE_KERNEL; 2740 break; 2741 case I915_MAP_WC: 2742 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2743 break; 2744 } 2745 addr = vmap(pages, n_pages, 0, pgprot); 2746 2747 if (pages != stack_pages) 2748 kvfree(pages); 2749 2750 return addr; 2751 } 2752 2753 /* get, pin, and map the pages of the object into kernel space */ 2754 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2755 enum i915_map_type type) 2756 { 2757 enum i915_map_type has_type; 2758 bool pinned; 2759 void *ptr; 2760 int ret; 2761 2762 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2763 return ERR_PTR(-ENXIO); 2764 2765 ret = mutex_lock_interruptible(&obj->mm.lock); 2766 if (ret) 2767 return ERR_PTR(ret); 2768 2769 pinned = !(type & I915_MAP_OVERRIDE); 2770 type &= ~I915_MAP_OVERRIDE; 2771 2772 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2773 if (unlikely(!i915_gem_object_has_pages(obj))) { 2774 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2775 2776 ret = ____i915_gem_object_get_pages(obj); 2777 if (ret) 2778 goto err_unlock; 2779 2780 smp_mb__before_atomic(); 2781 } 2782 atomic_inc(&obj->mm.pages_pin_count); 2783 pinned = false; 2784 } 2785 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2786 2787 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2788 if (ptr && has_type != type) { 2789 if (pinned) { 2790 ret = -EBUSY; 2791 goto err_unpin; 2792 } 2793 2794 if (is_vmalloc_addr(ptr)) 2795 vunmap(ptr); 2796 else 2797 kunmap(kmap_to_page(ptr)); 2798 2799 ptr = obj->mm.mapping = NULL; 2800 } 2801 2802 if (!ptr) { 
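		/* No cached mapping of the requested type: create one now */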
2803 ptr = i915_gem_object_map(obj, type); 2804 if (!ptr) { 2805 ret = -ENOMEM; 2806 goto err_unpin; 2807 } 2808 2809 obj->mm.mapping = page_pack_bits(ptr, type); 2810 } 2811 2812 out_unlock: 2813 mutex_unlock(&obj->mm.lock); 2814 return ptr; 2815 2816 err_unpin: 2817 atomic_dec(&obj->mm.pages_pin_count); 2818 err_unlock: 2819 ptr = ERR_PTR(ret); 2820 goto out_unlock; 2821 } 2822 2823 static int 2824 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2825 const struct drm_i915_gem_pwrite *arg) 2826 { 2827 struct address_space *mapping = obj->base.filp->f_mapping; 2828 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2829 u64 remain, offset; 2830 unsigned int pg; 2831 2832 /* Before we instantiate/pin the backing store for our use, we 2833 * can prepopulate the shmemfs filp efficiently using a write into 2834 * the pagecache. We avoid the penalty of instantiating all the 2835 * pages, important if the user is just writing to a few and never 2836 * uses the object on the GPU, and using a direct write into shmemfs 2837 * allows it to avoid the cost of retrieving a page (either swapin 2838 * or clearing-before-use) before it is overwritten. 2839 */ 2840 if (i915_gem_object_has_pages(obj)) 2841 return -ENODEV; 2842 2843 if (obj->mm.madv != I915_MADV_WILLNEED) 2844 return -EFAULT; 2845 2846 /* Before the pages are instantiated the object is treated as being 2847 * in the CPU domain. The pages will be clflushed as required before 2848 * use, and we can freely write into the pages directly. If userspace 2849 * races pwrite with any other operation; corruption will ensue - 2850 * that is userspace's prerogative! 2851 */ 2852 2853 remain = arg->size; 2854 offset = arg->offset; 2855 pg = offset_in_page(offset); 2856 2857 do { 2858 unsigned int len, unwritten; 2859 struct page *page; 2860 void *data, *vaddr; 2861 int err; 2862 2863 len = PAGE_SIZE - pg; 2864 if (len > remain) 2865 len = remain; 2866 2867 err = pagecache_write_begin(obj->base.filp, mapping, 2868 offset, len, 0, 2869 &page, &data); 2870 if (err < 0) 2871 return err; 2872 2873 vaddr = kmap(page); 2874 unwritten = copy_from_user(vaddr + pg, user_data, len); 2875 kunmap(page); 2876 2877 err = pagecache_write_end(obj->base.filp, mapping, 2878 offset, len, len - unwritten, 2879 page, data); 2880 if (err < 0) 2881 return err; 2882 2883 if (unwritten) 2884 return -EFAULT; 2885 2886 remain -= len; 2887 user_data += len; 2888 offset += len; 2889 pg = 0; 2890 } while (remain); 2891 2892 return 0; 2893 } 2894 2895 static bool match_ring(struct i915_request *rq) 2896 { 2897 struct drm_i915_private *dev_priv = rq->i915; 2898 u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); 2899 2900 return ring == i915_ggtt_offset(rq->ring->vma); 2901 } 2902 2903 struct i915_request * 2904 i915_gem_find_active_request(struct intel_engine_cs *engine) 2905 { 2906 struct i915_request *request, *active = NULL; 2907 unsigned long flags; 2908 2909 /* 2910 * We are called by the error capture, reset and to dump engine 2911 * state at random points in time. In particular, note that neither is 2912 * crucially ordered with an interrupt. After a hang, the GPU is dead 2913 * and we assume that no more writes can happen (we waited long enough 2914 * for all writes that were in transaction to be flushed) - adding an 2915 * extra delay for a recent interrupt is pointless. Hence, we do 2916 * not need an engine->irq_seqno_barrier() before the seqno reads. 
2917 * At all other times, we must assume the GPU is still running, but 2918 * we only care about the snapshot of this moment. 2919 */ 2920 spin_lock_irqsave(&engine->timeline.lock, flags); 2921 list_for_each_entry(request, &engine->timeline.requests, link) { 2922 if (i915_request_completed(request)) 2923 continue; 2924 2925 if (!i915_request_started(request)) 2926 break; 2927 2928 /* More than one preemptible request may match! */ 2929 if (!match_ring(request)) 2930 break; 2931 2932 active = request; 2933 break; 2934 } 2935 spin_unlock_irqrestore(&engine->timeline.lock, flags); 2936 2937 return active; 2938 } 2939 2940 static void 2941 i915_gem_retire_work_handler(struct work_struct *work) 2942 { 2943 struct drm_i915_private *dev_priv = 2944 container_of(work, typeof(*dev_priv), gt.retire_work.work); 2945 struct drm_device *dev = &dev_priv->drm; 2946 2947 /* Come back later if the device is busy... */ 2948 if (mutex_trylock(&dev->struct_mutex)) { 2949 i915_retire_requests(dev_priv); 2950 mutex_unlock(&dev->struct_mutex); 2951 } 2952 2953 /* 2954 * Keep the retire handler running until we are finally idle. 2955 * We do not need to do this test under locking as in the worst-case 2956 * we queue the retire worker once too often. 2957 */ 2958 if (READ_ONCE(dev_priv->gt.awake)) 2959 queue_delayed_work(dev_priv->wq, 2960 &dev_priv->gt.retire_work, 2961 round_jiffies_up_relative(HZ)); 2962 } 2963 2964 static void shrink_caches(struct drm_i915_private *i915) 2965 { 2966 /* 2967 * kmem_cache_shrink() discards empty slabs and reorders partially 2968 * filled slabs to prioritise allocating from the mostly full slabs, 2969 * with the aim of reducing fragmentation. 2970 */ 2971 kmem_cache_shrink(i915->priorities); 2972 kmem_cache_shrink(i915->dependencies); 2973 kmem_cache_shrink(i915->requests); 2974 kmem_cache_shrink(i915->luts); 2975 kmem_cache_shrink(i915->vmas); 2976 kmem_cache_shrink(i915->objects); 2977 } 2978 2979 struct sleep_rcu_work { 2980 union { 2981 struct rcu_head rcu; 2982 struct work_struct work; 2983 }; 2984 struct drm_i915_private *i915; 2985 unsigned int epoch; 2986 }; 2987 2988 static inline bool 2989 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 2990 { 2991 /* 2992 * There is a small chance that the epoch wrapped since we started 2993 * sleeping. If we assume that epoch is at least a u32, then it will 2994 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 
2995 */ 2996 return epoch == READ_ONCE(i915->gt.epoch); 2997 } 2998 2999 static void __sleep_work(struct work_struct *work) 3000 { 3001 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3002 struct drm_i915_private *i915 = s->i915; 3003 unsigned int epoch = s->epoch; 3004 3005 kfree(s); 3006 if (same_epoch(i915, epoch)) 3007 shrink_caches(i915); 3008 } 3009 3010 static void __sleep_rcu(struct rcu_head *rcu) 3011 { 3012 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3013 struct drm_i915_private *i915 = s->i915; 3014 3015 destroy_rcu_head(&s->rcu); 3016 3017 if (same_epoch(i915, s->epoch)) { 3018 INIT_WORK(&s->work, __sleep_work); 3019 queue_work(i915->wq, &s->work); 3020 } else { 3021 kfree(s); 3022 } 3023 } 3024 3025 static inline bool 3026 new_requests_since_last_retire(const struct drm_i915_private *i915) 3027 { 3028 return (READ_ONCE(i915->gt.active_requests) || 3029 work_pending(&i915->gt.idle_work.work)); 3030 } 3031 3032 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3033 { 3034 struct intel_engine_cs *engine; 3035 enum intel_engine_id id; 3036 3037 if (i915_terminally_wedged(&i915->gpu_error)) 3038 return; 3039 3040 GEM_BUG_ON(i915->gt.active_requests); 3041 for_each_engine(engine, i915, id) { 3042 GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request)); 3043 GEM_BUG_ON(engine->last_retired_context != 3044 to_intel_context(i915->kernel_context, engine)); 3045 } 3046 } 3047 3048 static void 3049 i915_gem_idle_work_handler(struct work_struct *work) 3050 { 3051 struct drm_i915_private *dev_priv = 3052 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3053 unsigned int epoch = I915_EPOCH_INVALID; 3054 bool rearm_hangcheck; 3055 3056 if (!READ_ONCE(dev_priv->gt.awake)) 3057 return; 3058 3059 if (READ_ONCE(dev_priv->gt.active_requests)) 3060 return; 3061 3062 /* 3063 * Flush out the last user context, leaving only the pinned 3064 * kernel context resident. When we are idling on the kernel_context, 3065 * no more new requests (with a context switch) are emitted and we 3066 * can finally rest. A consequence is that the idle work handler is 3067 * always called at least twice before idling (and if the system is 3068 * idle that implies a round trip through the retire worker). 3069 */ 3070 mutex_lock(&dev_priv->drm.struct_mutex); 3071 i915_gem_switch_to_kernel_context(dev_priv); 3072 mutex_unlock(&dev_priv->drm.struct_mutex); 3073 3074 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3075 READ_ONCE(dev_priv->gt.active_requests)); 3076 3077 /* 3078 * Wait for last execlists context complete, but bail out in case a 3079 * new request is submitted. As we don't trust the hardware, we 3080 * continue on if the wait times out. This is necessary to allow 3081 * the machine to suspend even if the hardware dies, and we will 3082 * try to recover in resume (after depriving the hardware of power, 3083 * it may be in a better mmod). 
3084 */ 3085 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3086 intel_engines_are_idle(dev_priv), 3087 I915_IDLE_ENGINES_TIMEOUT * 1000, 3088 10, 500); 3089 3090 rearm_hangcheck = 3091 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3092 3093 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3094 /* Currently busy, come back later */ 3095 mod_delayed_work(dev_priv->wq, 3096 &dev_priv->gt.idle_work, 3097 msecs_to_jiffies(50)); 3098 goto out_rearm; 3099 } 3100 3101 /* 3102 * New request retired after this work handler started, extend active 3103 * period until next instance of the work. 3104 */ 3105 if (new_requests_since_last_retire(dev_priv)) 3106 goto out_unlock; 3107 3108 epoch = __i915_gem_park(dev_priv); 3109 3110 assert_kernel_context_is_current(dev_priv); 3111 3112 rearm_hangcheck = false; 3113 out_unlock: 3114 mutex_unlock(&dev_priv->drm.struct_mutex); 3115 3116 out_rearm: 3117 if (rearm_hangcheck) { 3118 GEM_BUG_ON(!dev_priv->gt.awake); 3119 i915_queue_hangcheck(dev_priv); 3120 } 3121 3122 /* 3123 * When we are idle, it is an opportune time to reap our caches. 3124 * However, we have many objects that utilise RCU and the ordered 3125 * i915->wq that this work is executing on. To try and flush any 3126 * pending frees now we are idle, we first wait for an RCU grace 3127 * period, and then queue a task (that will run last on the wq) to 3128 * shrink and re-optimize the caches. 3129 */ 3130 if (same_epoch(dev_priv, epoch)) { 3131 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3132 if (s) { 3133 init_rcu_head(&s->rcu); 3134 s->i915 = dev_priv; 3135 s->epoch = epoch; 3136 call_rcu(&s->rcu, __sleep_rcu); 3137 } 3138 } 3139 } 3140 3141 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3142 { 3143 struct drm_i915_private *i915 = to_i915(gem->dev); 3144 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3145 struct drm_i915_file_private *fpriv = file->driver_priv; 3146 struct i915_lut_handle *lut, *ln; 3147 3148 mutex_lock(&i915->drm.struct_mutex); 3149 3150 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3151 struct i915_gem_context *ctx = lut->ctx; 3152 struct i915_vma *vma; 3153 3154 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3155 if (ctx->file_priv != fpriv) 3156 continue; 3157 3158 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3159 GEM_BUG_ON(vma->obj != obj); 3160 3161 /* We allow the process to have multiple handles to the same 3162 * vma, in the same fd namespace, by virtue of flink/open. 3163 */ 3164 GEM_BUG_ON(!vma->open_count); 3165 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3166 i915_vma_close(vma); 3167 3168 list_del(&lut->obj_link); 3169 list_del(&lut->ctx_link); 3170 3171 kmem_cache_free(i915->luts, lut); 3172 __i915_gem_object_release_unless_active(obj); 3173 } 3174 3175 mutex_unlock(&i915->drm.struct_mutex); 3176 } 3177 3178 static unsigned long to_wait_timeout(s64 timeout_ns) 3179 { 3180 if (timeout_ns < 0) 3181 return MAX_SCHEDULE_TIMEOUT; 3182 3183 if (timeout_ns == 0) 3184 return 0; 3185 3186 return nsecs_to_jiffies_timeout(timeout_ns); 3187 } 3188 3189 /** 3190 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3191 * @dev: drm device pointer 3192 * @data: ioctl data blob 3193 * @file: drm file pointer 3194 * 3195 * Returns 0 if successful, else an error is returned with the remaining time in 3196 * the timeout parameter. 
 * -ETIME: object is still busy after timeout
 * -ERESTARTSYS: signal interrupted the wait
 * -ENOENT: object doesn't exist
 * Also possible, but rare:
 * -EAGAIN: incomplete, restart syscall
 * -ENOMEM: damn
 * -ENODEV: Internal IRQ fail
 * -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	ktime_t start;
	long ret;

	if (args->flags != 0)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->bo_handle);
	if (!obj)
		return -ENOENT;

	start = ktime_get();

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   I915_WAIT_ALL,
				   to_wait_timeout(args->timeout_ns),
				   to_rps_client(file));

	if (args->timeout_ns > 0) {
		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
		if (args->timeout_ns < 0)
			args->timeout_ns = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
			args->timeout_ns = 0;

		/* Asked to wait beyond the jiffie/scheduler precision? */
		if (ret == -ETIME && args->timeout_ns)
			ret = -EAGAIN;
	}

	i915_gem_object_put(obj);
	return ret;
}

static int wait_for_engines(struct drm_i915_private *i915)
{
	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
		dev_err(i915->drm.dev,
			"Failed to idle engines, declaring wedged!\n");
		GEM_TRACE_DUMP();
		i915_gem_set_wedged(i915);
		return -EIO;
	}

	return 0;
}

static long
wait_for_timelines(struct drm_i915_private *i915,
		   unsigned int flags, long timeout)
{
	struct i915_gt_timelines *gt = &i915->gt.timelines;
	struct i915_timeline *tl;

	if (!READ_ONCE(i915->gt.active_requests))
		return timeout;

	mutex_lock(&gt->mutex);
	list_for_each_entry(tl, &gt->active_list, link) {
		struct i915_request *rq;

		rq = i915_active_request_get_unlocked(&tl->last_request);
		if (!rq)
			continue;

		mutex_unlock(&gt->mutex);

		/*
		 * "Race-to-idle".
		 *
		 * Switching to the kernel context is often used as a
		 * synchronous step prior to idling, e.g. in suspend for
		 * flushing all current operations to memory before sleeping.
		 * These we want to complete as quickly as possible to avoid
		 * prolonged stalls, so allow the gpu to boost to maximum
		 * clocks.
3302 */ 3303 if (flags & I915_WAIT_FOR_IDLE_BOOST) 3304 gen6_rps_boost(rq, NULL); 3305 3306 timeout = i915_request_wait(rq, flags, timeout); 3307 i915_request_put(rq); 3308 if (timeout < 0) 3309 return timeout; 3310 3311 /* restart after reacquiring the lock */ 3312 mutex_lock(>->mutex); 3313 tl = list_entry(>->active_list, typeof(*tl), link); 3314 } 3315 mutex_unlock(>->mutex); 3316 3317 return timeout; 3318 } 3319 3320 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 3321 unsigned int flags, long timeout) 3322 { 3323 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 3324 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 3325 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 3326 3327 /* If the device is asleep, we have no requests outstanding */ 3328 if (!READ_ONCE(i915->gt.awake)) 3329 return 0; 3330 3331 timeout = wait_for_timelines(i915, flags, timeout); 3332 if (timeout < 0) 3333 return timeout; 3334 3335 if (flags & I915_WAIT_LOCKED) { 3336 int err; 3337 3338 lockdep_assert_held(&i915->drm.struct_mutex); 3339 3340 if (GEM_SHOW_DEBUG() && !timeout) { 3341 /* Presume that timeout was non-zero to begin with! */ 3342 dev_warn(&i915->drm.pdev->dev, 3343 "Missed idle-completion interrupt!\n"); 3344 GEM_TRACE_DUMP(); 3345 } 3346 3347 err = wait_for_engines(i915); 3348 if (err) 3349 return err; 3350 3351 i915_retire_requests(i915); 3352 GEM_BUG_ON(i915->gt.active_requests); 3353 } 3354 3355 return 0; 3356 } 3357 3358 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3359 { 3360 /* 3361 * We manually flush the CPU domain so that we can override and 3362 * force the flush for the display, and perform it asyncrhonously. 3363 */ 3364 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3365 if (obj->cache_dirty) 3366 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3367 obj->write_domain = 0; 3368 } 3369 3370 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3371 { 3372 if (!READ_ONCE(obj->pin_global)) 3373 return; 3374 3375 mutex_lock(&obj->base.dev->struct_mutex); 3376 __i915_gem_object_flush_for_display(obj); 3377 mutex_unlock(&obj->base.dev->struct_mutex); 3378 } 3379 3380 /** 3381 * Moves a single object to the WC read, and possibly write domain. 3382 * @obj: object to act on 3383 * @write: ask for write access or read only 3384 * 3385 * This function returns when the move is complete, including waiting on 3386 * flushes to occur. 3387 */ 3388 int 3389 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3390 { 3391 int ret; 3392 3393 lockdep_assert_held(&obj->base.dev->struct_mutex); 3394 3395 ret = i915_gem_object_wait(obj, 3396 I915_WAIT_INTERRUPTIBLE | 3397 I915_WAIT_LOCKED | 3398 (write ? I915_WAIT_ALL : 0), 3399 MAX_SCHEDULE_TIMEOUT, 3400 NULL); 3401 if (ret) 3402 return ret; 3403 3404 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3405 return 0; 3406 3407 /* Flush and acquire obj->pages so that we are coherent through 3408 * direct access in memory with previous cached writes through 3409 * shmemfs and that our cache domain tracking remains valid. 3410 * For example, if the obj->filp was moved to swap without us 3411 * being notified and releasing the pages, we would mistakenly 3412 * continue to assume that the obj remained out of the CPU cached 3413 * domain. 
3414 */ 3415 ret = i915_gem_object_pin_pages(obj); 3416 if (ret) 3417 return ret; 3418 3419 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3420 3421 /* Serialise direct access to this object with the barriers for 3422 * coherent writes from the GPU, by effectively invalidating the 3423 * WC domain upon first access. 3424 */ 3425 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3426 mb(); 3427 3428 /* It should now be out of any other write domains, and we can update 3429 * the domain values for our changes. 3430 */ 3431 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3432 obj->read_domains |= I915_GEM_DOMAIN_WC; 3433 if (write) { 3434 obj->read_domains = I915_GEM_DOMAIN_WC; 3435 obj->write_domain = I915_GEM_DOMAIN_WC; 3436 obj->mm.dirty = true; 3437 } 3438 3439 i915_gem_object_unpin_pages(obj); 3440 return 0; 3441 } 3442 3443 /** 3444 * Moves a single object to the GTT read, and possibly write domain. 3445 * @obj: object to act on 3446 * @write: ask for write access or read only 3447 * 3448 * This function returns when the move is complete, including waiting on 3449 * flushes to occur. 3450 */ 3451 int 3452 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3453 { 3454 int ret; 3455 3456 lockdep_assert_held(&obj->base.dev->struct_mutex); 3457 3458 ret = i915_gem_object_wait(obj, 3459 I915_WAIT_INTERRUPTIBLE | 3460 I915_WAIT_LOCKED | 3461 (write ? I915_WAIT_ALL : 0), 3462 MAX_SCHEDULE_TIMEOUT, 3463 NULL); 3464 if (ret) 3465 return ret; 3466 3467 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3468 return 0; 3469 3470 /* Flush and acquire obj->pages so that we are coherent through 3471 * direct access in memory with previous cached writes through 3472 * shmemfs and that our cache domain tracking remains valid. 3473 * For example, if the obj->filp was moved to swap without us 3474 * being notified and releasing the pages, we would mistakenly 3475 * continue to assume that the obj remained out of the CPU cached 3476 * domain. 3477 */ 3478 ret = i915_gem_object_pin_pages(obj); 3479 if (ret) 3480 return ret; 3481 3482 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3483 3484 /* Serialise direct access to this object with the barriers for 3485 * coherent writes from the GPU, by effectively invalidating the 3486 * GTT domain upon first access. 3487 */ 3488 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3489 mb(); 3490 3491 /* It should now be out of any other write domains, and we can update 3492 * the domain values for our changes. 3493 */ 3494 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3495 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3496 if (write) { 3497 obj->read_domains = I915_GEM_DOMAIN_GTT; 3498 obj->write_domain = I915_GEM_DOMAIN_GTT; 3499 obj->mm.dirty = true; 3500 } 3501 3502 i915_gem_object_unpin_pages(obj); 3503 return 0; 3504 } 3505 3506 /** 3507 * Changes the cache-level of an object across all VMA. 3508 * @obj: object to act on 3509 * @cache_level: new cache level to set for the object 3510 * 3511 * After this function returns, the object will be in the new cache-level 3512 * across all GTT and the contents of the backing storage will be coherent, 3513 * with respect to the new cache-level. In order to keep the backing storage 3514 * coherent for all users, we only allow a single cache level to be set 3515 * globally on the object and prevent it from being changed whilst the 3516 * hardware is reading from the object. 
That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct i915_vma *vma;
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (obj->cache_level == cache_level)
		return 0;

	/* Inspect the list of currently bound VMA and unbind any that would
	 * be invalid given the new cache-level. This is principally to
	 * catch the issue of the CS prefetch crossing page boundaries and
	 * reading an invalid PTE on older architectures.
	 */
restart:
	list_for_each_entry(vma, &obj->vma.list, obj_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		if (i915_vma_is_pinned(vma)) {
			DRM_DEBUG("can not change the cache level of pinned objects\n");
			return -EBUSY;
		}

		if (!i915_vma_is_closed(vma) &&
		    i915_gem_valid_gtt_space(vma, cache_level))
			continue;

		ret = i915_vma_unbind(vma);
		if (ret)
			return ret;

		/* As unbinding may affect other elements in the
		 * obj->vma_list (due to side-effects from retiring
		 * an active vma), play safe and restart the iterator.
		 */
		goto restart;
	}

	/* We can reuse the existing drm_mm nodes but need to change the
	 * cache-level on the PTE. We could simply unbind them all and
	 * rebind with the correct cache-level on next use. However since
	 * we already have a valid slot, dma mapping, pages etc, we may as
	 * well rewrite the PTE in the belief that doing so tramples upon
	 * less state and so involves less work.
	 */
	if (obj->bind_count) {
		/* Before we change the PTE, the GPU must not be accessing it.
		 * If we wait upon the object, we know that all the bound
		 * VMA are no longer active.
		 */
		ret = i915_gem_object_wait(obj,
					   I915_WAIT_INTERRUPTIBLE |
					   I915_WAIT_LOCKED |
					   I915_WAIT_ALL,
					   MAX_SCHEDULE_TIMEOUT,
					   NULL);
		if (ret)
			return ret;

		if (!HAS_LLC(to_i915(obj->base.dev)) &&
		    cache_level != I915_CACHE_NONE) {
			/* Access to snoopable pages through the GTT is
			 * incoherent and on some machines causes a hard
			 * lockup. Relinquish the CPU mmapping to force
			 * userspace to refault in the pages and we can
			 * then double check if the GTT mapping is still
			 * valid for that pointer access.
			 */
			i915_gem_release_mmap(obj);

			/* As we no longer need a fence for GTT access,
			 * we can relinquish it now (and so prevent having
			 * to steal a fence from someone else on the next
			 * fence request). Note GPU activity would have
			 * dropped the fence as all snoopable access is
			 * supposed to be linear.
			 */
			for_each_ggtt_vma(vma, obj) {
				ret = i915_vma_put_fence(vma);
				if (ret)
					return ret;
			}
		} else {
			/* We either have incoherent backing store and
			 * so no GTT access or the architecture is fully
			 * coherent. In such cases, existing GTT mmaps
			 * ignore the cache bit in the PTE and we can
			 * rewrite it without confusing the GPU or having
			 * to force userspace to fault back in its mmaps.
3613 */ 3614 } 3615 3616 list_for_each_entry(vma, &obj->vma.list, obj_link) { 3617 if (!drm_mm_node_allocated(&vma->node)) 3618 continue; 3619 3620 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3621 if (ret) 3622 return ret; 3623 } 3624 } 3625 3626 list_for_each_entry(vma, &obj->vma.list, obj_link) 3627 vma->node.color = cache_level; 3628 i915_gem_object_set_cache_coherency(obj, cache_level); 3629 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 3630 3631 return 0; 3632 } 3633 3634 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3635 struct drm_file *file) 3636 { 3637 struct drm_i915_gem_caching *args = data; 3638 struct drm_i915_gem_object *obj; 3639 int err = 0; 3640 3641 rcu_read_lock(); 3642 obj = i915_gem_object_lookup_rcu(file, args->handle); 3643 if (!obj) { 3644 err = -ENOENT; 3645 goto out; 3646 } 3647 3648 switch (obj->cache_level) { 3649 case I915_CACHE_LLC: 3650 case I915_CACHE_L3_LLC: 3651 args->caching = I915_CACHING_CACHED; 3652 break; 3653 3654 case I915_CACHE_WT: 3655 args->caching = I915_CACHING_DISPLAY; 3656 break; 3657 3658 default: 3659 args->caching = I915_CACHING_NONE; 3660 break; 3661 } 3662 out: 3663 rcu_read_unlock(); 3664 return err; 3665 } 3666 3667 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3668 struct drm_file *file) 3669 { 3670 struct drm_i915_private *i915 = to_i915(dev); 3671 struct drm_i915_gem_caching *args = data; 3672 struct drm_i915_gem_object *obj; 3673 enum i915_cache_level level; 3674 int ret = 0; 3675 3676 switch (args->caching) { 3677 case I915_CACHING_NONE: 3678 level = I915_CACHE_NONE; 3679 break; 3680 case I915_CACHING_CACHED: 3681 /* 3682 * Due to a HW issue on BXT A stepping, GPU stores via a 3683 * snooped mapping may leave stale data in a corresponding CPU 3684 * cacheline, whereas normally such cachelines would get 3685 * invalidated. 3686 */ 3687 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 3688 return -ENODEV; 3689 3690 level = I915_CACHE_LLC; 3691 break; 3692 case I915_CACHING_DISPLAY: 3693 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 3694 break; 3695 default: 3696 return -EINVAL; 3697 } 3698 3699 obj = i915_gem_object_lookup(file, args->handle); 3700 if (!obj) 3701 return -ENOENT; 3702 3703 /* 3704 * The caching mode of proxy object is handled by its generator, and 3705 * not allowed to be changed by userspace. 3706 */ 3707 if (i915_gem_object_is_proxy(obj)) { 3708 ret = -ENXIO; 3709 goto out; 3710 } 3711 3712 if (obj->cache_level == level) 3713 goto out; 3714 3715 ret = i915_gem_object_wait(obj, 3716 I915_WAIT_INTERRUPTIBLE, 3717 MAX_SCHEDULE_TIMEOUT, 3718 to_rps_client(file)); 3719 if (ret) 3720 goto out; 3721 3722 ret = i915_mutex_lock_interruptible(dev); 3723 if (ret) 3724 goto out; 3725 3726 ret = i915_gem_object_set_cache_level(obj, level); 3727 mutex_unlock(&dev->struct_mutex); 3728 3729 out: 3730 i915_gem_object_put(obj); 3731 return ret; 3732 } 3733 3734 /* 3735 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 3736 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 3737 * (for pageflips). We only flush the caches while preparing the buffer for 3738 * display, the callers are responsible for frontbuffer flush. 
3739 */ 3740 struct i915_vma * 3741 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3742 u32 alignment, 3743 const struct i915_ggtt_view *view, 3744 unsigned int flags) 3745 { 3746 struct i915_vma *vma; 3747 int ret; 3748 3749 lockdep_assert_held(&obj->base.dev->struct_mutex); 3750 3751 /* Mark the global pin early so that we account for the 3752 * display coherency whilst setting up the cache domains. 3753 */ 3754 obj->pin_global++; 3755 3756 /* The display engine is not coherent with the LLC cache on gen6. As 3757 * a result, we make sure that the pinning that is about to occur is 3758 * done with uncached PTEs. This is lowest common denominator for all 3759 * chipsets. 3760 * 3761 * However for gen6+, we could do better by using the GFDT bit instead 3762 * of uncaching, which would allow us to flush all the LLC-cached data 3763 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3764 */ 3765 ret = i915_gem_object_set_cache_level(obj, 3766 HAS_WT(to_i915(obj->base.dev)) ? 3767 I915_CACHE_WT : I915_CACHE_NONE); 3768 if (ret) { 3769 vma = ERR_PTR(ret); 3770 goto err_unpin_global; 3771 } 3772 3773 /* As the user may map the buffer once pinned in the display plane 3774 * (e.g. libkms for the bootup splash), we have to ensure that we 3775 * always use map_and_fenceable for all scanout buffers. However, 3776 * it may simply be too big to fit into mappable, in which case 3777 * put it anyway and hope that userspace can cope (but always first 3778 * try to preserve the existing ABI). 3779 */ 3780 vma = ERR_PTR(-ENOSPC); 3781 if ((flags & PIN_MAPPABLE) == 0 && 3782 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 3783 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 3784 flags | 3785 PIN_MAPPABLE | 3786 PIN_NONBLOCK); 3787 if (IS_ERR(vma)) 3788 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 3789 if (IS_ERR(vma)) 3790 goto err_unpin_global; 3791 3792 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 3793 3794 __i915_gem_object_flush_for_display(obj); 3795 3796 /* It should now be out of any other write domains, and we can update 3797 * the domain values for our changes. 3798 */ 3799 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3800 3801 return vma; 3802 3803 err_unpin_global: 3804 obj->pin_global--; 3805 return vma; 3806 } 3807 3808 void 3809 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 3810 { 3811 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 3812 3813 if (WARN_ON(vma->obj->pin_global == 0)) 3814 return; 3815 3816 if (--vma->obj->pin_global == 0) 3817 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 3818 3819 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 3820 i915_gem_object_bump_inactive_ggtt(vma->obj); 3821 3822 i915_vma_unpin(vma); 3823 } 3824 3825 /** 3826 * Moves a single object to the CPU read, and possibly write domain. 3827 * @obj: object to act on 3828 * @write: requesting write or read-only access 3829 * 3830 * This function returns when the move is complete, including waiting on 3831 * flushes to occur. 3832 */ 3833 int 3834 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3835 { 3836 int ret; 3837 3838 lockdep_assert_held(&obj->base.dev->struct_mutex); 3839 3840 ret = i915_gem_object_wait(obj, 3841 I915_WAIT_INTERRUPTIBLE | 3842 I915_WAIT_LOCKED | 3843 (write ? 
I915_WAIT_ALL : 0), 3844 MAX_SCHEDULE_TIMEOUT, 3845 NULL); 3846 if (ret) 3847 return ret; 3848 3849 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3850 3851 /* Flush the CPU cache if it's still invalid. */ 3852 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3853 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 3854 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3855 } 3856 3857 /* It should now be out of any other write domains, and we can update 3858 * the domain values for our changes. 3859 */ 3860 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 3861 3862 /* If we're writing through the CPU, then the GPU read domains will 3863 * need to be invalidated at next use. 3864 */ 3865 if (write) 3866 __start_cpu_write(obj); 3867 3868 return 0; 3869 } 3870 3871 /* Throttle our rendering by waiting until the ring has completed our requests 3872 * emitted over 20 msec ago. 3873 * 3874 * Note that if we were to use the current jiffies each time around the loop, 3875 * we wouldn't escape the function with any frames outstanding if the time to 3876 * render a frame was over 20ms. 3877 * 3878 * This should get us reasonable parallelism between CPU and GPU but also 3879 * relatively low latency when blocking on a particular request to finish. 3880 */ 3881 static int 3882 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3883 { 3884 struct drm_i915_private *dev_priv = to_i915(dev); 3885 struct drm_i915_file_private *file_priv = file->driver_priv; 3886 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 3887 struct i915_request *request, *target = NULL; 3888 long ret; 3889 3890 /* ABI: return -EIO if already wedged */ 3891 if (i915_terminally_wedged(&dev_priv->gpu_error)) 3892 return -EIO; 3893 3894 spin_lock(&file_priv->mm.lock); 3895 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 3896 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3897 break; 3898 3899 if (target) { 3900 list_del(&target->client_link); 3901 target->file_priv = NULL; 3902 } 3903 3904 target = request; 3905 } 3906 if (target) 3907 i915_request_get(target); 3908 spin_unlock(&file_priv->mm.lock); 3909 3910 if (target == NULL) 3911 return 0; 3912 3913 ret = i915_request_wait(target, 3914 I915_WAIT_INTERRUPTIBLE, 3915 MAX_SCHEDULE_TIMEOUT); 3916 i915_request_put(target); 3917 3918 return ret < 0 ? ret : 0; 3919 } 3920 3921 struct i915_vma * 3922 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3923 const struct i915_ggtt_view *view, 3924 u64 size, 3925 u64 alignment, 3926 u64 flags) 3927 { 3928 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3929 struct i915_address_space *vm = &dev_priv->ggtt.vm; 3930 struct i915_vma *vma; 3931 int ret; 3932 3933 lockdep_assert_held(&obj->base.dev->struct_mutex); 3934 3935 if (flags & PIN_MAPPABLE && 3936 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 3937 /* If the required space is larger than the available 3938 * aperture, we will not able to find a slot for the 3939 * object and unbinding the object now will be in 3940 * vain. Worse, doing so may cause us to ping-pong 3941 * the object in and out of the Global GTT and 3942 * waste a lot of cycles under the mutex. 3943 */ 3944 if (obj->base.size > dev_priv->ggtt.mappable_end) 3945 return ERR_PTR(-E2BIG); 3946 3947 /* If NONBLOCK is set the caller is optimistically 3948 * trying to cache the full object within the mappable 3949 * aperture, and *must* have a fallback in place for 3950 * situations where we cannot bind the object. 
We 3951 * can be a little more lax here and use the fallback 3952 * more often to avoid costly migrations of ourselves 3953 * and other objects within the aperture. 3954 * 3955 * Half-the-aperture is used as a simple heuristic. 3956 * More interesting would to do search for a free 3957 * block prior to making the commitment to unbind. 3958 * That caters for the self-harm case, and with a 3959 * little more heuristics (e.g. NOFAULT, NOEVICT) 3960 * we could try to minimise harm to others. 3961 */ 3962 if (flags & PIN_NONBLOCK && 3963 obj->base.size > dev_priv->ggtt.mappable_end / 2) 3964 return ERR_PTR(-ENOSPC); 3965 } 3966 3967 vma = i915_vma_instance(obj, vm, view); 3968 if (unlikely(IS_ERR(vma))) 3969 return vma; 3970 3971 if (i915_vma_misplaced(vma, size, alignment, flags)) { 3972 if (flags & PIN_NONBLOCK) { 3973 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 3974 return ERR_PTR(-ENOSPC); 3975 3976 if (flags & PIN_MAPPABLE && 3977 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 3978 return ERR_PTR(-ENOSPC); 3979 } 3980 3981 WARN(i915_vma_is_pinned(vma), 3982 "bo is already pinned in ggtt with incorrect alignment:" 3983 " offset=%08x, req.alignment=%llx," 3984 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 3985 i915_ggtt_offset(vma), alignment, 3986 !!(flags & PIN_MAPPABLE), 3987 i915_vma_is_map_and_fenceable(vma)); 3988 ret = i915_vma_unbind(vma); 3989 if (ret) 3990 return ERR_PTR(ret); 3991 } 3992 3993 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 3994 if (ret) 3995 return ERR_PTR(ret); 3996 3997 return vma; 3998 } 3999 4000 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4001 { 4002 /* Note that we could alias engines in the execbuf API, but 4003 * that would be very unwise as it prevents userspace from 4004 * fine control over engine selection. Ahem. 4005 * 4006 * This should be something like EXEC_MAX_ENGINE instead of 4007 * I915_NUM_ENGINES. 4008 */ 4009 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4010 return 0x10000 << id; 4011 } 4012 4013 static __always_inline unsigned int __busy_write_id(unsigned int id) 4014 { 4015 /* The uABI guarantees an active writer is also amongst the read 4016 * engines. This would be true if we accessed the activity tracking 4017 * under the lock, but as we perform the lookup of the object and 4018 * its activity locklessly we can not guarantee that the last_write 4019 * being active implies that we have set the same engine flag from 4020 * last_read - hence we always set both read and write busy for 4021 * last_write. 4022 */ 4023 return id | __busy_read_flag(id); 4024 } 4025 4026 static __always_inline unsigned int 4027 __busy_set_if_active(const struct dma_fence *fence, 4028 unsigned int (*flag)(unsigned int id)) 4029 { 4030 struct i915_request *rq; 4031 4032 /* We have to check the current hw status of the fence as the uABI 4033 * guarantees forward progress. We could rely on the idle worker 4034 * to eventually flush us, but to minimise latency just ask the 4035 * hardware. 4036 * 4037 * Note we only report on the status of native fences. 
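 *
 * As a worked example of the encoding produced by the flag() callback:
 * for an engine with uabi_id 2, __busy_read_flag() yields
 * 0x10000 << 2 == 0x40000, while __busy_write_id() yields
 * 2 | 0x40000 == 0x40002, i.e. the writer is always reported as a
 * reader of its own engine as well.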
4038 */ 4039 if (!dma_fence_is_i915(fence)) 4040 return 0; 4041 4042 /* opencode to_request() in order to avoid const warnings */ 4043 rq = container_of(fence, struct i915_request, fence); 4044 if (i915_request_completed(rq)) 4045 return 0; 4046 4047 return flag(rq->engine->uabi_id); 4048 } 4049 4050 static __always_inline unsigned int 4051 busy_check_reader(const struct dma_fence *fence) 4052 { 4053 return __busy_set_if_active(fence, __busy_read_flag); 4054 } 4055 4056 static __always_inline unsigned int 4057 busy_check_writer(const struct dma_fence *fence) 4058 { 4059 if (!fence) 4060 return 0; 4061 4062 return __busy_set_if_active(fence, __busy_write_id); 4063 } 4064 4065 int 4066 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4067 struct drm_file *file) 4068 { 4069 struct drm_i915_gem_busy *args = data; 4070 struct drm_i915_gem_object *obj; 4071 struct reservation_object_list *list; 4072 unsigned int seq; 4073 int err; 4074 4075 err = -ENOENT; 4076 rcu_read_lock(); 4077 obj = i915_gem_object_lookup_rcu(file, args->handle); 4078 if (!obj) 4079 goto out; 4080 4081 /* A discrepancy here is that we do not report the status of 4082 * non-i915 fences, i.e. even though we may report the object as idle, 4083 * a call to set-domain may still stall waiting for foreign rendering. 4084 * This also means that wait-ioctl may report an object as busy, 4085 * where busy-ioctl considers it idle. 4086 * 4087 * We trade the ability to warn of foreign fences to report on which 4088 * i915 engines are active for the object. 4089 * 4090 * Alternatively, we can trade that extra information on read/write 4091 * activity with 4092 * args->busy = 4093 * !reservation_object_test_signaled_rcu(obj->resv, true); 4094 * to report the overall busyness. This is what the wait-ioctl does. 
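 *
 * For reference, the helpers above split the report into two halves
 * (the BUILD_BUG_ON in __busy_read_flag() keeps the engine count
 * within 16), so a hypothetical userspace decode is simply:
 *
 *	readers = args->busy >> 16;	// bitmask of engines reading
 *	writer = args->busy & 0xffff;	// uabi id of the last writer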
4095 * 4096 */ 4097 retry: 4098 seq = raw_read_seqcount(&obj->resv->seq); 4099 4100 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4101 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4102 4103 /* Translate shared fences to READ set of engines */ 4104 list = rcu_dereference(obj->resv->fence); 4105 if (list) { 4106 unsigned int shared_count = list->shared_count, i; 4107 4108 for (i = 0; i < shared_count; ++i) { 4109 struct dma_fence *fence = 4110 rcu_dereference(list->shared[i]); 4111 4112 args->busy |= busy_check_reader(fence); 4113 } 4114 } 4115 4116 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4117 goto retry; 4118 4119 err = 0; 4120 out: 4121 rcu_read_unlock(); 4122 return err; 4123 } 4124 4125 int 4126 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4127 struct drm_file *file_priv) 4128 { 4129 return i915_gem_ring_throttle(dev, file_priv); 4130 } 4131 4132 int 4133 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4134 struct drm_file *file_priv) 4135 { 4136 struct drm_i915_private *dev_priv = to_i915(dev); 4137 struct drm_i915_gem_madvise *args = data; 4138 struct drm_i915_gem_object *obj; 4139 int err; 4140 4141 switch (args->madv) { 4142 case I915_MADV_DONTNEED: 4143 case I915_MADV_WILLNEED: 4144 break; 4145 default: 4146 return -EINVAL; 4147 } 4148 4149 obj = i915_gem_object_lookup(file_priv, args->handle); 4150 if (!obj) 4151 return -ENOENT; 4152 4153 err = mutex_lock_interruptible(&obj->mm.lock); 4154 if (err) 4155 goto out; 4156 4157 if (i915_gem_object_has_pages(obj) && 4158 i915_gem_object_is_tiled(obj) && 4159 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4160 if (obj->mm.madv == I915_MADV_WILLNEED) { 4161 GEM_BUG_ON(!obj->mm.quirked); 4162 __i915_gem_object_unpin_pages(obj); 4163 obj->mm.quirked = false; 4164 } 4165 if (args->madv == I915_MADV_WILLNEED) { 4166 GEM_BUG_ON(obj->mm.quirked); 4167 __i915_gem_object_pin_pages(obj); 4168 obj->mm.quirked = true; 4169 } 4170 } 4171 4172 if (obj->mm.madv != __I915_MADV_PURGED) 4173 obj->mm.madv = args->madv; 4174 4175 /* if the object is no longer attached, discard its backing storage */ 4176 if (obj->mm.madv == I915_MADV_DONTNEED && 4177 !i915_gem_object_has_pages(obj)) 4178 i915_gem_object_truncate(obj); 4179 4180 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4181 mutex_unlock(&obj->mm.lock); 4182 4183 out: 4184 i915_gem_object_put(obj); 4185 return err; 4186 } 4187 4188 static void 4189 frontbuffer_retire(struct i915_active_request *active, 4190 struct i915_request *request) 4191 { 4192 struct drm_i915_gem_object *obj = 4193 container_of(active, typeof(*obj), frontbuffer_write); 4194 4195 intel_fb_obj_flush(obj, ORIGIN_CS); 4196 } 4197 4198 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4199 const struct drm_i915_gem_object_ops *ops) 4200 { 4201 mutex_init(&obj->mm.lock); 4202 4203 spin_lock_init(&obj->vma.lock); 4204 INIT_LIST_HEAD(&obj->vma.list); 4205 4206 INIT_LIST_HEAD(&obj->lut_list); 4207 INIT_LIST_HEAD(&obj->batch_pool_link); 4208 4209 init_rcu_head(&obj->rcu); 4210 4211 obj->ops = ops; 4212 4213 reservation_object_init(&obj->__builtin_resv); 4214 obj->resv = &obj->__builtin_resv; 4215 4216 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4217 i915_active_request_init(&obj->frontbuffer_write, 4218 NULL, frontbuffer_retire); 4219 4220 obj->mm.madv = I915_MADV_WILLNEED; 4221 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4222 mutex_init(&obj->mm.get_page.lock); 4223 4224 
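	/*
	 * Finally fold the new object into the global bookkeeping, so that
	 * the per-device object count and memory totals stay in sync with
	 * the state initialised above.
	 */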
i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4225 } 4226 4227 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4228 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4229 I915_GEM_OBJECT_IS_SHRINKABLE, 4230 4231 .get_pages = i915_gem_object_get_pages_gtt, 4232 .put_pages = i915_gem_object_put_pages_gtt, 4233 4234 .pwrite = i915_gem_object_pwrite_gtt, 4235 }; 4236 4237 static int i915_gem_object_create_shmem(struct drm_device *dev, 4238 struct drm_gem_object *obj, 4239 size_t size) 4240 { 4241 struct drm_i915_private *i915 = to_i915(dev); 4242 unsigned long flags = VM_NORESERVE; 4243 struct file *filp; 4244 4245 drm_gem_private_object_init(dev, obj, size); 4246 4247 if (i915->mm.gemfs) 4248 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4249 flags); 4250 else 4251 filp = shmem_file_setup("i915", size, flags); 4252 4253 if (IS_ERR(filp)) 4254 return PTR_ERR(filp); 4255 4256 obj->filp = filp; 4257 4258 return 0; 4259 } 4260 4261 struct drm_i915_gem_object * 4262 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4263 { 4264 struct drm_i915_gem_object *obj; 4265 struct address_space *mapping; 4266 unsigned int cache_level; 4267 gfp_t mask; 4268 int ret; 4269 4270 /* There is a prevalence of the assumption that we fit the object's 4271 * page count inside a 32bit _signed_ variable. Let's document this and 4272 * catch if we ever need to fix it. In the meantime, if you do spot 4273 * such a local variable, please consider fixing! 4274 */ 4275 if (size >> PAGE_SHIFT > INT_MAX) 4276 return ERR_PTR(-E2BIG); 4277 4278 if (overflows_type(size, obj->base.size)) 4279 return ERR_PTR(-E2BIG); 4280 4281 obj = i915_gem_object_alloc(dev_priv); 4282 if (obj == NULL) 4283 return ERR_PTR(-ENOMEM); 4284 4285 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4286 if (ret) 4287 goto fail; 4288 4289 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4290 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4291 /* 965gm cannot relocate objects above 4GiB. */ 4292 mask &= ~__GFP_HIGHMEM; 4293 mask |= __GFP_DMA32; 4294 } 4295 4296 mapping = obj->base.filp->f_mapping; 4297 mapping_set_gfp_mask(mapping, mask); 4298 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4299 4300 i915_gem_object_init(obj, &i915_gem_object_ops); 4301 4302 obj->write_domain = I915_GEM_DOMAIN_CPU; 4303 obj->read_domains = I915_GEM_DOMAIN_CPU; 4304 4305 if (HAS_LLC(dev_priv)) 4306 /* On some devices, we can have the GPU use the LLC (the CPU 4307 * cache) for about a 10% performance improvement 4308 * compared to uncached. Graphics requests other than 4309 * display scanout are coherent with the CPU in 4310 * accessing this cache. This means in this mode we 4311 * don't need to clflush on the CPU side, and on the 4312 * GPU side we only need to flush internal caches to 4313 * get data visible to the CPU. 4314 * 4315 * However, we maintain the display planes as UC, and so 4316 * need to rebind when first used as such. 4317 */ 4318 cache_level = I915_CACHE_LLC; 4319 else 4320 cache_level = I915_CACHE_NONE; 4321 4322 i915_gem_object_set_cache_coherency(obj, cache_level); 4323 4324 trace_i915_gem_object_create(obj); 4325 4326 return obj; 4327 4328 fail: 4329 i915_gem_object_free(obj); 4330 return ERR_PTR(ret); 4331 } 4332 4333 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4334 { 4335 /* If we are the last user of the backing storage (be it shmemfs 4336 * pages or stolen etc), we know that the pages are going to be 4337 * immediately released. 
In this case, we can then skip copying 4338 * back the contents from the GPU. 4339 */ 4340 4341 if (obj->mm.madv != I915_MADV_WILLNEED) 4342 return false; 4343 4344 if (obj->base.filp == NULL) 4345 return true; 4346 4347 /* At first glance, this looks racy, but then again so would be 4348 * userspace racing mmap against close. However, the first external 4349 * reference to the filp can only be obtained through the 4350 * i915_gem_mmap_ioctl() which safeguards us against the user 4351 * acquiring such a reference whilst we are in the middle of 4352 * freeing the object. 4353 */ 4354 return atomic_long_read(&obj->base.filp->f_count) == 1; 4355 } 4356 4357 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4358 struct llist_node *freed) 4359 { 4360 struct drm_i915_gem_object *obj, *on; 4361 intel_wakeref_t wakeref; 4362 4363 wakeref = intel_runtime_pm_get(i915); 4364 llist_for_each_entry_safe(obj, on, freed, freed) { 4365 struct i915_vma *vma, *vn; 4366 4367 trace_i915_gem_object_destroy(obj); 4368 4369 mutex_lock(&i915->drm.struct_mutex); 4370 4371 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4372 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { 4373 GEM_BUG_ON(i915_vma_is_active(vma)); 4374 vma->flags &= ~I915_VMA_PIN_MASK; 4375 i915_vma_destroy(vma); 4376 } 4377 GEM_BUG_ON(!list_empty(&obj->vma.list)); 4378 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); 4379 4380 /* This serializes freeing with the shrinker. Since the free 4381 * is delayed, first by RCU then by the workqueue, we want the 4382 * shrinker to be able to free pages of unreferenced objects, 4383 * or else we may oom whilst there are plenty of deferred 4384 * freed objects. 4385 */ 4386 if (i915_gem_object_has_pages(obj)) { 4387 spin_lock(&i915->mm.obj_lock); 4388 list_del_init(&obj->mm.link); 4389 spin_unlock(&i915->mm.obj_lock); 4390 } 4391 4392 mutex_unlock(&i915->drm.struct_mutex); 4393 4394 GEM_BUG_ON(obj->bind_count); 4395 GEM_BUG_ON(obj->userfault_count); 4396 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4397 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4398 4399 if (obj->ops->release) 4400 obj->ops->release(obj); 4401 4402 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4403 atomic_set(&obj->mm.pages_pin_count, 0); 4404 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4405 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4406 4407 if (obj->base.import_attach) 4408 drm_prime_gem_destroy(&obj->base, NULL); 4409 4410 reservation_object_fini(&obj->__builtin_resv); 4411 drm_gem_object_release(&obj->base); 4412 i915_gem_info_remove_obj(i915, obj->base.size); 4413 4414 kfree(obj->bit_17); 4415 i915_gem_object_free(obj); 4416 4417 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4418 atomic_dec(&i915->mm.free_count); 4419 4420 if (on) 4421 cond_resched(); 4422 } 4423 intel_runtime_pm_put(i915, wakeref); 4424 } 4425 4426 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4427 { 4428 struct llist_node *freed; 4429 4430 /* Free the oldest, most stale object to keep the free_list short */ 4431 freed = NULL; 4432 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4433 /* Only one consumer of llist_del_first() allowed */ 4434 spin_lock(&i915->mm.free_lock); 4435 freed = llist_del_first(&i915->mm.free_list); 4436 spin_unlock(&i915->mm.free_lock); 4437 } 4438 if (unlikely(freed)) { 4439 freed->next = NULL; 4440 __i915_gem_free_objects(i915, freed); 4441 } 4442 } 4443 4444 static void __i915_gem_free_work(struct work_struct *work) 4445 { 4446 struct drm_i915_private *i915 = 
4447 container_of(work, struct drm_i915_private, mm.free_work); 4448 struct llist_node *freed; 4449 4450 /* 4451 * All file-owned VMA should have been released by this point through 4452 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4453 * However, the object may also be bound into the global GTT (e.g. 4454 * older GPUs without per-process support, or for direct access through 4455 * the GTT either for the user or for scanout). Those VMA still need to 4456 * unbound now. 4457 */ 4458 4459 spin_lock(&i915->mm.free_lock); 4460 while ((freed = llist_del_all(&i915->mm.free_list))) { 4461 spin_unlock(&i915->mm.free_lock); 4462 4463 __i915_gem_free_objects(i915, freed); 4464 if (need_resched()) 4465 return; 4466 4467 spin_lock(&i915->mm.free_lock); 4468 } 4469 spin_unlock(&i915->mm.free_lock); 4470 } 4471 4472 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4473 { 4474 struct drm_i915_gem_object *obj = 4475 container_of(head, typeof(*obj), rcu); 4476 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4477 4478 /* 4479 * We reuse obj->rcu for the freed list, so we had better not treat 4480 * it like a rcu_head from this point forwards. And we expect all 4481 * objects to be freed via this path. 4482 */ 4483 destroy_rcu_head(&obj->rcu); 4484 4485 /* 4486 * Since we require blocking on struct_mutex to unbind the freed 4487 * object from the GPU before releasing resources back to the 4488 * system, we can not do that directly from the RCU callback (which may 4489 * be a softirq context), but must instead then defer that work onto a 4490 * kthread. We use the RCU callback rather than move the freed object 4491 * directly onto the work queue so that we can mix between using the 4492 * worker and performing frees directly from subsequent allocations for 4493 * crude but effective memory throttling. 4494 */ 4495 if (llist_add(&obj->freed, &i915->mm.free_list)) 4496 queue_work(i915->wq, &i915->mm.free_work); 4497 } 4498 4499 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4500 { 4501 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4502 4503 if (obj->mm.quirked) 4504 __i915_gem_object_unpin_pages(obj); 4505 4506 if (discard_backing_storage(obj)) 4507 obj->mm.madv = I915_MADV_DONTNEED; 4508 4509 /* 4510 * Before we free the object, make sure any pure RCU-only 4511 * read-side critical sections are complete, e.g. 4512 * i915_gem_busy_ioctl(). For the corresponding synchronized 4513 * lookup see i915_gem_object_lookup_rcu(). 4514 */ 4515 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 4516 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4517 } 4518 4519 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4520 { 4521 lockdep_assert_held(&obj->base.dev->struct_mutex); 4522 4523 if (!i915_gem_object_has_active_reference(obj) && 4524 i915_gem_object_is_active(obj)) 4525 i915_gem_object_set_active_reference(obj); 4526 else 4527 i915_gem_object_put(obj); 4528 } 4529 4530 void i915_gem_sanitize(struct drm_i915_private *i915) 4531 { 4532 intel_wakeref_t wakeref; 4533 4534 GEM_TRACE("\n"); 4535 4536 wakeref = intel_runtime_pm_get(i915); 4537 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 4538 4539 /* 4540 * As we have just resumed the machine and woken the device up from 4541 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 4542 * back to defaults, recovering from whatever wedged state we left it 4543 * in and so worth trying to use the device once more. 
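 *
 * The sequence below therefore first clears any terminal wedge left
 * over from before the suspend, then forces an engine sanitise/reset
 * to scrub whatever state the BIOS or a previous occupant may have
 * left behind, and finally marks the existing contexts as lost so
 * that no stale logical state is trusted afterwards.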
4544 */ 4545 if (i915_terminally_wedged(&i915->gpu_error)) 4546 i915_gem_unset_wedged(i915); 4547 4548 /* 4549 * If we inherit context state from the BIOS or earlier occupants 4550 * of the GPU, the GPU may be in an inconsistent state when we 4551 * try to take over. The only way to remove the earlier state 4552 * is by resetting. However, resetting on earlier gen is tricky as 4553 * it may impact the display and we are uncertain about the stability 4554 * of the reset, so this could be applied to even earlier gen. 4555 */ 4556 intel_engines_sanitize(i915, false); 4557 4558 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 4559 intel_runtime_pm_put(i915, wakeref); 4560 4561 mutex_lock(&i915->drm.struct_mutex); 4562 i915_gem_contexts_lost(i915); 4563 mutex_unlock(&i915->drm.struct_mutex); 4564 } 4565 4566 int i915_gem_suspend(struct drm_i915_private *i915) 4567 { 4568 intel_wakeref_t wakeref; 4569 int ret; 4570 4571 GEM_TRACE("\n"); 4572 4573 wakeref = intel_runtime_pm_get(i915); 4574 intel_suspend_gt_powersave(i915); 4575 4576 flush_workqueue(i915->wq); 4577 4578 mutex_lock(&i915->drm.struct_mutex); 4579 4580 /* 4581 * We have to flush all the executing contexts to main memory so 4582 * that they can saved in the hibernation image. To ensure the last 4583 * context image is coherent, we have to switch away from it. That 4584 * leaves the i915->kernel_context still active when 4585 * we actually suspend, and its image in memory may not match the GPU 4586 * state. Fortunately, the kernel_context is disposable and we do 4587 * not rely on its state. 4588 */ 4589 if (!i915_terminally_wedged(&i915->gpu_error)) { 4590 ret = i915_gem_switch_to_kernel_context(i915); 4591 if (ret) 4592 goto err_unlock; 4593 4594 ret = i915_gem_wait_for_idle(i915, 4595 I915_WAIT_INTERRUPTIBLE | 4596 I915_WAIT_LOCKED | 4597 I915_WAIT_FOR_IDLE_BOOST, 4598 MAX_SCHEDULE_TIMEOUT); 4599 if (ret && ret != -EIO) 4600 goto err_unlock; 4601 4602 assert_kernel_context_is_current(i915); 4603 } 4604 i915_retire_requests(i915); /* ensure we flush after wedging */ 4605 4606 mutex_unlock(&i915->drm.struct_mutex); 4607 i915_reset_flush(i915); 4608 4609 drain_delayed_work(&i915->gt.retire_work); 4610 4611 /* 4612 * As the idle_work is rearming if it detects a race, play safe and 4613 * repeat the flush until it is definitely idle. 4614 */ 4615 drain_delayed_work(&i915->gt.idle_work); 4616 4617 intel_uc_suspend(i915); 4618 4619 /* 4620 * Assert that we successfully flushed all the work and 4621 * reset the GPU back to its idle, low power state. 4622 */ 4623 WARN_ON(i915->gt.awake); 4624 if (WARN_ON(!intel_engines_are_idle(i915))) 4625 i915_gem_set_wedged(i915); /* no hope, discard everything */ 4626 4627 intel_runtime_pm_put(i915, wakeref); 4628 return 0; 4629 4630 err_unlock: 4631 mutex_unlock(&i915->drm.struct_mutex); 4632 intel_runtime_pm_put(i915, wakeref); 4633 return ret; 4634 } 4635 4636 void i915_gem_suspend_late(struct drm_i915_private *i915) 4637 { 4638 struct drm_i915_gem_object *obj; 4639 struct list_head *phases[] = { 4640 &i915->mm.unbound_list, 4641 &i915->mm.bound_list, 4642 NULL 4643 }, **phase; 4644 4645 /* 4646 * Neither the BIOS, ourselves or any other kernel 4647 * expects the system to be in execlists mode on startup, 4648 * so we need to reset the GPU back to legacy mode. And the only 4649 * known way to disable logical contexts is through a GPU reset. 4650 * 4651 * So in order to leave the system in a known default configuration, 4652 * always reset the GPU upon unload and suspend. 
Afterwards we then 4653 * clean up the GEM state tracking, flushing off the requests and 4654 * leaving the system in a known idle state. 4655 * 4656 * Note that is of the upmost importance that the GPU is idle and 4657 * all stray writes are flushed *before* we dismantle the backing 4658 * storage for the pinned objects. 4659 * 4660 * However, since we are uncertain that resetting the GPU on older 4661 * machines is a good idea, we don't - just in case it leaves the 4662 * machine in an unusable condition. 4663 */ 4664 4665 mutex_lock(&i915->drm.struct_mutex); 4666 for (phase = phases; *phase; phase++) { 4667 list_for_each_entry(obj, *phase, mm.link) 4668 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 4669 } 4670 mutex_unlock(&i915->drm.struct_mutex); 4671 4672 intel_uc_sanitize(i915); 4673 i915_gem_sanitize(i915); 4674 } 4675 4676 void i915_gem_resume(struct drm_i915_private *i915) 4677 { 4678 GEM_TRACE("\n"); 4679 4680 WARN_ON(i915->gt.awake); 4681 4682 mutex_lock(&i915->drm.struct_mutex); 4683 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 4684 4685 i915_gem_restore_gtt_mappings(i915); 4686 i915_gem_restore_fences(i915); 4687 4688 /* 4689 * As we didn't flush the kernel context before suspend, we cannot 4690 * guarantee that the context image is complete. So let's just reset 4691 * it and start again. 4692 */ 4693 i915->gt.resume(i915); 4694 4695 if (i915_gem_init_hw(i915)) 4696 goto err_wedged; 4697 4698 intel_uc_resume(i915); 4699 4700 /* Always reload a context for powersaving. */ 4701 if (i915_gem_switch_to_kernel_context(i915)) 4702 goto err_wedged; 4703 4704 out_unlock: 4705 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 4706 mutex_unlock(&i915->drm.struct_mutex); 4707 return; 4708 4709 err_wedged: 4710 if (!i915_terminally_wedged(&i915->gpu_error)) { 4711 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 4712 i915_gem_set_wedged(i915); 4713 } 4714 goto out_unlock; 4715 } 4716 4717 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 4718 { 4719 if (INTEL_GEN(dev_priv) < 5 || 4720 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4721 return; 4722 4723 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4724 DISP_TILE_SURFACE_SWIZZLING); 4725 4726 if (IS_GEN(dev_priv, 5)) 4727 return; 4728 4729 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4730 if (IS_GEN(dev_priv, 6)) 4731 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4732 else if (IS_GEN(dev_priv, 7)) 4733 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4734 else if (IS_GEN(dev_priv, 8)) 4735 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4736 else 4737 BUG(); 4738 } 4739 4740 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 4741 { 4742 I915_WRITE(RING_CTL(base), 0); 4743 I915_WRITE(RING_HEAD(base), 0); 4744 I915_WRITE(RING_TAIL(base), 0); 4745 I915_WRITE(RING_START(base), 0); 4746 } 4747 4748 static void init_unused_rings(struct drm_i915_private *dev_priv) 4749 { 4750 if (IS_I830(dev_priv)) { 4751 init_unused_ring(dev_priv, PRB1_BASE); 4752 init_unused_ring(dev_priv, SRB0_BASE); 4753 init_unused_ring(dev_priv, SRB1_BASE); 4754 init_unused_ring(dev_priv, SRB2_BASE); 4755 init_unused_ring(dev_priv, SRB3_BASE); 4756 } else if (IS_GEN(dev_priv, 2)) { 4757 init_unused_ring(dev_priv, SRB0_BASE); 4758 init_unused_ring(dev_priv, SRB1_BASE); 4759 } else if (IS_GEN(dev_priv, 3)) { 4760 init_unused_ring(dev_priv, PRB1_BASE); 4761 init_unused_ring(dev_priv, PRB2_BASE); 4762 } 4763 } 4764 4765 static int 
__i915_gem_restart_engines(void *data) 4766 { 4767 struct drm_i915_private *i915 = data; 4768 struct intel_engine_cs *engine; 4769 enum intel_engine_id id; 4770 int err; 4771 4772 for_each_engine(engine, i915, id) { 4773 err = engine->init_hw(engine); 4774 if (err) { 4775 DRM_ERROR("Failed to restart %s (%d)\n", 4776 engine->name, err); 4777 return err; 4778 } 4779 } 4780 4781 return 0; 4782 } 4783 4784 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 4785 { 4786 int ret; 4787 4788 dev_priv->gt.last_init_time = ktime_get(); 4789 4790 /* Double layer security blanket, see i915_gem_init() */ 4791 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4792 4793 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 4794 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4795 4796 if (IS_HASWELL(dev_priv)) 4797 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 4798 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4799 4800 /* Apply the GT workarounds... */ 4801 intel_gt_apply_workarounds(dev_priv); 4802 /* ...and determine whether they are sticking. */ 4803 intel_gt_verify_workarounds(dev_priv, "init"); 4804 4805 i915_gem_init_swizzling(dev_priv); 4806 4807 /* 4808 * At least 830 can leave some of the unused rings 4809 * "active" (ie. head != tail) after resume which 4810 * will prevent c3 entry. Makes sure all unused rings 4811 * are totally idle. 4812 */ 4813 init_unused_rings(dev_priv); 4814 4815 BUG_ON(!dev_priv->kernel_context); 4816 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 4817 ret = -EIO; 4818 goto out; 4819 } 4820 4821 ret = i915_ppgtt_init_hw(dev_priv); 4822 if (ret) { 4823 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 4824 goto out; 4825 } 4826 4827 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 4828 if (ret) { 4829 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 4830 goto out; 4831 } 4832 4833 /* We can't enable contexts until all firmware is loaded */ 4834 ret = intel_uc_init_hw(dev_priv); 4835 if (ret) { 4836 DRM_ERROR("Enabling uc failed (%d)\n", ret); 4837 goto out; 4838 } 4839 4840 intel_mocs_init_l3cc_table(dev_priv); 4841 4842 /* Only when the HW is re-initialised, can we replay the requests */ 4843 ret = __i915_gem_restart_engines(dev_priv); 4844 if (ret) 4845 goto cleanup_uc; 4846 4847 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4848 4849 return 0; 4850 4851 cleanup_uc: 4852 intel_uc_fini_hw(dev_priv); 4853 out: 4854 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4855 4856 return ret; 4857 } 4858 4859 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 4860 { 4861 struct i915_gem_context *ctx; 4862 struct intel_engine_cs *engine; 4863 enum intel_engine_id id; 4864 int err; 4865 4866 /* 4867 * As we reset the gpu during very early sanitisation, the current 4868 * register state on the GPU should reflect its defaults values. 4869 * We load a context onto the hw (with restore-inhibit), then switch 4870 * over to a second context to save that default register state. We 4871 * can then prime every new context with that state so they all start 4872 * from the same default HW values. 
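 *
 * In outline, the steps below are: create a throwaway kernel context,
 * emit one request per engine so that engine->init_context() runs
 * against it, switch back to the kernel context and wait for idle,
 * and then record each engine's saved context image as
 * engine->default_state for later contexts to inherit.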
4873 */ 4874 4875 ctx = i915_gem_context_create_kernel(i915, 0); 4876 if (IS_ERR(ctx)) 4877 return PTR_ERR(ctx); 4878 4879 for_each_engine(engine, i915, id) { 4880 struct i915_request *rq; 4881 4882 rq = i915_request_alloc(engine, ctx); 4883 if (IS_ERR(rq)) { 4884 err = PTR_ERR(rq); 4885 goto out_ctx; 4886 } 4887 4888 err = 0; 4889 if (engine->init_context) 4890 err = engine->init_context(rq); 4891 4892 i915_request_add(rq); 4893 if (err) 4894 goto err_active; 4895 } 4896 4897 err = i915_gem_switch_to_kernel_context(i915); 4898 if (err) 4899 goto err_active; 4900 4901 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 4902 i915_gem_set_wedged(i915); 4903 err = -EIO; /* Caller will declare us wedged */ 4904 goto err_active; 4905 } 4906 4907 assert_kernel_context_is_current(i915); 4908 4909 /* 4910 * Immediately park the GPU so that we enable powersaving and 4911 * treat it as idle. The next time we issue a request, we will 4912 * unpark and start using the engine->pinned_default_state, otherwise 4913 * it is in limbo and an early reset may fail. 4914 */ 4915 __i915_gem_park(i915); 4916 4917 for_each_engine(engine, i915, id) { 4918 struct i915_vma *state; 4919 void *vaddr; 4920 4921 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); 4922 4923 state = to_intel_context(ctx, engine)->state; 4924 if (!state) 4925 continue; 4926 4927 /* 4928 * As we will hold a reference to the logical state, it will 4929 * not be torn down with the context, and importantly the 4930 * object will hold onto its vma (making it possible for a 4931 * stray GTT write to corrupt our defaults). Unmap the vma 4932 * from the GTT to prevent such accidents and reclaim the 4933 * space. 4934 */ 4935 err = i915_vma_unbind(state); 4936 if (err) 4937 goto err_active; 4938 4939 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 4940 if (err) 4941 goto err_active; 4942 4943 engine->default_state = i915_gem_object_get(state->obj); 4944 4945 /* Check we can acquire the image of the context state */ 4946 vaddr = i915_gem_object_pin_map(engine->default_state, 4947 I915_MAP_FORCE_WB); 4948 if (IS_ERR(vaddr)) { 4949 err = PTR_ERR(vaddr); 4950 goto err_active; 4951 } 4952 4953 i915_gem_object_unpin_map(engine->default_state); 4954 } 4955 4956 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 4957 unsigned int found = intel_engines_has_context_isolation(i915); 4958 4959 /* 4960 * Make sure that classes with multiple engine instances all 4961 * share the same basic configuration. 4962 */ 4963 for_each_engine(engine, i915, id) { 4964 unsigned int bit = BIT(engine->uabi_class); 4965 unsigned int expected = engine->default_state ? bit : 0; 4966 4967 if ((found & bit) != expected) { 4968 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 4969 engine->uabi_class, engine->name); 4970 } 4971 } 4972 } 4973 4974 out_ctx: 4975 i915_gem_context_set_closed(ctx); 4976 i915_gem_context_put(ctx); 4977 return err; 4978 4979 err_active: 4980 /* 4981 * If we have to abandon now, we expect the engines to be idle 4982 * and ready to be torn-down. First try to flush any remaining 4983 * request, ensure we are pointing at the kernel context and 4984 * then remove it. 
4985 */ 4986 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 4987 goto out_ctx; 4988 4989 if (WARN_ON(i915_gem_wait_for_idle(i915, 4990 I915_WAIT_LOCKED, 4991 MAX_SCHEDULE_TIMEOUT))) 4992 goto out_ctx; 4993 4994 i915_gem_contexts_lost(i915); 4995 goto out_ctx; 4996 } 4997 4998 static int 4999 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 5000 { 5001 struct drm_i915_gem_object *obj; 5002 struct i915_vma *vma; 5003 int ret; 5004 5005 obj = i915_gem_object_create_stolen(i915, size); 5006 if (!obj) 5007 obj = i915_gem_object_create_internal(i915, size); 5008 if (IS_ERR(obj)) { 5009 DRM_ERROR("Failed to allocate scratch page\n"); 5010 return PTR_ERR(obj); 5011 } 5012 5013 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 5014 if (IS_ERR(vma)) { 5015 ret = PTR_ERR(vma); 5016 goto err_unref; 5017 } 5018 5019 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 5020 if (ret) 5021 goto err_unref; 5022 5023 i915->gt.scratch = vma; 5024 return 0; 5025 5026 err_unref: 5027 i915_gem_object_put(obj); 5028 return ret; 5029 } 5030 5031 static void i915_gem_fini_scratch(struct drm_i915_private *i915) 5032 { 5033 i915_vma_unpin_and_release(&i915->gt.scratch, 0); 5034 } 5035 5036 int i915_gem_init(struct drm_i915_private *dev_priv) 5037 { 5038 int ret; 5039 5040 /* We need to fallback to 4K pages if host doesn't support huge gtt. */ 5041 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5042 mkwrite_device_info(dev_priv)->page_sizes = 5043 I915_GTT_PAGE_SIZE_4K; 5044 5045 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5046 5047 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5048 dev_priv->gt.resume = intel_lr_context_resume; 5049 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5050 } else { 5051 dev_priv->gt.resume = intel_legacy_submission_resume; 5052 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5053 } 5054 5055 i915_timelines_init(dev_priv); 5056 5057 ret = i915_gem_init_userptr(dev_priv); 5058 if (ret) 5059 return ret; 5060 5061 ret = intel_uc_init_misc(dev_priv); 5062 if (ret) 5063 return ret; 5064 5065 ret = intel_wopcm_init(&dev_priv->wopcm); 5066 if (ret) 5067 goto err_uc_misc; 5068 5069 /* This is just a security blanket to placate dragons. 5070 * On some systems, we very sporadically observe that the first TLBs 5071 * used by the CS may be stale, despite us poking the TLB reset. If 5072 * we hold the forcewake during initialisation these problems 5073 * just magically go away. 5074 */ 5075 mutex_lock(&dev_priv->drm.struct_mutex); 5076 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5077 5078 ret = i915_gem_init_ggtt(dev_priv); 5079 if (ret) { 5080 GEM_BUG_ON(ret == -EIO); 5081 goto err_unlock; 5082 } 5083 5084 ret = i915_gem_init_scratch(dev_priv, 5085 IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE); 5086 if (ret) { 5087 GEM_BUG_ON(ret == -EIO); 5088 goto err_ggtt; 5089 } 5090 5091 ret = i915_gem_contexts_init(dev_priv); 5092 if (ret) { 5093 GEM_BUG_ON(ret == -EIO); 5094 goto err_scratch; 5095 } 5096 5097 ret = intel_engines_init(dev_priv); 5098 if (ret) { 5099 GEM_BUG_ON(ret == -EIO); 5100 goto err_context; 5101 } 5102 5103 intel_init_gt_powersave(dev_priv); 5104 5105 ret = intel_uc_init(dev_priv); 5106 if (ret) 5107 goto err_pm; 5108 5109 ret = i915_gem_init_hw(dev_priv); 5110 if (ret) 5111 goto err_uc_init; 5112 5113 /* 5114 * Despite its name intel_init_clock_gating applies both display 5115 * clock gating workarounds; GT mmio workarounds and the occasional 5116 * GT power context workaround. 
Worse, sometimes it includes a context 5117 * register workaround which we need to apply before we record the 5118 * default HW state for all contexts. 5119 * 5120 * FIXME: break up the workarounds and apply them at the right time! 5121 */ 5122 intel_init_clock_gating(dev_priv); 5123 5124 ret = __intel_engines_record_defaults(dev_priv); 5125 if (ret) 5126 goto err_init_hw; 5127 5128 if (i915_inject_load_failure()) { 5129 ret = -ENODEV; 5130 goto err_init_hw; 5131 } 5132 5133 if (i915_inject_load_failure()) { 5134 ret = -EIO; 5135 goto err_init_hw; 5136 } 5137 5138 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5139 mutex_unlock(&dev_priv->drm.struct_mutex); 5140 5141 return 0; 5142 5143 /* 5144 * Unwinding is complicated by that we want to handle -EIO to mean 5145 * disable GPU submission but keep KMS alive. We want to mark the 5146 * HW as irrevisibly wedged, but keep enough state around that the 5147 * driver doesn't explode during runtime. 5148 */ 5149 err_init_hw: 5150 mutex_unlock(&dev_priv->drm.struct_mutex); 5151 5152 WARN_ON(i915_gem_suspend(dev_priv)); 5153 i915_gem_suspend_late(dev_priv); 5154 5155 i915_gem_drain_workqueue(dev_priv); 5156 5157 mutex_lock(&dev_priv->drm.struct_mutex); 5158 intel_uc_fini_hw(dev_priv); 5159 err_uc_init: 5160 intel_uc_fini(dev_priv); 5161 err_pm: 5162 if (ret != -EIO) { 5163 intel_cleanup_gt_powersave(dev_priv); 5164 i915_gem_cleanup_engines(dev_priv); 5165 } 5166 err_context: 5167 if (ret != -EIO) 5168 i915_gem_contexts_fini(dev_priv); 5169 err_scratch: 5170 i915_gem_fini_scratch(dev_priv); 5171 err_ggtt: 5172 err_unlock: 5173 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5174 mutex_unlock(&dev_priv->drm.struct_mutex); 5175 5176 err_uc_misc: 5177 intel_uc_fini_misc(dev_priv); 5178 5179 if (ret != -EIO) { 5180 i915_gem_cleanup_userptr(dev_priv); 5181 i915_timelines_fini(dev_priv); 5182 } 5183 5184 if (ret == -EIO) { 5185 mutex_lock(&dev_priv->drm.struct_mutex); 5186 5187 /* 5188 * Allow engine initialisation to fail by marking the GPU as 5189 * wedged. But we only want to do this where the GPU is angry, 5190 * for all other failure, such as an allocation failure, bail. 5191 */ 5192 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5193 i915_load_error(dev_priv, 5194 "Failed to initialize GPU, declaring it wedged!\n"); 5195 i915_gem_set_wedged(dev_priv); 5196 } 5197 5198 /* Minimal basic recovery for KMS */ 5199 ret = i915_ggtt_enable_hw(dev_priv); 5200 i915_gem_restore_gtt_mappings(dev_priv); 5201 i915_gem_restore_fences(dev_priv); 5202 intel_init_clock_gating(dev_priv); 5203 5204 mutex_unlock(&dev_priv->drm.struct_mutex); 5205 } 5206 5207 i915_gem_drain_freed_objects(dev_priv); 5208 return ret; 5209 } 5210 5211 void i915_gem_fini(struct drm_i915_private *dev_priv) 5212 { 5213 i915_gem_suspend_late(dev_priv); 5214 intel_disable_gt_powersave(dev_priv); 5215 5216 /* Flush any outstanding unpin_work. 
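 * Draining the driver workqueue below ensures that deferred frees and
 * retirements queued by the earlier teardown steps have completed
 * before the engines and contexts themselves are dismantled.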
*/ 5217 i915_gem_drain_workqueue(dev_priv); 5218 5219 mutex_lock(&dev_priv->drm.struct_mutex); 5220 intel_uc_fini_hw(dev_priv); 5221 intel_uc_fini(dev_priv); 5222 i915_gem_cleanup_engines(dev_priv); 5223 i915_gem_contexts_fini(dev_priv); 5224 i915_gem_fini_scratch(dev_priv); 5225 mutex_unlock(&dev_priv->drm.struct_mutex); 5226 5227 intel_wa_list_free(&dev_priv->gt_wa_list); 5228 5229 intel_cleanup_gt_powersave(dev_priv); 5230 5231 intel_uc_fini_misc(dev_priv); 5232 i915_gem_cleanup_userptr(dev_priv); 5233 i915_timelines_fini(dev_priv); 5234 5235 i915_gem_drain_freed_objects(dev_priv); 5236 5237 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5238 } 5239 5240 void i915_gem_init_mmio(struct drm_i915_private *i915) 5241 { 5242 i915_gem_sanitize(i915); 5243 } 5244 5245 void 5246 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5247 { 5248 struct intel_engine_cs *engine; 5249 enum intel_engine_id id; 5250 5251 for_each_engine(engine, dev_priv, id) 5252 dev_priv->gt.cleanup_engine(engine); 5253 } 5254 5255 void 5256 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5257 { 5258 int i; 5259 5260 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5261 !IS_CHERRYVIEW(dev_priv)) 5262 dev_priv->num_fence_regs = 32; 5263 else if (INTEL_GEN(dev_priv) >= 4 || 5264 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5265 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5266 dev_priv->num_fence_regs = 16; 5267 else 5268 dev_priv->num_fence_regs = 8; 5269 5270 if (intel_vgpu_active(dev_priv)) 5271 dev_priv->num_fence_regs = 5272 I915_READ(vgtif_reg(avail_rs.fence_num)); 5273 5274 /* Initialize fence registers to zero */ 5275 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5276 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5277 5278 fence->i915 = dev_priv; 5279 fence->id = i; 5280 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5281 } 5282 i915_gem_restore_fences(dev_priv); 5283 5284 i915_gem_detect_bit_6_swizzle(dev_priv); 5285 } 5286 5287 static void i915_gem_init__mm(struct drm_i915_private *i915) 5288 { 5289 spin_lock_init(&i915->mm.object_stat_lock); 5290 spin_lock_init(&i915->mm.obj_lock); 5291 spin_lock_init(&i915->mm.free_lock); 5292 5293 init_llist_head(&i915->mm.free_list); 5294 5295 INIT_LIST_HEAD(&i915->mm.unbound_list); 5296 INIT_LIST_HEAD(&i915->mm.bound_list); 5297 INIT_LIST_HEAD(&i915->mm.fence_list); 5298 INIT_LIST_HEAD(&i915->mm.userfault_list); 5299 5300 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5301 } 5302 5303 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5304 { 5305 int err = -ENOMEM; 5306 5307 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5308 if (!dev_priv->objects) 5309 goto err_out; 5310 5311 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5312 if (!dev_priv->vmas) 5313 goto err_objects; 5314 5315 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5316 if (!dev_priv->luts) 5317 goto err_vmas; 5318 5319 dev_priv->requests = KMEM_CACHE(i915_request, 5320 SLAB_HWCACHE_ALIGN | 5321 SLAB_RECLAIM_ACCOUNT | 5322 SLAB_TYPESAFE_BY_RCU); 5323 if (!dev_priv->requests) 5324 goto err_luts; 5325 5326 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5327 SLAB_HWCACHE_ALIGN | 5328 SLAB_RECLAIM_ACCOUNT); 5329 if (!dev_priv->dependencies) 5330 goto err_requests; 5331 5332 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5333 if (!dev_priv->priorities) 5334 goto err_dependencies; 5335 5336 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5337 
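	/*
	 * Track vma whose handles have been closed by userspace but which
	 * are still in use; they are cleaned up later, once idle.
	 */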
INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5338 5339 i915_gem_init__mm(dev_priv); 5340 5341 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5342 i915_gem_retire_work_handler); 5343 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5344 i915_gem_idle_work_handler); 5345 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5346 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5347 mutex_init(&dev_priv->gpu_error.wedge_mutex); 5348 5349 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5350 5351 spin_lock_init(&dev_priv->fb_tracking.lock); 5352 5353 err = i915_gemfs_init(dev_priv); 5354 if (err) 5355 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5356 5357 return 0; 5358 5359 err_dependencies: 5360 kmem_cache_destroy(dev_priv->dependencies); 5361 err_requests: 5362 kmem_cache_destroy(dev_priv->requests); 5363 err_luts: 5364 kmem_cache_destroy(dev_priv->luts); 5365 err_vmas: 5366 kmem_cache_destroy(dev_priv->vmas); 5367 err_objects: 5368 kmem_cache_destroy(dev_priv->objects); 5369 err_out: 5370 return err; 5371 } 5372 5373 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 5374 { 5375 i915_gem_drain_freed_objects(dev_priv); 5376 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 5377 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 5378 WARN_ON(dev_priv->mm.object_count); 5379 5380 kmem_cache_destroy(dev_priv->priorities); 5381 kmem_cache_destroy(dev_priv->dependencies); 5382 kmem_cache_destroy(dev_priv->requests); 5383 kmem_cache_destroy(dev_priv->luts); 5384 kmem_cache_destroy(dev_priv->vmas); 5385 kmem_cache_destroy(dev_priv->objects); 5386 5387 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5388 rcu_barrier(); 5389 5390 i915_gemfs_fini(dev_priv); 5391 } 5392 5393 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5394 { 5395 /* Discard all purgeable objects, let userspace recover those as 5396 * required after resuming. 5397 */ 5398 i915_gem_shrink_all(dev_priv); 5399 5400 return 0; 5401 } 5402 5403 int i915_gem_freeze_late(struct drm_i915_private *i915) 5404 { 5405 struct drm_i915_gem_object *obj; 5406 struct list_head *phases[] = { 5407 &i915->mm.unbound_list, 5408 &i915->mm.bound_list, 5409 NULL 5410 }, **phase; 5411 5412 /* 5413 * Called just before we write the hibernation image. 5414 * 5415 * We need to update the domain tracking to reflect that the CPU 5416 * will be accessing all the pages to create and restore from the 5417 * hibernation, and so upon restoration those pages will be in the 5418 * CPU domain. 5419 * 5420 * To make sure the hibernation image contains the latest state, 5421 * we update that state just before writing out the image. 
5422 * 5423 * To try and reduce the hibernation image, we manually shrink 5424 * the objects as well, see i915_gem_freeze() 5425 */ 5426 5427 i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND); 5428 i915_gem_drain_freed_objects(i915); 5429 5430 mutex_lock(&i915->drm.struct_mutex); 5431 for (phase = phases; *phase; phase++) { 5432 list_for_each_entry(obj, *phase, mm.link) 5433 WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true)); 5434 } 5435 mutex_unlock(&i915->drm.struct_mutex); 5436 5437 return 0; 5438 } 5439 5440 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5441 { 5442 struct drm_i915_file_private *file_priv = file->driver_priv; 5443 struct i915_request *request; 5444 5445 /* Clean up our request list when the client is going away, so that 5446 * later retire_requests won't dereference our soon-to-be-gone 5447 * file_priv. 5448 */ 5449 spin_lock(&file_priv->mm.lock); 5450 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 5451 request->file_priv = NULL; 5452 spin_unlock(&file_priv->mm.lock); 5453 } 5454 5455 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 5456 { 5457 struct drm_i915_file_private *file_priv; 5458 int ret; 5459 5460 DRM_DEBUG("\n"); 5461 5462 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5463 if (!file_priv) 5464 return -ENOMEM; 5465 5466 file->driver_priv = file_priv; 5467 file_priv->dev_priv = i915; 5468 file_priv->file = file; 5469 5470 spin_lock_init(&file_priv->mm.lock); 5471 INIT_LIST_HEAD(&file_priv->mm.request_list); 5472 5473 file_priv->bsd_engine = -1; 5474 file_priv->hang_timestamp = jiffies; 5475 5476 ret = i915_gem_context_open(i915, file); 5477 if (ret) 5478 kfree(file_priv); 5479 5480 return ret; 5481 } 5482 5483 /** 5484 * i915_gem_track_fb - update frontbuffer tracking 5485 * @old: current GEM buffer for the frontbuffer slots 5486 * @new: new GEM buffer for the frontbuffer slots 5487 * @frontbuffer_bits: bitmask of frontbuffer slots 5488 * 5489 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5490 * from @old and setting them in @new. Both @old and @new can be NULL. 5491 */ 5492 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5493 struct drm_i915_gem_object *new, 5494 unsigned frontbuffer_bits) 5495 { 5496 /* Control of individual bits within the mask are guarded by 5497 * the owning plane->mutex, i.e. we can never see concurrent 5498 * manipulation of individual bits. But since the bitfield as a whole 5499 * is updated using RMW, we need to use atomics in order to update 5500 * the bits. 
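 *
 * A minimal usage sketch, assuming the caller has already computed the
 * frontbuffer bits for the plane being flipped (illustrative only, not
 * taken from the display code):
 *
 *	// old_obj/new_obj are the outgoing/incoming framebuffer objects;
 *	// either may be NULL when enabling or disabling the plane.
 *	i915_gem_track_fb(old_obj, new_obj, frontbuffer_bits);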
5501 */ 5502 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 5503 BITS_PER_TYPE(atomic_t)); 5504 5505 if (old) { 5506 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 5507 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 5508 } 5509 5510 if (new) { 5511 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 5512 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 5513 } 5514 } 5515 5516 /* Allocate a new GEM object and fill it with the supplied data */ 5517 struct drm_i915_gem_object * 5518 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, 5519 const void *data, size_t size) 5520 { 5521 struct drm_i915_gem_object *obj; 5522 struct file *file; 5523 size_t offset; 5524 int err; 5525 5526 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); 5527 if (IS_ERR(obj)) 5528 return obj; 5529 5530 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 5531 5532 file = obj->base.filp; 5533 offset = 0; 5534 do { 5535 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 5536 struct page *page; 5537 void *pgdata, *vaddr; 5538 5539 err = pagecache_write_begin(file, file->f_mapping, 5540 offset, len, 0, 5541 &page, &pgdata); 5542 if (err < 0) 5543 goto fail; 5544 5545 vaddr = kmap(page); 5546 memcpy(vaddr, data, len); 5547 kunmap(page); 5548 5549 err = pagecache_write_end(file, file->f_mapping, 5550 offset, len, len, 5551 page, pgdata); 5552 if (err < 0) 5553 goto fail; 5554 5555 size -= len; 5556 data += len; 5557 offset += len; 5558 } while (size); 5559 5560 return obj; 5561 5562 fail: 5563 i915_gem_object_put(obj); 5564 return ERR_PTR(err); 5565 } 5566 5567 struct scatterlist * 5568 i915_gem_object_get_sg(struct drm_i915_gem_object *obj, 5569 unsigned int n, 5570 unsigned int *offset) 5571 { 5572 struct i915_gem_object_page_iter *iter = &obj->mm.get_page; 5573 struct scatterlist *sg; 5574 unsigned int idx, count; 5575 5576 might_sleep(); 5577 GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); 5578 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 5579 5580 /* As we iterate forward through the sg, we record each entry in a 5581 * radixtree for quick repeated (backwards) lookups. If we have seen 5582 * this index previously, we will have an entry for it. 5583 * 5584 * Initial lookup is O(N), but this is amortized to O(1) for 5585 * sequential page access (where each new request is consecutive 5586 * to the previous one). Repeated lookups are O(lg(obj->base.size)), 5587 * i.e. O(1) with a large constant! 5588 */ 5589 if (n < READ_ONCE(iter->sg_idx)) 5590 goto lookup; 5591 5592 mutex_lock(&iter->lock); 5593 5594 /* We prefer to reuse the last sg so that repeated lookup of this 5595 * (or the subsequent) sg are fast - comparing against the last 5596 * sg is faster than going through the radixtree. 5597 */ 5598 5599 sg = iter->sg_pos; 5600 idx = iter->sg_idx; 5601 count = __sg_page_count(sg); 5602 5603 while (idx + count <= n) { 5604 void *entry; 5605 unsigned long i; 5606 int ret; 5607 5608 /* If we cannot allocate and insert this entry, or the 5609 * individual pages from this range, cancel updating the 5610 * sg_idx so that on this lookup we are forced to linearly 5611 * scan onwards, but on future lookups we will try the 5612 * insertion again (in which case we need to be careful of 5613 * the error return reporting that we have already inserted 5614 * this index). 
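 *
 * As a concrete illustration of the layout built here: an sg entry
 * covering 8 pages starting at page index 40 is stored as
 * radix[40] = sg and radix[41]..radix[47] = xa_mk_value(40), so a
 * lookup of any middle page first hits the value entry and is then
 * redirected to the base index (see the lookup path below).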
5615 */ 5616 ret = radix_tree_insert(&iter->radix, idx, sg); 5617 if (ret && ret != -EEXIST) 5618 goto scan; 5619 5620 entry = xa_mk_value(idx); 5621 for (i = 1; i < count; i++) { 5622 ret = radix_tree_insert(&iter->radix, idx + i, entry); 5623 if (ret && ret != -EEXIST) 5624 goto scan; 5625 } 5626 5627 idx += count; 5628 sg = ____sg_next(sg); 5629 count = __sg_page_count(sg); 5630 } 5631 5632 scan: 5633 iter->sg_pos = sg; 5634 iter->sg_idx = idx; 5635 5636 mutex_unlock(&iter->lock); 5637 5638 if (unlikely(n < idx)) /* insertion completed by another thread */ 5639 goto lookup; 5640 5641 /* In case we failed to insert the entry into the radixtree, we need 5642 * to look beyond the current sg. 5643 */ 5644 while (idx + count <= n) { 5645 idx += count; 5646 sg = ____sg_next(sg); 5647 count = __sg_page_count(sg); 5648 } 5649 5650 *offset = n - idx; 5651 return sg; 5652 5653 lookup: 5654 rcu_read_lock(); 5655 5656 sg = radix_tree_lookup(&iter->radix, n); 5657 GEM_BUG_ON(!sg); 5658 5659 /* If this index is in the middle of multi-page sg entry, 5660 * the radix tree will contain a value entry that points 5661 * to the start of that range. We will return the pointer to 5662 * the base page and the offset of this page within the 5663 * sg entry's range. 5664 */ 5665 *offset = 0; 5666 if (unlikely(xa_is_value(sg))) { 5667 unsigned long base = xa_to_value(sg); 5668 5669 sg = radix_tree_lookup(&iter->radix, base); 5670 GEM_BUG_ON(!sg); 5671 5672 *offset = n - base; 5673 } 5674 5675 rcu_read_unlock(); 5676 5677 return sg; 5678 } 5679 5680 struct page * 5681 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) 5682 { 5683 struct scatterlist *sg; 5684 unsigned int offset; 5685 5686 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); 5687 5688 sg = i915_gem_object_get_sg(obj, n, &offset); 5689 return nth_page(sg_page(sg), offset); 5690 } 5691 5692 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5693 struct page * 5694 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, 5695 unsigned int n) 5696 { 5697 struct page *page; 5698 5699 page = i915_gem_object_get_page(obj, n); 5700 if (!obj->mm.dirty) 5701 set_page_dirty(page); 5702 5703 return page; 5704 } 5705 5706 dma_addr_t 5707 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, 5708 unsigned long n) 5709 { 5710 struct scatterlist *sg; 5711 unsigned int offset; 5712 5713 sg = i915_gem_object_get_sg(obj, n, &offset); 5714 return sg_dma_address(sg) + (offset << PAGE_SHIFT); 5715 } 5716 5717 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) 5718 { 5719 struct sg_table *pages; 5720 int err; 5721 5722 if (align > obj->base.size) 5723 return -EINVAL; 5724 5725 if (obj->ops == &i915_gem_phys_ops) 5726 return 0; 5727 5728 if (obj->ops != &i915_gem_object_ops) 5729 return -EINVAL; 5730 5731 err = i915_gem_object_unbind(obj); 5732 if (err) 5733 return err; 5734 5735 mutex_lock(&obj->mm.lock); 5736 5737 if (obj->mm.madv != I915_MADV_WILLNEED) { 5738 err = -EFAULT; 5739 goto err_unlock; 5740 } 5741 5742 if (obj->mm.quirked) { 5743 err = -EFAULT; 5744 goto err_unlock; 5745 } 5746 5747 if (obj->mm.mapping) { 5748 err = -EBUSY; 5749 goto err_unlock; 5750 } 5751 5752 pages = __i915_gem_object_unset_pages(obj); 5753 5754 obj->ops = &i915_gem_phys_ops; 5755 5756 err = ____i915_gem_object_get_pages(obj); 5757 if (err) 5758 goto err_xfer; 5759 5760 /* Perma-pin (until release) the physical set of pages */ 5761 __i915_gem_object_pin_pages(obj); 5762 5763 if (!IS_ERR_OR_NULL(pages)) 
5764 i915_gem_object_ops.put_pages(obj, pages); 5765 mutex_unlock(&obj->mm.lock); 5766 return 0; 5767 5768 err_xfer: 5769 obj->ops = &i915_gem_object_ops; 5770 if (!IS_ERR_OR_NULL(pages)) { 5771 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl); 5772 5773 __i915_gem_object_set_pages(obj, pages, sg_page_sizes); 5774 } 5775 err_unlock: 5776 mutex_unlock(&obj->mm.lock); 5777 return err; 5778 } 5779 5780 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5781 #include "selftests/scatterlist.c" 5782 #include "selftests/mock_gem_device.c" 5783 #include "selftests/huge_gem_object.c" 5784 #include "selftests/huge_pages.c" 5785 #include "selftests/i915_gem_object.c" 5786 #include "selftests/i915_gem_coherency.c" 5787 #include "selftests/i915_gem.c" 5788 #endif 5789
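
/*
 * Note: the selftest sources above are #included into this translation
 * unit, rather than compiled separately, so that when
 * CONFIG_DRM_I915_SELFTEST is enabled they can exercise the static
 * helpers defined in this file directly.
 */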