1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_clflush.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_frontbuffer.h" 37 #include "intel_mocs.h" 38 #include "intel_workarounds.h" 39 #include "i915_gemfs.h" 40 #include <linux/dma-fence-array.h> 41 #include <linux/kthread.h> 42 #include <linux/reservation.h> 43 #include <linux/shmem_fs.h> 44 #include <linux/slab.h> 45 #include <linux/stop_machine.h> 46 #include <linux/swap.h> 47 #include <linux/pci.h> 48 #include <linux/dma-buf.h> 49 50 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 51 52 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 53 { 54 if (obj->cache_dirty) 55 return false; 56 57 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 58 return true; 59 60 return obj->pin_global; /* currently in use by HW, keep flushed */ 61 } 62 63 static int 64 insert_mappable_node(struct i915_ggtt *ggtt, 65 struct drm_mm_node *node, u32 size) 66 { 67 memset(node, 0, sizeof(*node)); 68 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, 69 size, 0, I915_COLOR_UNEVICTABLE, 70 0, ggtt->mappable_end, 71 DRM_MM_INSERT_LOW); 72 } 73 74 static void 75 remove_mappable_node(struct drm_mm_node *node) 76 { 77 drm_mm_remove_node(node); 78 } 79 80 /* some bookkeeping */ 81 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 82 u64 size) 83 { 84 spin_lock(&dev_priv->mm.object_stat_lock); 85 dev_priv->mm.object_count++; 86 dev_priv->mm.object_memory += size; 87 spin_unlock(&dev_priv->mm.object_stat_lock); 88 } 89 90 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 91 u64 size) 92 { 93 spin_lock(&dev_priv->mm.object_stat_lock); 94 dev_priv->mm.object_count--; 95 dev_priv->mm.object_memory -= size; 96 spin_unlock(&dev_priv->mm.object_stat_lock); 97 } 98 99 static int 100 i915_gem_wait_for_error(struct i915_gpu_error *error) 101 { 102 int ret; 103 104 might_sleep(); 105 106 /* 107 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 108 * userspace. If it takes that long something really bad is going on and 109 * we should simply try to bail out and fail as gracefully as possible. 
110 */ 111 ret = wait_event_interruptible_timeout(error->reset_queue, 112 !i915_reset_backoff(error), 113 I915_RESET_TIMEOUT); 114 if (ret == 0) { 115 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 116 return -EIO; 117 } else if (ret < 0) { 118 return ret; 119 } else { 120 return 0; 121 } 122 } 123 124 int i915_mutex_lock_interruptible(struct drm_device *dev) 125 { 126 struct drm_i915_private *dev_priv = to_i915(dev); 127 int ret; 128 129 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 130 if (ret) 131 return ret; 132 133 ret = mutex_lock_interruptible(&dev->struct_mutex); 134 if (ret) 135 return ret; 136 137 return 0; 138 } 139 140 static u32 __i915_gem_park(struct drm_i915_private *i915) 141 { 142 GEM_TRACE("\n"); 143 144 lockdep_assert_held(&i915->drm.struct_mutex); 145 GEM_BUG_ON(i915->gt.active_requests); 146 GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 147 148 if (!i915->gt.awake) 149 return I915_EPOCH_INVALID; 150 151 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); 152 153 /* 154 * Be paranoid and flush a concurrent interrupt to make sure 155 * we don't reactivate any irq tasklets after parking. 156 * 157 * FIXME: Note that even though we have waited for execlists to be idle, 158 * there may still be an in-flight interrupt even though the CSB 159 * is now empty. synchronize_irq() makes sure that a residual interrupt 160 * is completed before we continue, but it doesn't prevent the HW from 161 * raising a spurious interrupt later. To complete the shield we should 162 * coordinate disabling the CS irq with flushing the interrupts. 163 */ 164 synchronize_irq(i915->drm.irq); 165 166 intel_engines_park(i915); 167 i915_timelines_park(i915); 168 169 i915_pmu_gt_parked(i915); 170 i915_vma_parked(i915); 171 172 i915->gt.awake = false; 173 174 if (INTEL_GEN(i915) >= 6) 175 gen6_rps_idle(i915); 176 177 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); 178 179 intel_runtime_pm_put(i915); 180 181 return i915->gt.epoch; 182 } 183 184 void i915_gem_park(struct drm_i915_private *i915) 185 { 186 GEM_TRACE("\n"); 187 188 lockdep_assert_held(&i915->drm.struct_mutex); 189 GEM_BUG_ON(i915->gt.active_requests); 190 191 if (!i915->gt.awake) 192 return; 193 194 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ 195 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); 196 } 197 198 void i915_gem_unpark(struct drm_i915_private *i915) 199 { 200 GEM_TRACE("\n"); 201 202 lockdep_assert_held(&i915->drm.struct_mutex); 203 GEM_BUG_ON(!i915->gt.active_requests); 204 205 if (i915->gt.awake) 206 return; 207 208 intel_runtime_pm_get_noresume(i915); 209 210 /* 211 * It seems that the DMC likes to transition between the DC states a lot 212 * when there are no connected displays (no active power domains) during 213 * command submission. 214 * 215 * This activity has negative impact on the performance of the chip with 216 * huge latencies observed in the interrupt handler and elsewhere. 217 * 218 * Work around it by grabbing a GT IRQ power domain whilst there is any 219 * GT activity, preventing any DC state transitions. 
220 */ 221 intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); 222 223 i915->gt.awake = true; 224 if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ 225 i915->gt.epoch = 1; 226 227 intel_enable_gt_powersave(i915); 228 i915_update_gfx_val(i915); 229 if (INTEL_GEN(i915) >= 6) 230 gen6_rps_busy(i915); 231 i915_pmu_gt_unparked(i915); 232 233 intel_engines_unpark(i915); 234 235 i915_queue_hangcheck(i915); 236 237 queue_delayed_work(i915->wq, 238 &i915->gt.retire_work, 239 round_jiffies_up_relative(HZ)); 240 } 241 242 int 243 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 244 struct drm_file *file) 245 { 246 struct drm_i915_private *dev_priv = to_i915(dev); 247 struct i915_ggtt *ggtt = &dev_priv->ggtt; 248 struct drm_i915_gem_get_aperture *args = data; 249 struct i915_vma *vma; 250 u64 pinned; 251 252 pinned = ggtt->vm.reserved; 253 mutex_lock(&dev->struct_mutex); 254 list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) 255 if (i915_vma_is_pinned(vma)) 256 pinned += vma->node.size; 257 list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) 258 if (i915_vma_is_pinned(vma)) 259 pinned += vma->node.size; 260 mutex_unlock(&dev->struct_mutex); 261 262 args->aper_size = ggtt->vm.total; 263 args->aper_available_size = args->aper_size - pinned; 264 265 return 0; 266 } 267 268 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 269 { 270 struct address_space *mapping = obj->base.filp->f_mapping; 271 drm_dma_handle_t *phys; 272 struct sg_table *st; 273 struct scatterlist *sg; 274 char *vaddr; 275 int i; 276 int err; 277 278 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 279 return -EINVAL; 280 281 /* Always aligning to the object size, allows a single allocation 282 * to handle all possible callers, and given typical object sizes, 283 * the alignment of the buddy allocation will naturally match. 
284 */ 285 phys = drm_pci_alloc(obj->base.dev, 286 roundup_pow_of_two(obj->base.size), 287 roundup_pow_of_two(obj->base.size)); 288 if (!phys) 289 return -ENOMEM; 290 291 vaddr = phys->vaddr; 292 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 293 struct page *page; 294 char *src; 295 296 page = shmem_read_mapping_page(mapping, i); 297 if (IS_ERR(page)) { 298 err = PTR_ERR(page); 299 goto err_phys; 300 } 301 302 src = kmap_atomic(page); 303 memcpy(vaddr, src, PAGE_SIZE); 304 drm_clflush_virt_range(vaddr, PAGE_SIZE); 305 kunmap_atomic(src); 306 307 put_page(page); 308 vaddr += PAGE_SIZE; 309 } 310 311 i915_gem_chipset_flush(to_i915(obj->base.dev)); 312 313 st = kmalloc(sizeof(*st), GFP_KERNEL); 314 if (!st) { 315 err = -ENOMEM; 316 goto err_phys; 317 } 318 319 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 320 kfree(st); 321 err = -ENOMEM; 322 goto err_phys; 323 } 324 325 sg = st->sgl; 326 sg->offset = 0; 327 sg->length = obj->base.size; 328 329 sg_dma_address(sg) = phys->busaddr; 330 sg_dma_len(sg) = obj->base.size; 331 332 obj->phys_handle = phys; 333 334 __i915_gem_object_set_pages(obj, st, sg->length); 335 336 return 0; 337 338 err_phys: 339 drm_pci_free(obj->base.dev, phys); 340 341 return err; 342 } 343 344 static void __start_cpu_write(struct drm_i915_gem_object *obj) 345 { 346 obj->read_domains = I915_GEM_DOMAIN_CPU; 347 obj->write_domain = I915_GEM_DOMAIN_CPU; 348 if (cpu_write_needs_clflush(obj)) 349 obj->cache_dirty = true; 350 } 351 352 static void 353 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 354 struct sg_table *pages, 355 bool needs_clflush) 356 { 357 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 358 359 if (obj->mm.madv == I915_MADV_DONTNEED) 360 obj->mm.dirty = false; 361 362 if (needs_clflush && 363 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 364 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 365 drm_clflush_sg(pages); 366 367 __start_cpu_write(obj); 368 } 369 370 static void 371 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 372 struct sg_table *pages) 373 { 374 __i915_gem_object_release_shmem(obj, pages, false); 375 376 if (obj->mm.dirty) { 377 struct address_space *mapping = obj->base.filp->f_mapping; 378 char *vaddr = obj->phys_handle->vaddr; 379 int i; 380 381 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 382 struct page *page; 383 char *dst; 384 385 page = shmem_read_mapping_page(mapping, i); 386 if (IS_ERR(page)) 387 continue; 388 389 dst = kmap_atomic(page); 390 drm_clflush_virt_range(vaddr, PAGE_SIZE); 391 memcpy(dst, vaddr, PAGE_SIZE); 392 kunmap_atomic(dst); 393 394 set_page_dirty(page); 395 if (obj->mm.madv == I915_MADV_WILLNEED) 396 mark_page_accessed(page); 397 put_page(page); 398 vaddr += PAGE_SIZE; 399 } 400 obj->mm.dirty = false; 401 } 402 403 sg_free_table(pages); 404 kfree(pages); 405 406 drm_pci_free(obj->base.dev, obj->phys_handle); 407 } 408 409 static void 410 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 411 { 412 i915_gem_object_unpin_pages(obj); 413 } 414 415 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 416 .get_pages = i915_gem_object_get_pages_phys, 417 .put_pages = i915_gem_object_put_pages_phys, 418 .release = i915_gem_object_release_phys, 419 }; 420 421 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 422 423 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 424 { 425 struct i915_vma *vma; 426 LIST_HEAD(still_in_list); 427 int ret; 428 429 lockdep_assert_held(&obj->base.dev->struct_mutex); 430 431 /* Closed vma are 
removed from the obj->vma_list - but they may 432 * still have an active binding on the object. To remove those we 433 * must wait for all rendering to complete to the object (as unbinding 434 * must anyway), and retire the requests. 435 */ 436 ret = i915_gem_object_set_to_cpu_domain(obj, false); 437 if (ret) 438 return ret; 439 440 while ((vma = list_first_entry_or_null(&obj->vma_list, 441 struct i915_vma, 442 obj_link))) { 443 list_move_tail(&vma->obj_link, &still_in_list); 444 ret = i915_vma_unbind(vma); 445 if (ret) 446 break; 447 } 448 list_splice(&still_in_list, &obj->vma_list); 449 450 return ret; 451 } 452 453 static long 454 i915_gem_object_wait_fence(struct dma_fence *fence, 455 unsigned int flags, 456 long timeout, 457 struct intel_rps_client *rps_client) 458 { 459 struct i915_request *rq; 460 461 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 462 463 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 464 return timeout; 465 466 if (!dma_fence_is_i915(fence)) 467 return dma_fence_wait_timeout(fence, 468 flags & I915_WAIT_INTERRUPTIBLE, 469 timeout); 470 471 rq = to_request(fence); 472 if (i915_request_completed(rq)) 473 goto out; 474 475 /* 476 * This client is about to stall waiting for the GPU. In many cases 477 * this is undesirable and limits the throughput of the system, as 478 * many clients cannot continue processing user input/output whilst 479 * blocked. RPS autotuning may take tens of milliseconds to respond 480 * to the GPU load and thus incurs additional latency for the client. 481 * We can circumvent that by promoting the GPU frequency to maximum 482 * before we wait. This makes the GPU throttle up much more quickly 483 * (good for benchmarks and user experience, e.g. window animations), 484 * but at a cost of spending more power processing the workload 485 * (bad for battery). Not all clients even want their results 486 * immediately and for them we should just let the GPU select its own 487 * frequency to maximise efficiency. To prevent a single client from 488 * forcing the clocks too high for the whole system, we only allow 489 * each client to waitboost once in a busy period. 490 */ 491 if (rps_client && !i915_request_started(rq)) { 492 if (INTEL_GEN(rq->i915) >= 6) 493 gen6_rps_boost(rq, rps_client); 494 } 495 496 timeout = i915_request_wait(rq, flags, timeout); 497 498 out: 499 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 500 i915_request_retire_upto(rq); 501 502 return timeout; 503 } 504 505 static long 506 i915_gem_object_wait_reservation(struct reservation_object *resv, 507 unsigned int flags, 508 long timeout, 509 struct intel_rps_client *rps_client) 510 { 511 unsigned int seq = __read_seqcount_begin(&resv->seq); 512 struct dma_fence *excl; 513 bool prune_fences = false; 514 515 if (flags & I915_WAIT_ALL) { 516 struct dma_fence **shared; 517 unsigned int count, i; 518 int ret; 519 520 ret = reservation_object_get_fences_rcu(resv, 521 &excl, &count, &shared); 522 if (ret) 523 return ret; 524 525 for (i = 0; i < count; i++) { 526 timeout = i915_gem_object_wait_fence(shared[i], 527 flags, timeout, 528 rps_client); 529 if (timeout < 0) 530 break; 531 532 dma_fence_put(shared[i]); 533 } 534 535 for (; i < count; i++) 536 dma_fence_put(shared[i]); 537 kfree(shared); 538 539 /* 540 * If both shared fences and an exclusive fence exist, 541 * then by construction the shared fences must be later 542 * than the exclusive fence. If we successfully wait for 543 * all the shared fences, we know that the exclusive fence 544 * must all be signaled. 
If all the shared fences are 545 * signaled, we can prune the array and recover the 546 * floating references on the fences/requests. 547 */ 548 prune_fences = count && timeout >= 0; 549 } else { 550 excl = reservation_object_get_excl_rcu(resv); 551 } 552 553 if (excl && timeout >= 0) 554 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 555 rps_client); 556 557 dma_fence_put(excl); 558 559 /* 560 * Opportunistically prune the fences iff we know they have *all* been 561 * signaled and that the reservation object has not been changed (i.e. 562 * no new fences have been added). 563 */ 564 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 565 if (reservation_object_trylock(resv)) { 566 if (!__read_seqcount_retry(&resv->seq, seq)) 567 reservation_object_add_excl_fence(resv, NULL); 568 reservation_object_unlock(resv); 569 } 570 } 571 572 return timeout; 573 } 574 575 static void __fence_set_priority(struct dma_fence *fence, 576 const struct i915_sched_attr *attr) 577 { 578 struct i915_request *rq; 579 struct intel_engine_cs *engine; 580 581 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 582 return; 583 584 rq = to_request(fence); 585 engine = rq->engine; 586 587 local_bh_disable(); 588 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 589 if (engine->schedule) 590 engine->schedule(rq, attr); 591 rcu_read_unlock(); 592 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 593 } 594 595 static void fence_set_priority(struct dma_fence *fence, 596 const struct i915_sched_attr *attr) 597 { 598 /* Recurse once into a fence-array */ 599 if (dma_fence_is_array(fence)) { 600 struct dma_fence_array *array = to_dma_fence_array(fence); 601 int i; 602 603 for (i = 0; i < array->num_fences; i++) 604 __fence_set_priority(array->fences[i], attr); 605 } else { 606 __fence_set_priority(fence, attr); 607 } 608 } 609 610 int 611 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 612 unsigned int flags, 613 const struct i915_sched_attr *attr) 614 { 615 struct dma_fence *excl; 616 617 if (flags & I915_WAIT_ALL) { 618 struct dma_fence **shared; 619 unsigned int count, i; 620 int ret; 621 622 ret = reservation_object_get_fences_rcu(obj->resv, 623 &excl, &count, &shared); 624 if (ret) 625 return ret; 626 627 for (i = 0; i < count; i++) { 628 fence_set_priority(shared[i], attr); 629 dma_fence_put(shared[i]); 630 } 631 632 kfree(shared); 633 } else { 634 excl = reservation_object_get_excl_rcu(obj->resv); 635 } 636 637 if (excl) { 638 fence_set_priority(excl, attr); 639 dma_fence_put(excl); 640 } 641 return 0; 642 } 643 644 /** 645 * Waits for rendering to the object to be completed 646 * @obj: i915 gem object 647 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 648 * @timeout: how long to wait 649 * @rps_client: client (user process) to charge for any waitboosting 650 */ 651 int 652 i915_gem_object_wait(struct drm_i915_gem_object *obj, 653 unsigned int flags, 654 long timeout, 655 struct intel_rps_client *rps_client) 656 { 657 might_sleep(); 658 #if IS_ENABLED(CONFIG_LOCKDEP) 659 GEM_BUG_ON(debug_locks && 660 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 661 !!(flags & I915_WAIT_LOCKED)); 662 #endif 663 GEM_BUG_ON(timeout < 0); 664 665 timeout = i915_gem_object_wait_reservation(obj->resv, 666 flags, timeout, 667 rps_client); 668 return timeout < 0 ? 
timeout : 0; 669 } 670 671 static struct intel_rps_client *to_rps_client(struct drm_file *file) 672 { 673 struct drm_i915_file_private *fpriv = file->driver_priv; 674 675 return &fpriv->rps_client; 676 } 677 678 static int 679 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 680 struct drm_i915_gem_pwrite *args, 681 struct drm_file *file) 682 { 683 void *vaddr = obj->phys_handle->vaddr + args->offset; 684 char __user *user_data = u64_to_user_ptr(args->data_ptr); 685 686 /* We manually control the domain here and pretend that it 687 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 688 */ 689 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 690 if (copy_from_user(vaddr, user_data, args->size)) 691 return -EFAULT; 692 693 drm_clflush_virt_range(vaddr, args->size); 694 i915_gem_chipset_flush(to_i915(obj->base.dev)); 695 696 intel_fb_obj_flush(obj, ORIGIN_CPU); 697 return 0; 698 } 699 700 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 701 { 702 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 703 } 704 705 void i915_gem_object_free(struct drm_i915_gem_object *obj) 706 { 707 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 708 kmem_cache_free(dev_priv->objects, obj); 709 } 710 711 static int 712 i915_gem_create(struct drm_file *file, 713 struct drm_i915_private *dev_priv, 714 uint64_t size, 715 uint32_t *handle_p) 716 { 717 struct drm_i915_gem_object *obj; 718 int ret; 719 u32 handle; 720 721 size = roundup(size, PAGE_SIZE); 722 if (size == 0) 723 return -EINVAL; 724 725 /* Allocate the new object */ 726 obj = i915_gem_object_create(dev_priv, size); 727 if (IS_ERR(obj)) 728 return PTR_ERR(obj); 729 730 ret = drm_gem_handle_create(file, &obj->base, &handle); 731 /* drop reference from allocate - handle holds it now */ 732 i915_gem_object_put(obj); 733 if (ret) 734 return ret; 735 736 *handle_p = handle; 737 return 0; 738 } 739 740 int 741 i915_gem_dumb_create(struct drm_file *file, 742 struct drm_device *dev, 743 struct drm_mode_create_dumb *args) 744 { 745 /* have to work out size/pitch and return them */ 746 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 747 args->size = args->pitch * args->height; 748 return i915_gem_create(file, to_i915(dev), 749 args->size, &args->handle); 750 } 751 752 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 753 { 754 return !(obj->cache_level == I915_CACHE_NONE || 755 obj->cache_level == I915_CACHE_WT); 756 } 757 758 /** 759 * Creates a new mm object and returns a handle to it. 760 * @dev: drm device pointer 761 * @data: ioctl data blob 762 * @file: drm file pointer 763 */ 764 int 765 i915_gem_create_ioctl(struct drm_device *dev, void *data, 766 struct drm_file *file) 767 { 768 struct drm_i915_private *dev_priv = to_i915(dev); 769 struct drm_i915_gem_create *args = data; 770 771 i915_gem_flush_free_objects(dev_priv); 772 773 return i915_gem_create(file, dev_priv, 774 args->size, &args->handle); 775 } 776 777 static inline enum fb_op_origin 778 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 779 { 780 return (domain == I915_GEM_DOMAIN_GTT ? 781 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 782 } 783 784 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 785 { 786 /* 787 * No actual flushing is required for the GTT write domain for reads 788 * from the GTT domain. Writes to it "immediately" go to main memory 789 * as far as we know, so there's no chipset flush. It also doesn't 790 * land in the GPU render cache. 
791 * 792 * However, we do have to enforce the order so that all writes through 793 * the GTT land before any writes to the device, such as updates to 794 * the GATT itself. 795 * 796 * We also have to wait a bit for the writes to land from the GTT. 797 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 798 * timing. This issue has only been observed when switching quickly 799 * between GTT writes and CPU reads from inside the kernel on recent hw, 800 * and it appears to only affect discrete GTT blocks (i.e. on LLC 801 * system agents we cannot reproduce this behaviour, until Cannonlake 802 * that was!). 803 */ 804 805 wmb(); 806 807 if (INTEL_INFO(dev_priv)->has_coherent_ggtt) 808 return; 809 810 i915_gem_chipset_flush(dev_priv); 811 812 intel_runtime_pm_get(dev_priv); 813 spin_lock_irq(&dev_priv->uncore.lock); 814 815 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 816 817 spin_unlock_irq(&dev_priv->uncore.lock); 818 intel_runtime_pm_put(dev_priv); 819 } 820 821 static void 822 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 823 { 824 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 825 struct i915_vma *vma; 826 827 if (!(obj->write_domain & flush_domains)) 828 return; 829 830 switch (obj->write_domain) { 831 case I915_GEM_DOMAIN_GTT: 832 i915_gem_flush_ggtt_writes(dev_priv); 833 834 intel_fb_obj_flush(obj, 835 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 836 837 for_each_ggtt_vma(vma, obj) { 838 if (vma->iomap) 839 continue; 840 841 i915_vma_unset_ggtt_write(vma); 842 } 843 break; 844 845 case I915_GEM_DOMAIN_WC: 846 wmb(); 847 break; 848 849 case I915_GEM_DOMAIN_CPU: 850 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 851 break; 852 853 case I915_GEM_DOMAIN_RENDER: 854 if (gpu_write_needs_clflush(obj)) 855 obj->cache_dirty = true; 856 break; 857 } 858 859 obj->write_domain = 0; 860 } 861 862 static inline int 863 __copy_to_user_swizzled(char __user *cpu_vaddr, 864 const char *gpu_vaddr, int gpu_offset, 865 int length) 866 { 867 int ret, cpu_offset = 0; 868 869 while (length > 0) { 870 int cacheline_end = ALIGN(gpu_offset + 1, 64); 871 int this_length = min(cacheline_end - gpu_offset, length); 872 int swizzled_gpu_offset = gpu_offset ^ 64; 873 874 ret = __copy_to_user(cpu_vaddr + cpu_offset, 875 gpu_vaddr + swizzled_gpu_offset, 876 this_length); 877 if (ret) 878 return ret + length; 879 880 cpu_offset += this_length; 881 gpu_offset += this_length; 882 length -= this_length; 883 } 884 885 return 0; 886 } 887 888 static inline int 889 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 890 const char __user *cpu_vaddr, 891 int length) 892 { 893 int ret, cpu_offset = 0; 894 895 while (length > 0) { 896 int cacheline_end = ALIGN(gpu_offset + 1, 64); 897 int this_length = min(cacheline_end - gpu_offset, length); 898 int swizzled_gpu_offset = gpu_offset ^ 64; 899 900 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 901 cpu_vaddr + cpu_offset, 902 this_length); 903 if (ret) 904 return ret + length; 905 906 cpu_offset += this_length; 907 gpu_offset += this_length; 908 length -= this_length; 909 } 910 911 return 0; 912 } 913 914 /* 915 * Pins the specified object's pages and synchronizes the object with 916 * GPU accesses. Sets needs_clflush to non-zero if the caller should 917 * flush the object from the CPU cache. 
918 */ 919 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 920 unsigned int *needs_clflush) 921 { 922 int ret; 923 924 lockdep_assert_held(&obj->base.dev->struct_mutex); 925 926 *needs_clflush = 0; 927 if (!i915_gem_object_has_struct_page(obj)) 928 return -ENODEV; 929 930 ret = i915_gem_object_wait(obj, 931 I915_WAIT_INTERRUPTIBLE | 932 I915_WAIT_LOCKED, 933 MAX_SCHEDULE_TIMEOUT, 934 NULL); 935 if (ret) 936 return ret; 937 938 ret = i915_gem_object_pin_pages(obj); 939 if (ret) 940 return ret; 941 942 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 943 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 944 ret = i915_gem_object_set_to_cpu_domain(obj, false); 945 if (ret) 946 goto err_unpin; 947 else 948 goto out; 949 } 950 951 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 952 953 /* If we're not in the cpu read domain, set ourself into the gtt 954 * read domain and manually flush cachelines (if required). This 955 * optimizes for the case when the gpu will dirty the data 956 * anyway again before the next pread happens. 957 */ 958 if (!obj->cache_dirty && 959 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 960 *needs_clflush = CLFLUSH_BEFORE; 961 962 out: 963 /* return with the pages pinned */ 964 return 0; 965 966 err_unpin: 967 i915_gem_object_unpin_pages(obj); 968 return ret; 969 } 970 971 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 972 unsigned int *needs_clflush) 973 { 974 int ret; 975 976 lockdep_assert_held(&obj->base.dev->struct_mutex); 977 978 *needs_clflush = 0; 979 if (!i915_gem_object_has_struct_page(obj)) 980 return -ENODEV; 981 982 ret = i915_gem_object_wait(obj, 983 I915_WAIT_INTERRUPTIBLE | 984 I915_WAIT_LOCKED | 985 I915_WAIT_ALL, 986 MAX_SCHEDULE_TIMEOUT, 987 NULL); 988 if (ret) 989 return ret; 990 991 ret = i915_gem_object_pin_pages(obj); 992 if (ret) 993 return ret; 994 995 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 996 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 997 ret = i915_gem_object_set_to_cpu_domain(obj, true); 998 if (ret) 999 goto err_unpin; 1000 else 1001 goto out; 1002 } 1003 1004 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 1005 1006 /* If we're not in the cpu write domain, set ourself into the 1007 * gtt write domain and manually flush cachelines (as required). 1008 * This optimizes for the case when the gpu will use the data 1009 * right away and we therefore have to clflush anyway. 1010 */ 1011 if (!obj->cache_dirty) { 1012 *needs_clflush |= CLFLUSH_AFTER; 1013 1014 /* 1015 * Same trick applies to invalidate partially written 1016 * cachelines read before writing. 1017 */ 1018 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 1019 *needs_clflush |= CLFLUSH_BEFORE; 1020 } 1021 1022 out: 1023 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1024 obj->mm.dirty = true; 1025 /* return with the pages pinned */ 1026 return 0; 1027 1028 err_unpin: 1029 i915_gem_object_unpin_pages(obj); 1030 return ret; 1031 } 1032 1033 static void 1034 shmem_clflush_swizzled_range(char *addr, unsigned long length, 1035 bool swizzled) 1036 { 1037 if (unlikely(swizzled)) { 1038 unsigned long start = (unsigned long) addr; 1039 unsigned long end = (unsigned long) addr + length; 1040 1041 /* For swizzling simply ensure that we always flush both 1042 * channels. Lame, but simple and it works. Swizzled 1043 * pwrite/pread is far from a hotpath - current userspace 1044 * doesn't use it at all. 
*/ 1045 start = round_down(start, 128); 1046 end = round_up(end, 128); 1047 1048 drm_clflush_virt_range((void *)start, end - start); 1049 } else { 1050 drm_clflush_virt_range(addr, length); 1051 } 1052 1053 } 1054 1055 /* Only difference to the fast-path function is that this can handle bit17 1056 * and uses non-atomic copy and kmap functions. */ 1057 static int 1058 shmem_pread_slow(struct page *page, int offset, int length, 1059 char __user *user_data, 1060 bool page_do_bit17_swizzling, bool needs_clflush) 1061 { 1062 char *vaddr; 1063 int ret; 1064 1065 vaddr = kmap(page); 1066 if (needs_clflush) 1067 shmem_clflush_swizzled_range(vaddr + offset, length, 1068 page_do_bit17_swizzling); 1069 1070 if (page_do_bit17_swizzling) 1071 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 1072 else 1073 ret = __copy_to_user(user_data, vaddr + offset, length); 1074 kunmap(page); 1075 1076 return ret ? - EFAULT : 0; 1077 } 1078 1079 static int 1080 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 1081 bool page_do_bit17_swizzling, bool needs_clflush) 1082 { 1083 int ret; 1084 1085 ret = -ENODEV; 1086 if (!page_do_bit17_swizzling) { 1087 char *vaddr = kmap_atomic(page); 1088 1089 if (needs_clflush) 1090 drm_clflush_virt_range(vaddr + offset, length); 1091 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 1092 kunmap_atomic(vaddr); 1093 } 1094 if (ret == 0) 1095 return 0; 1096 1097 return shmem_pread_slow(page, offset, length, user_data, 1098 page_do_bit17_swizzling, needs_clflush); 1099 } 1100 1101 static int 1102 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 1103 struct drm_i915_gem_pread *args) 1104 { 1105 char __user *user_data; 1106 u64 remain; 1107 unsigned int obj_do_bit17_swizzling; 1108 unsigned int needs_clflush; 1109 unsigned int idx, offset; 1110 int ret; 1111 1112 obj_do_bit17_swizzling = 0; 1113 if (i915_gem_object_needs_bit17_swizzle(obj)) 1114 obj_do_bit17_swizzling = BIT(17); 1115 1116 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1117 if (ret) 1118 return ret; 1119 1120 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1121 mutex_unlock(&obj->base.dev->struct_mutex); 1122 if (ret) 1123 return ret; 1124 1125 remain = args->size; 1126 user_data = u64_to_user_ptr(args->data_ptr); 1127 offset = offset_in_page(args->offset); 1128 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1129 struct page *page = i915_gem_object_get_page(obj, idx); 1130 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1131 1132 ret = shmem_pread(page, offset, length, user_data, 1133 page_to_phys(page) & obj_do_bit17_swizzling, 1134 needs_clflush); 1135 if (ret) 1136 break; 1137 1138 remain -= length; 1139 user_data += length; 1140 offset = 0; 1141 } 1142 1143 i915_gem_obj_finish_shmem_access(obj); 1144 return ret; 1145 } 1146 1147 static inline bool 1148 gtt_user_read(struct io_mapping *mapping, 1149 loff_t base, int offset, 1150 char __user *user_data, int length) 1151 { 1152 void __iomem *vaddr; 1153 unsigned long unwritten; 1154 1155 /* We can use the cpu mem copy function because this is X86. 
*/ 1156 vaddr = io_mapping_map_atomic_wc(mapping, base); 1157 unwritten = __copy_to_user_inatomic(user_data, 1158 (void __force *)vaddr + offset, 1159 length); 1160 io_mapping_unmap_atomic(vaddr); 1161 if (unwritten) { 1162 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1163 unwritten = copy_to_user(user_data, 1164 (void __force *)vaddr + offset, 1165 length); 1166 io_mapping_unmap(vaddr); 1167 } 1168 return unwritten; 1169 } 1170 1171 static int 1172 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1173 const struct drm_i915_gem_pread *args) 1174 { 1175 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1176 struct i915_ggtt *ggtt = &i915->ggtt; 1177 struct drm_mm_node node; 1178 struct i915_vma *vma; 1179 void __user *user_data; 1180 u64 remain, offset; 1181 int ret; 1182 1183 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1184 if (ret) 1185 return ret; 1186 1187 intel_runtime_pm_get(i915); 1188 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1189 PIN_MAPPABLE | 1190 PIN_NONFAULT | 1191 PIN_NONBLOCK); 1192 if (!IS_ERR(vma)) { 1193 node.start = i915_ggtt_offset(vma); 1194 node.allocated = false; 1195 ret = i915_vma_put_fence(vma); 1196 if (ret) { 1197 i915_vma_unpin(vma); 1198 vma = ERR_PTR(ret); 1199 } 1200 } 1201 if (IS_ERR(vma)) { 1202 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1203 if (ret) 1204 goto out_unlock; 1205 GEM_BUG_ON(!node.allocated); 1206 } 1207 1208 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1209 if (ret) 1210 goto out_unpin; 1211 1212 mutex_unlock(&i915->drm.struct_mutex); 1213 1214 user_data = u64_to_user_ptr(args->data_ptr); 1215 remain = args->size; 1216 offset = args->offset; 1217 1218 while (remain > 0) { 1219 /* Operation in this page 1220 * 1221 * page_base = page offset within aperture 1222 * page_offset = offset within page 1223 * page_length = bytes to copy for this page 1224 */ 1225 u32 page_base = node.start; 1226 unsigned page_offset = offset_in_page(offset); 1227 unsigned page_length = PAGE_SIZE - page_offset; 1228 page_length = remain < page_length ? remain : page_length; 1229 if (node.allocated) { 1230 wmb(); 1231 ggtt->vm.insert_page(&ggtt->vm, 1232 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1233 node.start, I915_CACHE_NONE, 0); 1234 wmb(); 1235 } else { 1236 page_base += offset & PAGE_MASK; 1237 } 1238 1239 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1240 user_data, page_length)) { 1241 ret = -EFAULT; 1242 break; 1243 } 1244 1245 remain -= page_length; 1246 user_data += page_length; 1247 offset += page_length; 1248 } 1249 1250 mutex_lock(&i915->drm.struct_mutex); 1251 out_unpin: 1252 if (node.allocated) { 1253 wmb(); 1254 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1255 remove_mappable_node(&node); 1256 } else { 1257 i915_vma_unpin(vma); 1258 } 1259 out_unlock: 1260 intel_runtime_pm_put(i915); 1261 mutex_unlock(&i915->drm.struct_mutex); 1262 1263 return ret; 1264 } 1265 1266 /** 1267 * Reads data from the object referenced by handle. 1268 * @dev: drm device pointer 1269 * @data: ioctl data blob 1270 * @file: drm file pointer 1271 * 1272 * On error, the contents of *data are undefined. 
1273 */ 1274 int 1275 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1276 struct drm_file *file) 1277 { 1278 struct drm_i915_gem_pread *args = data; 1279 struct drm_i915_gem_object *obj; 1280 int ret; 1281 1282 if (args->size == 0) 1283 return 0; 1284 1285 if (!access_ok(VERIFY_WRITE, 1286 u64_to_user_ptr(args->data_ptr), 1287 args->size)) 1288 return -EFAULT; 1289 1290 obj = i915_gem_object_lookup(file, args->handle); 1291 if (!obj) 1292 return -ENOENT; 1293 1294 /* Bounds check source. */ 1295 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1296 ret = -EINVAL; 1297 goto out; 1298 } 1299 1300 trace_i915_gem_object_pread(obj, args->offset, args->size); 1301 1302 ret = i915_gem_object_wait(obj, 1303 I915_WAIT_INTERRUPTIBLE, 1304 MAX_SCHEDULE_TIMEOUT, 1305 to_rps_client(file)); 1306 if (ret) 1307 goto out; 1308 1309 ret = i915_gem_object_pin_pages(obj); 1310 if (ret) 1311 goto out; 1312 1313 ret = i915_gem_shmem_pread(obj, args); 1314 if (ret == -EFAULT || ret == -ENODEV) 1315 ret = i915_gem_gtt_pread(obj, args); 1316 1317 i915_gem_object_unpin_pages(obj); 1318 out: 1319 i915_gem_object_put(obj); 1320 return ret; 1321 } 1322 1323 /* This is the fast write path which cannot handle 1324 * page faults in the source data 1325 */ 1326 1327 static inline bool 1328 ggtt_write(struct io_mapping *mapping, 1329 loff_t base, int offset, 1330 char __user *user_data, int length) 1331 { 1332 void __iomem *vaddr; 1333 unsigned long unwritten; 1334 1335 /* We can use the cpu mem copy function because this is X86. */ 1336 vaddr = io_mapping_map_atomic_wc(mapping, base); 1337 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1338 user_data, length); 1339 io_mapping_unmap_atomic(vaddr); 1340 if (unwritten) { 1341 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1342 unwritten = copy_from_user((void __force *)vaddr + offset, 1343 user_data, length); 1344 io_mapping_unmap(vaddr); 1345 } 1346 1347 return unwritten; 1348 } 1349 1350 /** 1351 * This is the fast pwrite path, where we copy the data directly from the 1352 * user into the GTT, uncached. 1353 * @obj: i915 GEM object 1354 * @args: pwrite arguments structure 1355 */ 1356 static int 1357 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1358 const struct drm_i915_gem_pwrite *args) 1359 { 1360 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1361 struct i915_ggtt *ggtt = &i915->ggtt; 1362 struct drm_mm_node node; 1363 struct i915_vma *vma; 1364 u64 remain, offset; 1365 void __user *user_data; 1366 int ret; 1367 1368 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1369 if (ret) 1370 return ret; 1371 1372 if (i915_gem_object_has_struct_page(obj)) { 1373 /* 1374 * Avoid waking the device up if we can fallback, as 1375 * waking/resuming is very slow (worst-case 10-100 ms 1376 * depending on PCI sleeps and our own resume time). 1377 * This easily dwarfs any performance advantage from 1378 * using the cache bypass of indirect GGTT access. 
1379 */ 1380 if (!intel_runtime_pm_get_if_in_use(i915)) { 1381 ret = -EFAULT; 1382 goto out_unlock; 1383 } 1384 } else { 1385 /* No backing pages, no fallback, we must force GGTT access */ 1386 intel_runtime_pm_get(i915); 1387 } 1388 1389 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1390 PIN_MAPPABLE | 1391 PIN_NONFAULT | 1392 PIN_NONBLOCK); 1393 if (!IS_ERR(vma)) { 1394 node.start = i915_ggtt_offset(vma); 1395 node.allocated = false; 1396 ret = i915_vma_put_fence(vma); 1397 if (ret) { 1398 i915_vma_unpin(vma); 1399 vma = ERR_PTR(ret); 1400 } 1401 } 1402 if (IS_ERR(vma)) { 1403 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1404 if (ret) 1405 goto out_rpm; 1406 GEM_BUG_ON(!node.allocated); 1407 } 1408 1409 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1410 if (ret) 1411 goto out_unpin; 1412 1413 mutex_unlock(&i915->drm.struct_mutex); 1414 1415 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1416 1417 user_data = u64_to_user_ptr(args->data_ptr); 1418 offset = args->offset; 1419 remain = args->size; 1420 while (remain) { 1421 /* Operation in this page 1422 * 1423 * page_base = page offset within aperture 1424 * page_offset = offset within page 1425 * page_length = bytes to copy for this page 1426 */ 1427 u32 page_base = node.start; 1428 unsigned int page_offset = offset_in_page(offset); 1429 unsigned int page_length = PAGE_SIZE - page_offset; 1430 page_length = remain < page_length ? remain : page_length; 1431 if (node.allocated) { 1432 wmb(); /* flush the write before we modify the GGTT */ 1433 ggtt->vm.insert_page(&ggtt->vm, 1434 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1435 node.start, I915_CACHE_NONE, 0); 1436 wmb(); /* flush modifications to the GGTT (insert_page) */ 1437 } else { 1438 page_base += offset & PAGE_MASK; 1439 } 1440 /* If we get a fault while copying data, then (presumably) our 1441 * source page isn't available. Return the error and we'll 1442 * retry in the slow path. 1443 * If the object is non-shmem backed, we retry again with the 1444 * path that handles page fault. 1445 */ 1446 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1447 user_data, page_length)) { 1448 ret = -EFAULT; 1449 break; 1450 } 1451 1452 remain -= page_length; 1453 user_data += page_length; 1454 offset += page_length; 1455 } 1456 intel_fb_obj_flush(obj, ORIGIN_CPU); 1457 1458 mutex_lock(&i915->drm.struct_mutex); 1459 out_unpin: 1460 if (node.allocated) { 1461 wmb(); 1462 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 1463 remove_mappable_node(&node); 1464 } else { 1465 i915_vma_unpin(vma); 1466 } 1467 out_rpm: 1468 intel_runtime_pm_put(i915); 1469 out_unlock: 1470 mutex_unlock(&i915->drm.struct_mutex); 1471 return ret; 1472 } 1473 1474 static int 1475 shmem_pwrite_slow(struct page *page, int offset, int length, 1476 char __user *user_data, 1477 bool page_do_bit17_swizzling, 1478 bool needs_clflush_before, 1479 bool needs_clflush_after) 1480 { 1481 char *vaddr; 1482 int ret; 1483 1484 vaddr = kmap(page); 1485 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1486 shmem_clflush_swizzled_range(vaddr + offset, length, 1487 page_do_bit17_swizzling); 1488 if (page_do_bit17_swizzling) 1489 ret = __copy_from_user_swizzled(vaddr, offset, user_data, 1490 length); 1491 else 1492 ret = __copy_from_user(vaddr + offset, user_data, length); 1493 if (needs_clflush_after) 1494 shmem_clflush_swizzled_range(vaddr + offset, length, 1495 page_do_bit17_swizzling); 1496 kunmap(page); 1497 1498 return ret ? 
-EFAULT : 0; 1499 } 1500 1501 /* Per-page copy function for the shmem pwrite fastpath. 1502 * Flushes invalid cachelines before writing to the target if 1503 * needs_clflush_before is set and flushes out any written cachelines after 1504 * writing if needs_clflush is set. 1505 */ 1506 static int 1507 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1508 bool page_do_bit17_swizzling, 1509 bool needs_clflush_before, 1510 bool needs_clflush_after) 1511 { 1512 int ret; 1513 1514 ret = -ENODEV; 1515 if (!page_do_bit17_swizzling) { 1516 char *vaddr = kmap_atomic(page); 1517 1518 if (needs_clflush_before) 1519 drm_clflush_virt_range(vaddr + offset, len); 1520 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); 1521 if (needs_clflush_after) 1522 drm_clflush_virt_range(vaddr + offset, len); 1523 1524 kunmap_atomic(vaddr); 1525 } 1526 if (ret == 0) 1527 return ret; 1528 1529 return shmem_pwrite_slow(page, offset, len, user_data, 1530 page_do_bit17_swizzling, 1531 needs_clflush_before, 1532 needs_clflush_after); 1533 } 1534 1535 static int 1536 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1537 const struct drm_i915_gem_pwrite *args) 1538 { 1539 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1540 void __user *user_data; 1541 u64 remain; 1542 unsigned int obj_do_bit17_swizzling; 1543 unsigned int partial_cacheline_write; 1544 unsigned int needs_clflush; 1545 unsigned int offset, idx; 1546 int ret; 1547 1548 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1549 if (ret) 1550 return ret; 1551 1552 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1553 mutex_unlock(&i915->drm.struct_mutex); 1554 if (ret) 1555 return ret; 1556 1557 obj_do_bit17_swizzling = 0; 1558 if (i915_gem_object_needs_bit17_swizzle(obj)) 1559 obj_do_bit17_swizzling = BIT(17); 1560 1561 /* If we don't overwrite a cacheline completely we need to be 1562 * careful to have up-to-date data by first clflushing. Don't 1563 * overcomplicate things and flush the entire patch. 1564 */ 1565 partial_cacheline_write = 0; 1566 if (needs_clflush & CLFLUSH_BEFORE) 1567 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1568 1569 user_data = u64_to_user_ptr(args->data_ptr); 1570 remain = args->size; 1571 offset = offset_in_page(args->offset); 1572 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1573 struct page *page = i915_gem_object_get_page(obj, idx); 1574 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 1575 1576 ret = shmem_pwrite(page, offset, length, user_data, 1577 page_to_phys(page) & obj_do_bit17_swizzling, 1578 (offset | length) & partial_cacheline_write, 1579 needs_clflush & CLFLUSH_AFTER); 1580 if (ret) 1581 break; 1582 1583 remain -= length; 1584 user_data += length; 1585 offset = 0; 1586 } 1587 1588 intel_fb_obj_flush(obj, ORIGIN_CPU); 1589 i915_gem_obj_finish_shmem_access(obj); 1590 return ret; 1591 } 1592 1593 /** 1594 * Writes data to the object referenced by handle. 1595 * @dev: drm device 1596 * @data: ioctl data blob 1597 * @file: drm file 1598 * 1599 * On error, the contents of the buffer that were to be modified are undefined. 
1600 */ 1601 int 1602 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1603 struct drm_file *file) 1604 { 1605 struct drm_i915_gem_pwrite *args = data; 1606 struct drm_i915_gem_object *obj; 1607 int ret; 1608 1609 if (args->size == 0) 1610 return 0; 1611 1612 if (!access_ok(VERIFY_READ, 1613 u64_to_user_ptr(args->data_ptr), 1614 args->size)) 1615 return -EFAULT; 1616 1617 obj = i915_gem_object_lookup(file, args->handle); 1618 if (!obj) 1619 return -ENOENT; 1620 1621 /* Bounds check destination. */ 1622 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1623 ret = -EINVAL; 1624 goto err; 1625 } 1626 1627 /* Writes not allowed into this read-only object */ 1628 if (i915_gem_object_is_readonly(obj)) { 1629 ret = -EINVAL; 1630 goto err; 1631 } 1632 1633 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1634 1635 ret = -ENODEV; 1636 if (obj->ops->pwrite) 1637 ret = obj->ops->pwrite(obj, args); 1638 if (ret != -ENODEV) 1639 goto err; 1640 1641 ret = i915_gem_object_wait(obj, 1642 I915_WAIT_INTERRUPTIBLE | 1643 I915_WAIT_ALL, 1644 MAX_SCHEDULE_TIMEOUT, 1645 to_rps_client(file)); 1646 if (ret) 1647 goto err; 1648 1649 ret = i915_gem_object_pin_pages(obj); 1650 if (ret) 1651 goto err; 1652 1653 ret = -EFAULT; 1654 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1655 * it would end up going through the fenced access, and we'll get 1656 * different detiling behavior between reading and writing. 1657 * pread/pwrite currently are reading and writing from the CPU 1658 * perspective, requiring manual detiling by the client. 1659 */ 1660 if (!i915_gem_object_has_struct_page(obj) || 1661 cpu_write_needs_clflush(obj)) 1662 /* Note that the gtt paths might fail with non-page-backed user 1663 * pointers (e.g. gtt mappings when moving data between 1664 * textures). Fallback to the shmem path in that case. 1665 */ 1666 ret = i915_gem_gtt_pwrite_fast(obj, args); 1667 1668 if (ret == -EFAULT || ret == -ENOSPC) { 1669 if (obj->phys_handle) 1670 ret = i915_gem_phys_pwrite(obj, args, file); 1671 else 1672 ret = i915_gem_shmem_pwrite(obj, args); 1673 } 1674 1675 i915_gem_object_unpin_pages(obj); 1676 err: 1677 i915_gem_object_put(obj); 1678 return ret; 1679 } 1680 1681 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1682 { 1683 struct drm_i915_private *i915; 1684 struct list_head *list; 1685 struct i915_vma *vma; 1686 1687 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1688 1689 for_each_ggtt_vma(vma, obj) { 1690 if (i915_vma_is_active(vma)) 1691 continue; 1692 1693 if (!drm_mm_node_allocated(&vma->node)) 1694 continue; 1695 1696 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1697 } 1698 1699 i915 = to_i915(obj->base.dev); 1700 spin_lock(&i915->mm.obj_lock); 1701 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1702 list_move_tail(&obj->mm.link, list); 1703 spin_unlock(&i915->mm.obj_lock); 1704 } 1705 1706 /** 1707 * Called when user space prepares to use an object with the CPU, either 1708 * through the mmap ioctl's mapping or a GTT mapping. 
1709 * @dev: drm device 1710 * @data: ioctl data blob 1711 * @file: drm file 1712 */ 1713 int 1714 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1715 struct drm_file *file) 1716 { 1717 struct drm_i915_gem_set_domain *args = data; 1718 struct drm_i915_gem_object *obj; 1719 uint32_t read_domains = args->read_domains; 1720 uint32_t write_domain = args->write_domain; 1721 int err; 1722 1723 /* Only handle setting domains to types used by the CPU. */ 1724 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1725 return -EINVAL; 1726 1727 /* Having something in the write domain implies it's in the read 1728 * domain, and only that read domain. Enforce that in the request. 1729 */ 1730 if (write_domain != 0 && read_domains != write_domain) 1731 return -EINVAL; 1732 1733 obj = i915_gem_object_lookup(file, args->handle); 1734 if (!obj) 1735 return -ENOENT; 1736 1737 /* Try to flush the object off the GPU without holding the lock. 1738 * We will repeat the flush holding the lock in the normal manner 1739 * to catch cases where we are gazumped. 1740 */ 1741 err = i915_gem_object_wait(obj, 1742 I915_WAIT_INTERRUPTIBLE | 1743 (write_domain ? I915_WAIT_ALL : 0), 1744 MAX_SCHEDULE_TIMEOUT, 1745 to_rps_client(file)); 1746 if (err) 1747 goto out; 1748 1749 /* 1750 * Proxy objects do not control access to the backing storage, ergo 1751 * they cannot be used as a means to manipulate the cache domain 1752 * tracking for that backing storage. The proxy object is always 1753 * considered to be outside of any cache domain. 1754 */ 1755 if (i915_gem_object_is_proxy(obj)) { 1756 err = -ENXIO; 1757 goto out; 1758 } 1759 1760 /* 1761 * Flush and acquire obj->pages so that we are coherent through 1762 * direct access in memory with previous cached writes through 1763 * shmemfs and that our cache domain tracking remains valid. 1764 * For example, if the obj->filp was moved to swap without us 1765 * being notified and releasing the pages, we would mistakenly 1766 * continue to assume that the obj remained out of the CPU cached 1767 * domain. 1768 */ 1769 err = i915_gem_object_pin_pages(obj); 1770 if (err) 1771 goto out; 1772 1773 err = i915_mutex_lock_interruptible(dev); 1774 if (err) 1775 goto out_unpin; 1776 1777 if (read_domains & I915_GEM_DOMAIN_WC) 1778 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1779 else if (read_domains & I915_GEM_DOMAIN_GTT) 1780 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1781 else 1782 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1783 1784 /* And bump the LRU for this access */ 1785 i915_gem_object_bump_inactive_ggtt(obj); 1786 1787 mutex_unlock(&dev->struct_mutex); 1788 1789 if (write_domain != 0) 1790 intel_fb_obj_invalidate(obj, 1791 fb_write_origin(obj, write_domain)); 1792 1793 out_unpin: 1794 i915_gem_object_unpin_pages(obj); 1795 out: 1796 i915_gem_object_put(obj); 1797 return err; 1798 } 1799 1800 /** 1801 * Called when user space has done writes to this buffer 1802 * @dev: drm device 1803 * @data: ioctl data blob 1804 * @file: drm file 1805 */ 1806 int 1807 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1808 struct drm_file *file) 1809 { 1810 struct drm_i915_gem_sw_finish *args = data; 1811 struct drm_i915_gem_object *obj; 1812 1813 obj = i915_gem_object_lookup(file, args->handle); 1814 if (!obj) 1815 return -ENOENT; 1816 1817 /* 1818 * Proxy objects are barred from CPU access, so there is no 1819 * need to ban sw_finish as it is a nop. 
1820 */ 1821 1822 /* Pinned buffers may be scanout, so flush the cache */ 1823 i915_gem_object_flush_if_display(obj); 1824 i915_gem_object_put(obj); 1825 1826 return 0; 1827 } 1828 1829 /** 1830 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1831 * it is mapped to. 1832 * @dev: drm device 1833 * @data: ioctl data blob 1834 * @file: drm file 1835 * 1836 * While the mapping holds a reference on the contents of the object, it doesn't 1837 * imply a ref on the object itself. 1838 * 1839 * IMPORTANT: 1840 * 1841 * DRM driver writers who look a this function as an example for how to do GEM 1842 * mmap support, please don't implement mmap support like here. The modern way 1843 * to implement DRM mmap support is with an mmap offset ioctl (like 1844 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1845 * That way debug tooling like valgrind will understand what's going on, hiding 1846 * the mmap call in a driver private ioctl will break that. The i915 driver only 1847 * does cpu mmaps this way because we didn't know better. 1848 */ 1849 int 1850 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1851 struct drm_file *file) 1852 { 1853 struct drm_i915_gem_mmap *args = data; 1854 struct drm_i915_gem_object *obj; 1855 unsigned long addr; 1856 1857 if (args->flags & ~(I915_MMAP_WC)) 1858 return -EINVAL; 1859 1860 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1861 return -ENODEV; 1862 1863 obj = i915_gem_object_lookup(file, args->handle); 1864 if (!obj) 1865 return -ENOENT; 1866 1867 /* prime objects have no backing filp to GEM mmap 1868 * pages from. 1869 */ 1870 if (!obj->base.filp) { 1871 i915_gem_object_put(obj); 1872 return -ENXIO; 1873 } 1874 1875 addr = vm_mmap(obj->base.filp, 0, args->size, 1876 PROT_READ | PROT_WRITE, MAP_SHARED, 1877 args->offset); 1878 if (args->flags & I915_MMAP_WC) { 1879 struct mm_struct *mm = current->mm; 1880 struct vm_area_struct *vma; 1881 1882 if (down_write_killable(&mm->mmap_sem)) { 1883 i915_gem_object_put(obj); 1884 return -EINTR; 1885 } 1886 vma = find_vma(mm, addr); 1887 if (vma) 1888 vma->vm_page_prot = 1889 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1890 else 1891 addr = -ENOMEM; 1892 up_write(&mm->mmap_sem); 1893 1894 /* This may race, but that's ok, it only gets set */ 1895 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); 1896 } 1897 i915_gem_object_put(obj); 1898 if (IS_ERR((void *)addr)) 1899 return addr; 1900 1901 args->addr_ptr = (uint64_t) addr; 1902 1903 return 0; 1904 } 1905 1906 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) 1907 { 1908 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; 1909 } 1910 1911 /** 1912 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps 1913 * 1914 * A history of the GTT mmap interface: 1915 * 1916 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to 1917 * aligned and suitable for fencing, and still fit into the available 1918 * mappable space left by the pinned display objects. A classic problem 1919 * we called the page-fault-of-doom where we would ping-pong between 1920 * two objects that could not fit inside the GTT and so the memcpy 1921 * would page one object in at the expense of the other between every 1922 * single byte. 1923 * 1924 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none 1925 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). 
If the 1926 * object is too large for the available space (or simply too large 1927 * for the mappable aperture!), a view is created instead and faulted 1928 * into userspace. (This view is aligned and sized appropriately for 1929 * fenced access.) 1930 * 1931 * 2 - Recognise WC as a separate cache domain so that we can flush the 1932 * delayed writes via GTT before performing direct access via WC. 1933 * 1934 * Restrictions: 1935 * 1936 * * snoopable objects cannot be accessed via the GTT. It can cause machine 1937 * hangs on some architectures, corruption on others. An attempt to service 1938 * a GTT page fault from a snoopable object will generate a SIGBUS. 1939 * 1940 * * the object must be able to fit into RAM (physical memory, though no 1941 * limited to the mappable aperture). 1942 * 1943 * 1944 * Caveats: 1945 * 1946 * * a new GTT page fault will synchronize rendering from the GPU and flush 1947 * all data to system memory. Subsequent access will not be synchronized. 1948 * 1949 * * all mappings are revoked on runtime device suspend. 1950 * 1951 * * there are only 8, 16 or 32 fence registers to share between all users 1952 * (older machines require fence register for display and blitter access 1953 * as well). Contention of the fence registers will cause the previous users 1954 * to be unmapped and any new access will generate new page faults. 1955 * 1956 * * running out of memory while servicing a fault may generate a SIGBUS, 1957 * rather than the expected SIGSEGV. 1958 */ 1959 int i915_gem_mmap_gtt_version(void) 1960 { 1961 return 2; 1962 } 1963 1964 static inline struct i915_ggtt_view 1965 compute_partial_view(const struct drm_i915_gem_object *obj, 1966 pgoff_t page_offset, 1967 unsigned int chunk) 1968 { 1969 struct i915_ggtt_view view; 1970 1971 if (i915_gem_object_is_tiled(obj)) 1972 chunk = roundup(chunk, tile_row_pages(obj)); 1973 1974 view.type = I915_GGTT_VIEW_PARTIAL; 1975 view.partial.offset = rounddown(page_offset, chunk); 1976 view.partial.size = 1977 min_t(unsigned int, chunk, 1978 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 1979 1980 /* If the partial covers the entire object, just create a normal VMA. */ 1981 if (chunk >= obj->base.size >> PAGE_SHIFT) 1982 view.type = I915_GGTT_VIEW_NORMAL; 1983 1984 return view; 1985 } 1986 1987 /** 1988 * i915_gem_fault - fault a page into the GTT 1989 * @vmf: fault info 1990 * 1991 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1992 * from userspace. The fault handler takes care of binding the object to 1993 * the GTT (if needed), allocating and programming a fence register (again, 1994 * only if needed based on whether the old reg is still valid or the object 1995 * is tiled) and inserting a new PTE into the faulting process. 1996 * 1997 * Note that the faulting process may involve evicting existing objects 1998 * from the GTT and/or fence registers to make room. So performance may 1999 * suffer if the GTT working set is large or there are few fence registers 2000 * left. 2001 * 2002 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 2003 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 
2004 */ 2005 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 2006 { 2007 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 2008 struct vm_area_struct *area = vmf->vma; 2009 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 2010 struct drm_device *dev = obj->base.dev; 2011 struct drm_i915_private *dev_priv = to_i915(dev); 2012 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2013 bool write = area->vm_flags & VM_WRITE; 2014 struct i915_vma *vma; 2015 pgoff_t page_offset; 2016 int ret; 2017 2018 /* Sanity check that we allow writing into this object */ 2019 if (i915_gem_object_is_readonly(obj) && write) 2020 return VM_FAULT_SIGBUS; 2021 2022 /* We don't use vmf->pgoff since that has the fake offset */ 2023 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 2024 2025 trace_i915_gem_object_fault(obj, page_offset, true, write); 2026 2027 /* Try to flush the object off the GPU first without holding the lock. 2028 * Upon acquiring the lock, we will perform our sanity checks and then 2029 * repeat the flush holding the lock in the normal manner to catch cases 2030 * where we are gazumped. 2031 */ 2032 ret = i915_gem_object_wait(obj, 2033 I915_WAIT_INTERRUPTIBLE, 2034 MAX_SCHEDULE_TIMEOUT, 2035 NULL); 2036 if (ret) 2037 goto err; 2038 2039 ret = i915_gem_object_pin_pages(obj); 2040 if (ret) 2041 goto err; 2042 2043 intel_runtime_pm_get(dev_priv); 2044 2045 ret = i915_mutex_lock_interruptible(dev); 2046 if (ret) 2047 goto err_rpm; 2048 2049 /* Access to snoopable pages through the GTT is incoherent. */ 2050 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2051 ret = -EFAULT; 2052 goto err_unlock; 2053 } 2054 2055 2056 /* Now pin it into the GTT as needed */ 2057 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2058 PIN_MAPPABLE | 2059 PIN_NONBLOCK | 2060 PIN_NONFAULT); 2061 if (IS_ERR(vma)) { 2062 /* Use a partial view if it is bigger than available space */ 2063 struct i915_ggtt_view view = 2064 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2065 unsigned int flags; 2066 2067 flags = PIN_MAPPABLE; 2068 if (view.type == I915_GGTT_VIEW_NORMAL) 2069 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2070 2071 /* 2072 * Userspace is now writing through an untracked VMA, abandon 2073 * all hope that the hardware is able to track future writes. 
2074 */ 2075 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2076 2077 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2078 if (IS_ERR(vma) && !view.type) { 2079 flags = PIN_MAPPABLE; 2080 view.type = I915_GGTT_VIEW_PARTIAL; 2081 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2082 } 2083 } 2084 if (IS_ERR(vma)) { 2085 ret = PTR_ERR(vma); 2086 goto err_unlock; 2087 } 2088 2089 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2090 if (ret) 2091 goto err_unpin; 2092 2093 ret = i915_vma_pin_fence(vma); 2094 if (ret) 2095 goto err_unpin; 2096 2097 /* Finally, remap it using the new GTT offset */ 2098 ret = remap_io_mapping(area, 2099 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2100 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2101 min_t(u64, vma->size, area->vm_end - area->vm_start), 2102 &ggtt->iomap); 2103 if (ret) 2104 goto err_fence; 2105 2106 /* Mark as being mmapped into userspace for later revocation */ 2107 assert_rpm_wakelock_held(dev_priv); 2108 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2109 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2110 GEM_BUG_ON(!obj->userfault_count); 2111 2112 i915_vma_set_ggtt_write(vma); 2113 2114 err_fence: 2115 i915_vma_unpin_fence(vma); 2116 err_unpin: 2117 __i915_vma_unpin(vma); 2118 err_unlock: 2119 mutex_unlock(&dev->struct_mutex); 2120 err_rpm: 2121 intel_runtime_pm_put(dev_priv); 2122 i915_gem_object_unpin_pages(obj); 2123 err: 2124 switch (ret) { 2125 case -EIO: 2126 /* 2127 * We eat errors when the gpu is terminally wedged to avoid 2128 * userspace unduly crashing (gl has no provisions for mmaps to 2129 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2130 * and so needs to be reported. 2131 */ 2132 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2133 return VM_FAULT_SIGBUS; 2134 /* else: fall through */ 2135 case -EAGAIN: 2136 /* 2137 * EAGAIN means the gpu is hung and we'll wait for the error 2138 * handler to reset everything when re-faulting in 2139 * i915_mutex_lock_interruptible. 2140 */ 2141 case 0: 2142 case -ERESTARTSYS: 2143 case -EINTR: 2144 case -EBUSY: 2145 /* 2146 * EBUSY is ok: this just means that another thread 2147 * already did the job. 2148 */ 2149 return VM_FAULT_NOPAGE; 2150 case -ENOMEM: 2151 return VM_FAULT_OOM; 2152 case -ENOSPC: 2153 case -EFAULT: 2154 return VM_FAULT_SIGBUS; 2155 default: 2156 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2157 return VM_FAULT_SIGBUS; 2158 } 2159 } 2160 2161 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2162 { 2163 struct i915_vma *vma; 2164 2165 GEM_BUG_ON(!obj->userfault_count); 2166 2167 obj->userfault_count = 0; 2168 list_del(&obj->userfault_link); 2169 drm_vma_node_unmap(&obj->base.vma_node, 2170 obj->base.dev->anon_inode->i_mapping); 2171 2172 for_each_ggtt_vma(vma, obj) 2173 i915_vma_unset_userfault(vma); 2174 } 2175 2176 /** 2177 * i915_gem_release_mmap - remove physical page mappings 2178 * @obj: obj in question 2179 * 2180 * Preserve the reservation of the mmapping with the DRM core code, but 2181 * relinquish ownership of the pages back to the system. 2182 * 2183 * It is vital that we remove the page mapping if we have mapped a tiled 2184 * object through the GTT and then lose the fence register due to 2185 * resource pressure. Similarly if the object has been moved out of the 2186 * aperture, than pages mapped into userspace must be revoked. 
Removing the 2187 * mapping will then trigger a page fault on the next user access, allowing 2188 * fixup by i915_gem_fault(). 2189 */ 2190 void 2191 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2192 { 2193 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2194 2195 /* Serialisation between user GTT access and our code depends upon 2196 * revoking the CPU's PTE whilst the mutex is held. The next user 2197 * pagefault then has to wait until we release the mutex. 2198 * 2199 * Note that RPM complicates somewhat by adding an additional 2200 * requirement that operations to the GGTT be made holding the RPM 2201 * wakeref. 2202 */ 2203 lockdep_assert_held(&i915->drm.struct_mutex); 2204 intel_runtime_pm_get(i915); 2205 2206 if (!obj->userfault_count) 2207 goto out; 2208 2209 __i915_gem_object_release_mmap(obj); 2210 2211 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2212 * memory transactions from userspace before we return. The TLB 2213 * flushing implied above by changing the PTE above *should* be 2214 * sufficient, an extra barrier here just provides us with a bit 2215 * of paranoid documentation about our requirement to serialise 2216 * memory writes before touching registers / GSM. 2217 */ 2218 wmb(); 2219 2220 out: 2221 intel_runtime_pm_put(i915); 2222 } 2223 2224 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2225 { 2226 struct drm_i915_gem_object *obj, *on; 2227 int i; 2228 2229 /* 2230 * Only called during RPM suspend. All users of the userfault_list 2231 * must be holding an RPM wakeref to ensure that this can not 2232 * run concurrently with themselves (and use the struct_mutex for 2233 * protection between themselves). 2234 */ 2235 2236 list_for_each_entry_safe(obj, on, 2237 &dev_priv->mm.userfault_list, userfault_link) 2238 __i915_gem_object_release_mmap(obj); 2239 2240 /* The fence will be lost when the device powers down. If any were 2241 * in use by hardware (i.e. they are pinned), we should not be powering 2242 * down! All other fences will be reacquired by the user upon waking. 2243 */ 2244 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2245 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2246 2247 /* Ideally we want to assert that the fence register is not 2248 * live at this point (i.e. that no piece of code will be 2249 * trying to write through fence + GTT, as that both violates 2250 * our tracking of activity and associated locking/barriers, 2251 * but also is illegal given that the hw is powered down). 2252 * 2253 * Previously we used reg->pin_count as a "liveness" indicator. 2254 * That is not sufficient, and we need a more fine-grained 2255 * tool if we want to have a sanity check here. 
2256 */ 2257 2258 if (!reg->vma) 2259 continue; 2260 2261 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2262 reg->dirty = true; 2263 } 2264 } 2265 2266 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2267 { 2268 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2269 int err; 2270 2271 err = drm_gem_create_mmap_offset(&obj->base); 2272 if (likely(!err)) 2273 return 0; 2274 2275 /* Attempt to reap some mmap space from dead objects */ 2276 do { 2277 err = i915_gem_wait_for_idle(dev_priv, 2278 I915_WAIT_INTERRUPTIBLE, 2279 MAX_SCHEDULE_TIMEOUT); 2280 if (err) 2281 break; 2282 2283 i915_gem_drain_freed_objects(dev_priv); 2284 err = drm_gem_create_mmap_offset(&obj->base); 2285 if (!err) 2286 break; 2287 2288 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2289 2290 return err; 2291 } 2292 2293 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2294 { 2295 drm_gem_free_mmap_offset(&obj->base); 2296 } 2297 2298 int 2299 i915_gem_mmap_gtt(struct drm_file *file, 2300 struct drm_device *dev, 2301 uint32_t handle, 2302 uint64_t *offset) 2303 { 2304 struct drm_i915_gem_object *obj; 2305 int ret; 2306 2307 obj = i915_gem_object_lookup(file, handle); 2308 if (!obj) 2309 return -ENOENT; 2310 2311 ret = i915_gem_object_create_mmap_offset(obj); 2312 if (ret == 0) 2313 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2314 2315 i915_gem_object_put(obj); 2316 return ret; 2317 } 2318 2319 /** 2320 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2321 * @dev: DRM device 2322 * @data: GTT mapping ioctl data 2323 * @file: GEM object info 2324 * 2325 * Simply returns the fake offset to userspace so it can mmap it. 2326 * The mmap call will end up in drm_gem_mmap(), which will set things 2327 * up so we can get faults in the handler above. 2328 * 2329 * The fault handler will take care of binding the object into the GTT 2330 * (since it may have been evicted to make room for something), allocating 2331 * a fence register, and mapping the appropriate aperture address into 2332 * userspace. 2333 */ 2334 int 2335 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2336 struct drm_file *file) 2337 { 2338 struct drm_i915_gem_mmap_gtt *args = data; 2339 2340 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2341 } 2342 2343 /* Immediately discard the backing storage */ 2344 static void 2345 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2346 { 2347 i915_gem_object_free_mmap_offset(obj); 2348 2349 if (obj->base.filp == NULL) 2350 return; 2351 2352 /* Our goal here is to return as much of the memory as 2353 * is possible back to the system as we are called from OOM. 2354 * To do this we must instruct the shmfs to drop all of its 2355 * backing pages, *now*. 
2356 */ 2357 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2358 obj->mm.madv = __I915_MADV_PURGED; 2359 obj->mm.pages = ERR_PTR(-EFAULT); 2360 } 2361 2362 /* Try to discard unwanted pages */ 2363 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2364 { 2365 struct address_space *mapping; 2366 2367 lockdep_assert_held(&obj->mm.lock); 2368 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2369 2370 switch (obj->mm.madv) { 2371 case I915_MADV_DONTNEED: 2372 i915_gem_object_truncate(obj); 2373 case __I915_MADV_PURGED: 2374 return; 2375 } 2376 2377 if (obj->base.filp == NULL) 2378 return; 2379 2380 mapping = obj->base.filp->f_mapping, 2381 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2382 } 2383 2384 static void 2385 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2386 struct sg_table *pages) 2387 { 2388 struct sgt_iter sgt_iter; 2389 struct page *page; 2390 2391 __i915_gem_object_release_shmem(obj, pages, true); 2392 2393 i915_gem_gtt_finish_pages(obj, pages); 2394 2395 if (i915_gem_object_needs_bit17_swizzle(obj)) 2396 i915_gem_object_save_bit_17_swizzle(obj, pages); 2397 2398 for_each_sgt_page(page, sgt_iter, pages) { 2399 if (obj->mm.dirty) 2400 set_page_dirty(page); 2401 2402 if (obj->mm.madv == I915_MADV_WILLNEED) 2403 mark_page_accessed(page); 2404 2405 put_page(page); 2406 } 2407 obj->mm.dirty = false; 2408 2409 sg_free_table(pages); 2410 kfree(pages); 2411 } 2412 2413 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2414 { 2415 struct radix_tree_iter iter; 2416 void __rcu **slot; 2417 2418 rcu_read_lock(); 2419 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2420 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2421 rcu_read_unlock(); 2422 } 2423 2424 static struct sg_table * 2425 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2426 { 2427 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2428 struct sg_table *pages; 2429 2430 pages = fetch_and_zero(&obj->mm.pages); 2431 if (!pages) 2432 return NULL; 2433 2434 spin_lock(&i915->mm.obj_lock); 2435 list_del(&obj->mm.link); 2436 spin_unlock(&i915->mm.obj_lock); 2437 2438 if (obj->mm.mapping) { 2439 void *ptr; 2440 2441 ptr = page_mask_bits(obj->mm.mapping); 2442 if (is_vmalloc_addr(ptr)) 2443 vunmap(ptr); 2444 else 2445 kunmap(kmap_to_page(ptr)); 2446 2447 obj->mm.mapping = NULL; 2448 } 2449 2450 __i915_gem_object_reset_page_iter(obj); 2451 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2452 2453 return pages; 2454 } 2455 2456 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2457 enum i915_mm_subclass subclass) 2458 { 2459 struct sg_table *pages; 2460 2461 if (i915_gem_object_has_pinned_pages(obj)) 2462 return; 2463 2464 GEM_BUG_ON(obj->bind_count); 2465 if (!i915_gem_object_has_pages(obj)) 2466 return; 2467 2468 /* May be called by shrinker from within get_pages() (on another bo) */ 2469 mutex_lock_nested(&obj->mm.lock, subclass); 2470 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2471 goto unlock; 2472 2473 /* 2474 * ->put_pages might need to allocate memory for the bit17 swizzle 2475 * array, hence protect them from being reaped by removing them from gtt 2476 * lists early. 
2477 */ 2478 pages = __i915_gem_object_unset_pages(obj); 2479 if (!IS_ERR(pages)) 2480 obj->ops->put_pages(obj, pages); 2481 2482 unlock: 2483 mutex_unlock(&obj->mm.lock); 2484 } 2485 2486 static bool i915_sg_trim(struct sg_table *orig_st) 2487 { 2488 struct sg_table new_st; 2489 struct scatterlist *sg, *new_sg; 2490 unsigned int i; 2491 2492 if (orig_st->nents == orig_st->orig_nents) 2493 return false; 2494 2495 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2496 return false; 2497 2498 new_sg = new_st.sgl; 2499 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2500 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2501 sg_dma_address(new_sg) = sg_dma_address(sg); 2502 sg_dma_len(new_sg) = sg_dma_len(sg); 2503 2504 new_sg = sg_next(new_sg); 2505 } 2506 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2507 2508 sg_free_table(orig_st); 2509 2510 *orig_st = new_st; 2511 return true; 2512 } 2513 2514 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2515 { 2516 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2517 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2518 unsigned long i; 2519 struct address_space *mapping; 2520 struct sg_table *st; 2521 struct scatterlist *sg; 2522 struct sgt_iter sgt_iter; 2523 struct page *page; 2524 unsigned long last_pfn = 0; /* suppress gcc warning */ 2525 unsigned int max_segment = i915_sg_segment_size(); 2526 unsigned int sg_page_sizes; 2527 gfp_t noreclaim; 2528 int ret; 2529 2530 /* 2531 * Assert that the object is not currently in any GPU domain. As it 2532 * wasn't in the GTT, there shouldn't be any way it could have been in 2533 * a GPU cache 2534 */ 2535 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2536 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2537 2538 /* 2539 * If there's no chance of allocating enough pages for the whole 2540 * object, bail early. 2541 */ 2542 if (page_count > totalram_pages) 2543 return -ENOMEM; 2544 2545 st = kmalloc(sizeof(*st), GFP_KERNEL); 2546 if (st == NULL) 2547 return -ENOMEM; 2548 2549 rebuild_st: 2550 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2551 kfree(st); 2552 return -ENOMEM; 2553 } 2554 2555 /* 2556 * Get the list of pages out of our struct file. They'll be pinned 2557 * at this point until we release them. 2558 * 2559 * Fail silently without starting the shrinker 2560 */ 2561 mapping = obj->base.filp->f_mapping; 2562 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2563 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2564 2565 sg = st->sgl; 2566 st->nents = 0; 2567 sg_page_sizes = 0; 2568 for (i = 0; i < page_count; i++) { 2569 const unsigned int shrink[] = { 2570 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2571 0, 2572 }, *s = shrink; 2573 gfp_t gfp = noreclaim; 2574 2575 do { 2576 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2577 if (likely(!IS_ERR(page))) 2578 break; 2579 2580 if (!*s) { 2581 ret = PTR_ERR(page); 2582 goto err_sg; 2583 } 2584 2585 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2586 cond_resched(); 2587 2588 /* 2589 * We've tried hard to allocate the memory by reaping 2590 * our own buffer, now let the real VM do its job and 2591 * go down in flames if truly OOM. 2592 * 2593 * However, since graphics tend to be disposable, 2594 * defer the oom here by reporting the ENOMEM back 2595 * to userspace. 
2596 */ 2597 if (!*s) { 2598 /* reclaim and warn, but no oom */ 2599 gfp = mapping_gfp_mask(mapping); 2600 2601 /* 2602 * Our bo are always dirty and so we require 2603 * kswapd to reclaim our pages (direct reclaim 2604 * does not effectively begin pageout of our 2605 * buffers on its own). However, direct reclaim 2606 * only waits for kswapd when under allocation 2607 * congestion. So as a result __GFP_RECLAIM is 2608 * unreliable and fails to actually reclaim our 2609 * dirty pages -- unless you try over and over 2610 * again with !__GFP_NORETRY. However, we still 2611 * want to fail this allocation rather than 2612 * trigger the out-of-memory killer and for 2613 * this we want __GFP_RETRY_MAYFAIL. 2614 */ 2615 gfp |= __GFP_RETRY_MAYFAIL; 2616 } 2617 } while (1); 2618 2619 if (!i || 2620 sg->length >= max_segment || 2621 page_to_pfn(page) != last_pfn + 1) { 2622 if (i) { 2623 sg_page_sizes |= sg->length; 2624 sg = sg_next(sg); 2625 } 2626 st->nents++; 2627 sg_set_page(sg, page, PAGE_SIZE, 0); 2628 } else { 2629 sg->length += PAGE_SIZE; 2630 } 2631 last_pfn = page_to_pfn(page); 2632 2633 /* Check that the i965g/gm workaround works. */ 2634 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2635 } 2636 if (sg) { /* loop terminated early; short sg table */ 2637 sg_page_sizes |= sg->length; 2638 sg_mark_end(sg); 2639 } 2640 2641 /* Trim unused sg entries to avoid wasting memory. */ 2642 i915_sg_trim(st); 2643 2644 ret = i915_gem_gtt_prepare_pages(obj, st); 2645 if (ret) { 2646 /* 2647 * DMA remapping failed? One possible cause is that 2648 * it could not reserve enough large entries, asking 2649 * for PAGE_SIZE chunks instead may be helpful. 2650 */ 2651 if (max_segment > PAGE_SIZE) { 2652 for_each_sgt_page(page, sgt_iter, st) 2653 put_page(page); 2654 sg_free_table(st); 2655 2656 max_segment = PAGE_SIZE; 2657 goto rebuild_st; 2658 } else { 2659 dev_warn(&dev_priv->drm.pdev->dev, 2660 "Failed to DMA remap %lu pages\n", 2661 page_count); 2662 goto err_pages; 2663 } 2664 } 2665 2666 if (i915_gem_object_needs_bit17_swizzle(obj)) 2667 i915_gem_object_do_bit_17_swizzle(obj, st); 2668 2669 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2670 2671 return 0; 2672 2673 err_sg: 2674 sg_mark_end(sg); 2675 err_pages: 2676 for_each_sgt_page(page, sgt_iter, st) 2677 put_page(page); 2678 sg_free_table(st); 2679 kfree(st); 2680 2681 /* 2682 * shmemfs first checks if there is enough memory to allocate the page 2683 * and reports ENOSPC should there be insufficient, along with the usual 2684 * ENOMEM for a genuine allocation failure. 2685 * 2686 * We use ENOSPC in our driver to mean that we have run out of aperture 2687 * space and so want to translate the error from shmemfs back to our 2688 * usual understanding of ENOMEM. 
2689 */ 2690 if (ret == -ENOSPC) 2691 ret = -ENOMEM; 2692 2693 return ret; 2694 } 2695 2696 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2697 struct sg_table *pages, 2698 unsigned int sg_page_sizes) 2699 { 2700 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2701 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2702 int i; 2703 2704 lockdep_assert_held(&obj->mm.lock); 2705 2706 obj->mm.get_page.sg_pos = pages->sgl; 2707 obj->mm.get_page.sg_idx = 0; 2708 2709 obj->mm.pages = pages; 2710 2711 if (i915_gem_object_is_tiled(obj) && 2712 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2713 GEM_BUG_ON(obj->mm.quirked); 2714 __i915_gem_object_pin_pages(obj); 2715 obj->mm.quirked = true; 2716 } 2717 2718 GEM_BUG_ON(!sg_page_sizes); 2719 obj->mm.page_sizes.phys = sg_page_sizes; 2720 2721 /* 2722 * Calculate the supported page-sizes which fit into the given 2723 * sg_page_sizes. This will give us the page-sizes which we may be able 2724 * to use opportunistically when later inserting into the GTT. For 2725 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2726 * 64K or 4K pages, although in practice this will depend on a number of 2727 * other factors. 2728 */ 2729 obj->mm.page_sizes.sg = 0; 2730 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2731 if (obj->mm.page_sizes.phys & ~0u << i) 2732 obj->mm.page_sizes.sg |= BIT(i); 2733 } 2734 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2735 2736 spin_lock(&i915->mm.obj_lock); 2737 list_add(&obj->mm.link, &i915->mm.unbound_list); 2738 spin_unlock(&i915->mm.obj_lock); 2739 } 2740 2741 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2742 { 2743 int err; 2744 2745 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2746 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2747 return -EFAULT; 2748 } 2749 2750 err = obj->ops->get_pages(obj); 2751 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2752 2753 return err; 2754 } 2755 2756 /* Ensure that the associated pages are gathered from the backing storage 2757 * and pinned into our object. i915_gem_object_pin_pages() may be called 2758 * multiple times before they are released by a single call to 2759 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2760 * either as a result of memory pressure (reaping pages under the shrinker) 2761 * or as the object is itself released. 
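 *
 * A minimal usage sketch, illustrative only: do_io() stands in for a
 * hypothetical caller-side helper and error handling is trimmed. While
 * the pin is held the pages cannot be reaped by the shrinker:
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *	do_io(obj->mm.pages);
 *	i915_gem_object_unpin_pages(obj);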
2762 */ 2763 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2764 { 2765 int err; 2766 2767 err = mutex_lock_interruptible(&obj->mm.lock); 2768 if (err) 2769 return err; 2770 2771 if (unlikely(!i915_gem_object_has_pages(obj))) { 2772 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2773 2774 err = ____i915_gem_object_get_pages(obj); 2775 if (err) 2776 goto unlock; 2777 2778 smp_mb__before_atomic(); 2779 } 2780 atomic_inc(&obj->mm.pages_pin_count); 2781 2782 unlock: 2783 mutex_unlock(&obj->mm.lock); 2784 return err; 2785 } 2786 2787 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2788 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2789 enum i915_map_type type) 2790 { 2791 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2792 struct sg_table *sgt = obj->mm.pages; 2793 struct sgt_iter sgt_iter; 2794 struct page *page; 2795 struct page *stack_pages[32]; 2796 struct page **pages = stack_pages; 2797 unsigned long i = 0; 2798 pgprot_t pgprot; 2799 void *addr; 2800 2801 /* A single page can always be kmapped */ 2802 if (n_pages == 1 && type == I915_MAP_WB) 2803 return kmap(sg_page(sgt->sgl)); 2804 2805 if (n_pages > ARRAY_SIZE(stack_pages)) { 2806 /* Too big for stack -- allocate temporary array instead */ 2807 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2808 if (!pages) 2809 return NULL; 2810 } 2811 2812 for_each_sgt_page(page, sgt_iter, sgt) 2813 pages[i++] = page; 2814 2815 /* Check that we have the expected number of pages */ 2816 GEM_BUG_ON(i != n_pages); 2817 2818 switch (type) { 2819 default: 2820 MISSING_CASE(type); 2821 /* fallthrough to use PAGE_KERNEL anyway */ 2822 case I915_MAP_WB: 2823 pgprot = PAGE_KERNEL; 2824 break; 2825 case I915_MAP_WC: 2826 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2827 break; 2828 } 2829 addr = vmap(pages, n_pages, 0, pgprot); 2830 2831 if (pages != stack_pages) 2832 kvfree(pages); 2833 2834 return addr; 2835 } 2836 2837 /* get, pin, and map the pages of the object into kernel space */ 2838 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2839 enum i915_map_type type) 2840 { 2841 enum i915_map_type has_type; 2842 bool pinned; 2843 void *ptr; 2844 int ret; 2845 2846 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2847 return ERR_PTR(-ENXIO); 2848 2849 ret = mutex_lock_interruptible(&obj->mm.lock); 2850 if (ret) 2851 return ERR_PTR(ret); 2852 2853 pinned = !(type & I915_MAP_OVERRIDE); 2854 type &= ~I915_MAP_OVERRIDE; 2855 2856 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2857 if (unlikely(!i915_gem_object_has_pages(obj))) { 2858 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2859 2860 ret = ____i915_gem_object_get_pages(obj); 2861 if (ret) 2862 goto err_unlock; 2863 2864 smp_mb__before_atomic(); 2865 } 2866 atomic_inc(&obj->mm.pages_pin_count); 2867 pinned = false; 2868 } 2869 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2870 2871 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2872 if (ptr && has_type != type) { 2873 if (pinned) { 2874 ret = -EBUSY; 2875 goto err_unpin; 2876 } 2877 2878 if (is_vmalloc_addr(ptr)) 2879 vunmap(ptr); 2880 else 2881 kunmap(kmap_to_page(ptr)); 2882 2883 ptr = obj->mm.mapping = NULL; 2884 } 2885 2886 if (!ptr) { 2887 ptr = i915_gem_object_map(obj, type); 2888 if (!ptr) { 2889 ret = -ENOMEM; 2890 goto err_unpin; 2891 } 2892 2893 obj->mm.mapping = page_pack_bits(ptr, type); 2894 } 2895 2896 out_unlock: 2897 mutex_unlock(&obj->mm.lock); 2898 return ptr; 2899 2900 err_unpin: 2901 atomic_dec(&obj->mm.pages_pin_count); 2902 
err_unlock: 2903 ptr = ERR_PTR(ret); 2904 goto out_unlock; 2905 } 2906 2907 static int 2908 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2909 const struct drm_i915_gem_pwrite *arg) 2910 { 2911 struct address_space *mapping = obj->base.filp->f_mapping; 2912 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2913 u64 remain, offset; 2914 unsigned int pg; 2915 2916 /* Before we instantiate/pin the backing store for our use, we 2917 * can prepopulate the shmemfs filp efficiently using a write into 2918 * the pagecache. We avoid the penalty of instantiating all the 2919 * pages, important if the user is just writing to a few and never 2920 * uses the object on the GPU, and using a direct write into shmemfs 2921 * allows it to avoid the cost of retrieving a page (either swapin 2922 * or clearing-before-use) before it is overwritten. 2923 */ 2924 if (i915_gem_object_has_pages(obj)) 2925 return -ENODEV; 2926 2927 if (obj->mm.madv != I915_MADV_WILLNEED) 2928 return -EFAULT; 2929 2930 /* Before the pages are instantiated the object is treated as being 2931 * in the CPU domain. The pages will be clflushed as required before 2932 * use, and we can freely write into the pages directly. If userspace 2933 * races pwrite with any other operation; corruption will ensue - 2934 * that is userspace's prerogative! 2935 */ 2936 2937 remain = arg->size; 2938 offset = arg->offset; 2939 pg = offset_in_page(offset); 2940 2941 do { 2942 unsigned int len, unwritten; 2943 struct page *page; 2944 void *data, *vaddr; 2945 int err; 2946 2947 len = PAGE_SIZE - pg; 2948 if (len > remain) 2949 len = remain; 2950 2951 err = pagecache_write_begin(obj->base.filp, mapping, 2952 offset, len, 0, 2953 &page, &data); 2954 if (err < 0) 2955 return err; 2956 2957 vaddr = kmap(page); 2958 unwritten = copy_from_user(vaddr + pg, user_data, len); 2959 kunmap(page); 2960 2961 err = pagecache_write_end(obj->base.filp, mapping, 2962 offset, len, len - unwritten, 2963 page, data); 2964 if (err < 0) 2965 return err; 2966 2967 if (unwritten) 2968 return -EFAULT; 2969 2970 remain -= len; 2971 user_data += len; 2972 offset += len; 2973 pg = 0; 2974 } while (remain); 2975 2976 return 0; 2977 } 2978 2979 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 2980 const struct i915_gem_context *ctx) 2981 { 2982 unsigned int score; 2983 unsigned long prev_hang; 2984 2985 if (i915_gem_context_is_banned(ctx)) 2986 score = I915_CLIENT_SCORE_CONTEXT_BAN; 2987 else 2988 score = 0; 2989 2990 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 2991 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 2992 score += I915_CLIENT_SCORE_HANG_FAST; 2993 2994 if (score) { 2995 atomic_add(score, &file_priv->ban_score); 2996 2997 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 2998 ctx->name, score, 2999 atomic_read(&file_priv->ban_score)); 3000 } 3001 } 3002 3003 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 3004 { 3005 unsigned int score; 3006 bool banned, bannable; 3007 3008 atomic_inc(&ctx->guilty_count); 3009 3010 bannable = i915_gem_context_is_bannable(ctx); 3011 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 3012 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 3013 3014 /* Cool contexts don't accumulate client ban score */ 3015 if (!bannable) 3016 return; 3017 3018 if (banned) { 3019 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", 3020 ctx->name, atomic_read(&ctx->guilty_count), 3021 score); 3022 
i915_gem_context_set_banned(ctx); 3023 } 3024 3025 if (!IS_ERR_OR_NULL(ctx->file_priv)) 3026 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 3027 } 3028 3029 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 3030 { 3031 atomic_inc(&ctx->active_count); 3032 } 3033 3034 struct i915_request * 3035 i915_gem_find_active_request(struct intel_engine_cs *engine) 3036 { 3037 struct i915_request *request, *active = NULL; 3038 unsigned long flags; 3039 3040 /* 3041 * We are called by the error capture, reset and to dump engine 3042 * state at random points in time. In particular, note that neither is 3043 * crucially ordered with an interrupt. After a hang, the GPU is dead 3044 * and we assume that no more writes can happen (we waited long enough 3045 * for all writes that were in transaction to be flushed) - adding an 3046 * extra delay for a recent interrupt is pointless. Hence, we do 3047 * not need an engine->irq_seqno_barrier() before the seqno reads. 3048 * At all other times, we must assume the GPU is still running, but 3049 * we only care about the snapshot of this moment. 3050 */ 3051 spin_lock_irqsave(&engine->timeline.lock, flags); 3052 list_for_each_entry(request, &engine->timeline.requests, link) { 3053 if (__i915_request_completed(request, request->global_seqno)) 3054 continue; 3055 3056 active = request; 3057 break; 3058 } 3059 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3060 3061 return active; 3062 } 3063 3064 /* 3065 * Ensure irq handler finishes, and not run again. 3066 * Also return the active request so that we only search for it once. 3067 */ 3068 struct i915_request * 3069 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3070 { 3071 struct i915_request *request; 3072 3073 /* 3074 * During the reset sequence, we must prevent the engine from 3075 * entering RC6. As the context state is undefined until we restart 3076 * the engine, if it does enter RC6 during the reset, the state 3077 * written to the powercontext is undefined and so we may lose 3078 * GPU state upon resume, i.e. fail to restart after a reset. 3079 */ 3080 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3081 3082 request = engine->reset.prepare(engine); 3083 if (request && request->fence.error == -EIO) 3084 request = ERR_PTR(-EIO); /* Previous reset failed! 
*/ 3085 3086 return request; 3087 } 3088 3089 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3090 { 3091 struct intel_engine_cs *engine; 3092 struct i915_request *request; 3093 enum intel_engine_id id; 3094 int err = 0; 3095 3096 for_each_engine(engine, dev_priv, id) { 3097 request = i915_gem_reset_prepare_engine(engine); 3098 if (IS_ERR(request)) { 3099 err = PTR_ERR(request); 3100 continue; 3101 } 3102 3103 engine->hangcheck.active_request = request; 3104 } 3105 3106 i915_gem_revoke_fences(dev_priv); 3107 intel_uc_sanitize(dev_priv); 3108 3109 return err; 3110 } 3111 3112 static void engine_skip_context(struct i915_request *request) 3113 { 3114 struct intel_engine_cs *engine = request->engine; 3115 struct i915_gem_context *hung_ctx = request->gem_context; 3116 struct i915_timeline *timeline = request->timeline; 3117 unsigned long flags; 3118 3119 GEM_BUG_ON(timeline == &engine->timeline); 3120 3121 spin_lock_irqsave(&engine->timeline.lock, flags); 3122 spin_lock(&timeline->lock); 3123 3124 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3125 if (request->gem_context == hung_ctx) 3126 i915_request_skip(request, -EIO); 3127 3128 list_for_each_entry(request, &timeline->requests, link) 3129 i915_request_skip(request, -EIO); 3130 3131 spin_unlock(&timeline->lock); 3132 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3133 } 3134 3135 /* Returns the request if it was guilty of the hang */ 3136 static struct i915_request * 3137 i915_gem_reset_request(struct intel_engine_cs *engine, 3138 struct i915_request *request, 3139 bool stalled) 3140 { 3141 /* The guilty request will get skipped on a hung engine. 3142 * 3143 * Users of client default contexts do not rely on logical 3144 * state preserved between batches so it is safe to execute 3145 * queued requests following the hang. Non default contexts 3146 * rely on preserved state, so skipping a batch loses the 3147 * evolution of the state and it needs to be considered corrupted. 3148 * Executing more queued batches on top of corrupted state is 3149 * risky. But we take the risk by trying to advance through 3150 * the queued requests in order to make the client behaviour 3151 * more predictable around resets, by not throwing away random 3152 * amount of batches it has prepared for execution. Sophisticated 3153 * clients can use gem_reset_stats_ioctl and dma fence status 3154 * (exported via sync_file info ioctl on explicit fences) to observe 3155 * when it loses the context state and should rebuild accordingly. 3156 * 3157 * The context ban, and ultimately the client ban, mechanism are safety 3158 * valves if client submission ends up resulting in nothing more than 3159 * subsequent hangs. 3160 */ 3161 3162 if (i915_request_completed(request)) { 3163 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3164 engine->name, request->global_seqno, 3165 request->fence.context, request->fence.seqno, 3166 intel_engine_get_seqno(engine)); 3167 stalled = false; 3168 } 3169 3170 if (stalled) { 3171 i915_gem_context_mark_guilty(request->gem_context); 3172 i915_request_skip(request, -EIO); 3173 3174 /* If this context is now banned, skip all pending requests. */ 3175 if (i915_gem_context_is_banned(request->gem_context)) 3176 engine_skip_context(request); 3177 } else { 3178 /* 3179 * Since this is not the hung engine, it may have advanced 3180 * since the hang declaration. Double check by refinding 3181 * the active request at the time of the reset. 
3182 */ 3183 request = i915_gem_find_active_request(engine); 3184 if (request) { 3185 unsigned long flags; 3186 3187 i915_gem_context_mark_innocent(request->gem_context); 3188 dma_fence_set_error(&request->fence, -EAGAIN); 3189 3190 /* Rewind the engine to replay the incomplete rq */ 3191 spin_lock_irqsave(&engine->timeline.lock, flags); 3192 request = list_prev_entry(request, link); 3193 if (&request->link == &engine->timeline.requests) 3194 request = NULL; 3195 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3196 } 3197 } 3198 3199 return request; 3200 } 3201 3202 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3203 struct i915_request *request, 3204 bool stalled) 3205 { 3206 /* 3207 * Make sure this write is visible before we re-enable the interrupt 3208 * handlers on another CPU, as tasklet_enable() resolves to just 3209 * a compiler barrier which is insufficient for our purpose here. 3210 */ 3211 smp_store_mb(engine->irq_posted, 0); 3212 3213 if (request) 3214 request = i915_gem_reset_request(engine, request, stalled); 3215 3216 /* Setup the CS to resume from the breadcrumb of the hung request */ 3217 engine->reset.reset(engine, request); 3218 } 3219 3220 void i915_gem_reset(struct drm_i915_private *dev_priv, 3221 unsigned int stalled_mask) 3222 { 3223 struct intel_engine_cs *engine; 3224 enum intel_engine_id id; 3225 3226 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3227 3228 i915_retire_requests(dev_priv); 3229 3230 for_each_engine(engine, dev_priv, id) { 3231 struct intel_context *ce; 3232 3233 i915_gem_reset_engine(engine, 3234 engine->hangcheck.active_request, 3235 stalled_mask & ENGINE_MASK(id)); 3236 ce = fetch_and_zero(&engine->last_retired_context); 3237 if (ce) 3238 intel_context_unpin(ce); 3239 3240 /* 3241 * Ostensibily, we always want a context loaded for powersaving, 3242 * so if the engine is idle after the reset, send a request 3243 * to load our scratch kernel_context. 3244 * 3245 * More mysteriously, if we leave the engine idle after a reset, 3246 * the next userspace batch may hang, with what appears to be 3247 * an incoherent read by the CS (presumably stale TLB). An 3248 * empty request appears sufficient to paper over the glitch. 
3249 */ 3250 if (intel_engine_is_idle(engine)) { 3251 struct i915_request *rq; 3252 3253 rq = i915_request_alloc(engine, 3254 dev_priv->kernel_context); 3255 if (!IS_ERR(rq)) 3256 i915_request_add(rq); 3257 } 3258 } 3259 3260 i915_gem_restore_fences(dev_priv); 3261 } 3262 3263 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3264 { 3265 engine->reset.finish(engine); 3266 3267 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3268 } 3269 3270 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3271 { 3272 struct intel_engine_cs *engine; 3273 enum intel_engine_id id; 3274 3275 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3276 3277 for_each_engine(engine, dev_priv, id) { 3278 engine->hangcheck.active_request = NULL; 3279 i915_gem_reset_finish_engine(engine); 3280 } 3281 } 3282 3283 static void nop_submit_request(struct i915_request *request) 3284 { 3285 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3286 request->engine->name, 3287 request->fence.context, request->fence.seqno); 3288 dma_fence_set_error(&request->fence, -EIO); 3289 3290 i915_request_submit(request); 3291 } 3292 3293 static void nop_complete_submit_request(struct i915_request *request) 3294 { 3295 unsigned long flags; 3296 3297 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3298 request->engine->name, 3299 request->fence.context, request->fence.seqno); 3300 dma_fence_set_error(&request->fence, -EIO); 3301 3302 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3303 __i915_request_submit(request); 3304 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3305 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3306 } 3307 3308 void i915_gem_set_wedged(struct drm_i915_private *i915) 3309 { 3310 struct intel_engine_cs *engine; 3311 enum intel_engine_id id; 3312 3313 GEM_TRACE("start\n"); 3314 3315 if (GEM_SHOW_DEBUG()) { 3316 struct drm_printer p = drm_debug_printer(__func__); 3317 3318 for_each_engine(engine, i915, id) 3319 intel_engine_dump(engine, &p, "%s\n", engine->name); 3320 } 3321 3322 if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) 3323 goto out; 3324 3325 /* 3326 * First, stop submission to hw, but do not yet complete requests by 3327 * rolling the global seqno forward (since this would complete requests 3328 * for which we haven't set the fence error to EIO yet). 3329 */ 3330 for_each_engine(engine, i915, id) { 3331 i915_gem_reset_prepare_engine(engine); 3332 3333 engine->submit_request = nop_submit_request; 3334 engine->schedule = NULL; 3335 } 3336 i915->caps.scheduler = 0; 3337 3338 /* Even if the GPU reset fails, it should still stop the engines */ 3339 if (INTEL_GEN(i915) >= 5) 3340 intel_gpu_reset(i915, ALL_ENGINES); 3341 3342 /* 3343 * Make sure no one is running the old callback before we proceed with 3344 * cancelling requests and resetting the completion tracking. Otherwise 3345 * we might submit a request to the hardware which never completes. 3346 */ 3347 synchronize_rcu(); 3348 3349 for_each_engine(engine, i915, id) { 3350 /* Mark all executing requests as skipped */ 3351 engine->cancel_requests(engine); 3352 3353 /* 3354 * Only once we've force-cancelled all in-flight requests can we 3355 * start to complete all requests. 3356 */ 3357 engine->submit_request = nop_complete_submit_request; 3358 } 3359 3360 /* 3361 * Make sure no request can slip through without getting completed by 3362 * either this call here to intel_engine_init_global_seqno, or the one 3363 * in nop_complete_submit_request. 
3364 */ 3365 synchronize_rcu(); 3366 3367 for_each_engine(engine, i915, id) { 3368 unsigned long flags; 3369 3370 /* 3371 * Mark all pending requests as complete so that any concurrent 3372 * (lockless) lookup doesn't try and wait upon the request as we 3373 * reset it. 3374 */ 3375 spin_lock_irqsave(&engine->timeline.lock, flags); 3376 intel_engine_init_global_seqno(engine, 3377 intel_engine_last_submit(engine)); 3378 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3379 3380 i915_gem_reset_finish_engine(engine); 3381 } 3382 3383 out: 3384 GEM_TRACE("end\n"); 3385 3386 wake_up_all(&i915->gpu_error.reset_queue); 3387 } 3388 3389 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3390 { 3391 struct i915_timeline *tl; 3392 3393 lockdep_assert_held(&i915->drm.struct_mutex); 3394 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3395 return true; 3396 3397 GEM_TRACE("start\n"); 3398 3399 /* 3400 * Before unwedging, make sure that all pending operations 3401 * are flushed and errored out - we may have requests waiting upon 3402 * third party fences. We marked all inflight requests as EIO, and 3403 * every execbuf since returned EIO, for consistency we want all 3404 * the currently pending requests to also be marked as EIO, which 3405 * is done inside our nop_submit_request - and so we must wait. 3406 * 3407 * No more can be submitted until we reset the wedged bit. 3408 */ 3409 list_for_each_entry(tl, &i915->gt.timelines, link) { 3410 struct i915_request *rq; 3411 3412 rq = i915_gem_active_peek(&tl->last_request, 3413 &i915->drm.struct_mutex); 3414 if (!rq) 3415 continue; 3416 3417 /* 3418 * We can't use our normal waiter as we want to 3419 * avoid recursively trying to handle the current 3420 * reset. The basic dma_fence_default_wait() installs 3421 * a callback for dma_fence_signal(), which is 3422 * triggered by our nop handler (indirectly, the 3423 * callback enables the signaler thread which is 3424 * woken by the nop_submit_request() advancing the seqno 3425 * and when the seqno passes the fence, the signaler 3426 * then signals the fence waking us up). 3427 */ 3428 if (dma_fence_default_wait(&rq->fence, true, 3429 MAX_SCHEDULE_TIMEOUT) < 0) 3430 return false; 3431 } 3432 i915_retire_requests(i915); 3433 GEM_BUG_ON(i915->gt.active_requests); 3434 3435 if (!intel_gpu_reset(i915, ALL_ENGINES)) 3436 intel_engines_sanitize(i915); 3437 3438 /* 3439 * Undo nop_submit_request. We prevent all new i915 requests from 3440 * being queued (by disallowing execbuf whilst wedged) so having 3441 * waited for all active requests above, we know the system is idle 3442 * and do not have to worry about a thread being inside 3443 * engine->submit_request() as we swap over. So unlike installing 3444 * the nop_submit_request on reset, we can do this from normal 3445 * context and do not require stop_machine(). 3446 */ 3447 intel_engines_reset_default_submission(i915); 3448 i915_gem_contexts_lost(i915); 3449 3450 GEM_TRACE("end\n"); 3451 3452 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3453 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3454 3455 return true; 3456 } 3457 3458 static void 3459 i915_gem_retire_work_handler(struct work_struct *work) 3460 { 3461 struct drm_i915_private *dev_priv = 3462 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3463 struct drm_device *dev = &dev_priv->drm; 3464 3465 /* Come back later if the device is busy... 
*/ 3466 if (mutex_trylock(&dev->struct_mutex)) { 3467 i915_retire_requests(dev_priv); 3468 mutex_unlock(&dev->struct_mutex); 3469 } 3470 3471 /* 3472 * Keep the retire handler running until we are finally idle. 3473 * We do not need to do this test under locking as in the worst-case 3474 * we queue the retire worker once too often. 3475 */ 3476 if (READ_ONCE(dev_priv->gt.awake)) 3477 queue_delayed_work(dev_priv->wq, 3478 &dev_priv->gt.retire_work, 3479 round_jiffies_up_relative(HZ)); 3480 } 3481 3482 static void shrink_caches(struct drm_i915_private *i915) 3483 { 3484 /* 3485 * kmem_cache_shrink() discards empty slabs and reorders partially 3486 * filled slabs to prioritise allocating from the mostly full slabs, 3487 * with the aim of reducing fragmentation. 3488 */ 3489 kmem_cache_shrink(i915->priorities); 3490 kmem_cache_shrink(i915->dependencies); 3491 kmem_cache_shrink(i915->requests); 3492 kmem_cache_shrink(i915->luts); 3493 kmem_cache_shrink(i915->vmas); 3494 kmem_cache_shrink(i915->objects); 3495 } 3496 3497 struct sleep_rcu_work { 3498 union { 3499 struct rcu_head rcu; 3500 struct work_struct work; 3501 }; 3502 struct drm_i915_private *i915; 3503 unsigned int epoch; 3504 }; 3505 3506 static inline bool 3507 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3508 { 3509 /* 3510 * There is a small chance that the epoch wrapped since we started 3511 * sleeping. If we assume that epoch is at least a u32, then it will 3512 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3513 */ 3514 return epoch == READ_ONCE(i915->gt.epoch); 3515 } 3516 3517 static void __sleep_work(struct work_struct *work) 3518 { 3519 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3520 struct drm_i915_private *i915 = s->i915; 3521 unsigned int epoch = s->epoch; 3522 3523 kfree(s); 3524 if (same_epoch(i915, epoch)) 3525 shrink_caches(i915); 3526 } 3527 3528 static void __sleep_rcu(struct rcu_head *rcu) 3529 { 3530 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3531 struct drm_i915_private *i915 = s->i915; 3532 3533 if (same_epoch(i915, s->epoch)) { 3534 INIT_WORK(&s->work, __sleep_work); 3535 queue_work(i915->wq, &s->work); 3536 } else { 3537 kfree(s); 3538 } 3539 } 3540 3541 static inline bool 3542 new_requests_since_last_retire(const struct drm_i915_private *i915) 3543 { 3544 return (READ_ONCE(i915->gt.active_requests) || 3545 work_pending(&i915->gt.idle_work.work)); 3546 } 3547 3548 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3549 { 3550 struct intel_engine_cs *engine; 3551 enum intel_engine_id id; 3552 3553 if (i915_terminally_wedged(&i915->gpu_error)) 3554 return; 3555 3556 GEM_BUG_ON(i915->gt.active_requests); 3557 for_each_engine(engine, i915, id) { 3558 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 3559 GEM_BUG_ON(engine->last_retired_context != 3560 to_intel_context(i915->kernel_context, engine)); 3561 } 3562 } 3563 3564 static void 3565 i915_gem_idle_work_handler(struct work_struct *work) 3566 { 3567 struct drm_i915_private *dev_priv = 3568 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3569 unsigned int epoch = I915_EPOCH_INVALID; 3570 bool rearm_hangcheck; 3571 3572 if (!READ_ONCE(dev_priv->gt.awake)) 3573 return; 3574 3575 if (READ_ONCE(dev_priv->gt.active_requests)) 3576 return; 3577 3578 /* 3579 * Flush out the last user context, leaving only the pinned 3580 * kernel context resident. 
When we are idling on the kernel_context, 3581 * no more new requests (with a context switch) are emitted and we 3582 * can finally rest. A consequence is that the idle work handler is 3583 * always called at least twice before idling (and if the system is 3584 * idle that implies a round trip through the retire worker). 3585 */ 3586 mutex_lock(&dev_priv->drm.struct_mutex); 3587 i915_gem_switch_to_kernel_context(dev_priv); 3588 mutex_unlock(&dev_priv->drm.struct_mutex); 3589 3590 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3591 READ_ONCE(dev_priv->gt.active_requests)); 3592 3593 /* 3594 * Wait for last execlists context complete, but bail out in case a 3595 * new request is submitted. As we don't trust the hardware, we 3596 * continue on if the wait times out. This is necessary to allow 3597 * the machine to suspend even if the hardware dies, and we will 3598 * try to recover in resume (after depriving the hardware of power, 3599 * it may be in a better mmod). 3600 */ 3601 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3602 intel_engines_are_idle(dev_priv), 3603 I915_IDLE_ENGINES_TIMEOUT * 1000, 3604 10, 500); 3605 3606 rearm_hangcheck = 3607 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3608 3609 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3610 /* Currently busy, come back later */ 3611 mod_delayed_work(dev_priv->wq, 3612 &dev_priv->gt.idle_work, 3613 msecs_to_jiffies(50)); 3614 goto out_rearm; 3615 } 3616 3617 /* 3618 * New request retired after this work handler started, extend active 3619 * period until next instance of the work. 3620 */ 3621 if (new_requests_since_last_retire(dev_priv)) 3622 goto out_unlock; 3623 3624 epoch = __i915_gem_park(dev_priv); 3625 3626 assert_kernel_context_is_current(dev_priv); 3627 3628 rearm_hangcheck = false; 3629 out_unlock: 3630 mutex_unlock(&dev_priv->drm.struct_mutex); 3631 3632 out_rearm: 3633 if (rearm_hangcheck) { 3634 GEM_BUG_ON(!dev_priv->gt.awake); 3635 i915_queue_hangcheck(dev_priv); 3636 } 3637 3638 /* 3639 * When we are idle, it is an opportune time to reap our caches. 3640 * However, we have many objects that utilise RCU and the ordered 3641 * i915->wq that this work is executing on. To try and flush any 3642 * pending frees now we are idle, we first wait for an RCU grace 3643 * period, and then queue a task (that will run last on the wq) to 3644 * shrink and re-optimize the caches. 3645 */ 3646 if (same_epoch(dev_priv, epoch)) { 3647 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3648 if (s) { 3649 s->i915 = dev_priv; 3650 s->epoch = epoch; 3651 call_rcu(&s->rcu, __sleep_rcu); 3652 } 3653 } 3654 } 3655 3656 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3657 { 3658 struct drm_i915_private *i915 = to_i915(gem->dev); 3659 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3660 struct drm_i915_file_private *fpriv = file->driver_priv; 3661 struct i915_lut_handle *lut, *ln; 3662 3663 mutex_lock(&i915->drm.struct_mutex); 3664 3665 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3666 struct i915_gem_context *ctx = lut->ctx; 3667 struct i915_vma *vma; 3668 3669 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3670 if (ctx->file_priv != fpriv) 3671 continue; 3672 3673 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3674 GEM_BUG_ON(vma->obj != obj); 3675 3676 /* We allow the process to have multiple handles to the same 3677 * vma, in the same fd namespace, by virtue of flink/open. 
		 */
		GEM_BUG_ON(!vma->open_count);
		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
			i915_vma_close(vma);

		list_del(&lut->obj_link);
		list_del(&lut->ctx_link);

		kmem_cache_free(i915->luts, lut);
		__i915_gem_object_release_unless_active(obj);
	}

	mutex_unlock(&i915->drm.struct_mutex);
}

static unsigned long to_wait_timeout(s64 timeout_ns)
{
	if (timeout_ns < 0)
		return MAX_SCHEDULE_TIMEOUT;

	if (timeout_ns == 0)
		return 0;

	return nsecs_to_jiffies_timeout(timeout_ns);
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 * -ETIME: object is still busy after timeout
 * -ERESTARTSYS: signal interrupted the wait
 * -ENOENT: object doesn't exist
 * Also possible, but rare:
 * -EAGAIN: incomplete, restart syscall
 * -ENOMEM: damn
 * -ENODEV: Internal IRQ fail
 * -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the busy
 * ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	ktime_t start;
	long ret;

	if (args->flags != 0)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->bo_handle);
	if (!obj)
		return -ENOENT;

	start = ktime_get();

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
				   to_wait_timeout(args->timeout_ns),
				   to_rps_client(file));

	if (args->timeout_ns > 0) {
		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
		if (args->timeout_ns < 0)
			args->timeout_ns = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
			args->timeout_ns = 0;

		/* Asked to wait beyond the jiffie/scheduler precision? */
		if (ret == -ETIME && args->timeout_ns)
			ret = -EAGAIN;
	}

	i915_gem_object_put(obj);
	return ret;
}

static long wait_for_timeline(struct i915_timeline *tl,
			      unsigned int flags, long timeout)
{
	struct i915_request *rq;

	rq = i915_gem_active_get_unlocked(&tl->last_request);
	if (!rq)
		return timeout;

	/*
	 * "Race-to-idle".
	 *
	 * Switching to the kernel context is often used as a synchronous
	 * step prior to idling, e.g. in suspend for flushing all
	 * current operations to memory before sleeping. These we
	 * want to complete as quickly as possible to avoid prolonged
	 * stalls, so allow the gpu to boost to maximum clocks.
3791 */ 3792 if (flags & I915_WAIT_FOR_IDLE_BOOST) 3793 gen6_rps_boost(rq, NULL); 3794 3795 timeout = i915_request_wait(rq, flags, timeout); 3796 i915_request_put(rq); 3797 3798 return timeout; 3799 } 3800 3801 static int wait_for_engines(struct drm_i915_private *i915) 3802 { 3803 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3804 dev_err(i915->drm.dev, 3805 "Failed to idle engines, declaring wedged!\n"); 3806 GEM_TRACE_DUMP(); 3807 i915_gem_set_wedged(i915); 3808 return -EIO; 3809 } 3810 3811 return 0; 3812 } 3813 3814 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 3815 unsigned int flags, long timeout) 3816 { 3817 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 3818 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 3819 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 3820 3821 /* If the device is asleep, we have no requests outstanding */ 3822 if (!READ_ONCE(i915->gt.awake)) 3823 return 0; 3824 3825 if (flags & I915_WAIT_LOCKED) { 3826 struct i915_timeline *tl; 3827 int err; 3828 3829 lockdep_assert_held(&i915->drm.struct_mutex); 3830 3831 list_for_each_entry(tl, &i915->gt.timelines, link) { 3832 timeout = wait_for_timeline(tl, flags, timeout); 3833 if (timeout < 0) 3834 return timeout; 3835 } 3836 if (GEM_SHOW_DEBUG() && !timeout) { 3837 /* Presume that timeout was non-zero to begin with! */ 3838 dev_warn(&i915->drm.pdev->dev, 3839 "Missed idle-completion interrupt!\n"); 3840 GEM_TRACE_DUMP(); 3841 } 3842 3843 err = wait_for_engines(i915); 3844 if (err) 3845 return err; 3846 3847 i915_retire_requests(i915); 3848 GEM_BUG_ON(i915->gt.active_requests); 3849 } else { 3850 struct intel_engine_cs *engine; 3851 enum intel_engine_id id; 3852 3853 for_each_engine(engine, i915, id) { 3854 struct i915_timeline *tl = &engine->timeline; 3855 3856 timeout = wait_for_timeline(tl, flags, timeout); 3857 if (timeout < 0) 3858 return timeout; 3859 } 3860 } 3861 3862 return 0; 3863 } 3864 3865 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3866 { 3867 /* 3868 * We manually flush the CPU domain so that we can override and 3869 * force the flush for the display, and perform it asyncrhonously. 3870 */ 3871 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3872 if (obj->cache_dirty) 3873 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3874 obj->write_domain = 0; 3875 } 3876 3877 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3878 { 3879 if (!READ_ONCE(obj->pin_global)) 3880 return; 3881 3882 mutex_lock(&obj->base.dev->struct_mutex); 3883 __i915_gem_object_flush_for_display(obj); 3884 mutex_unlock(&obj->base.dev->struct_mutex); 3885 } 3886 3887 /** 3888 * Moves a single object to the WC read, and possibly write domain. 3889 * @obj: object to act on 3890 * @write: ask for write access or read only 3891 * 3892 * This function returns when the move is complete, including waiting on 3893 * flushes to occur. 3894 */ 3895 int 3896 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3897 { 3898 int ret; 3899 3900 lockdep_assert_held(&obj->base.dev->struct_mutex); 3901 3902 ret = i915_gem_object_wait(obj, 3903 I915_WAIT_INTERRUPTIBLE | 3904 I915_WAIT_LOCKED | 3905 (write ? 
I915_WAIT_ALL : 0), 3906 MAX_SCHEDULE_TIMEOUT, 3907 NULL); 3908 if (ret) 3909 return ret; 3910 3911 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3912 return 0; 3913 3914 /* Flush and acquire obj->pages so that we are coherent through 3915 * direct access in memory with previous cached writes through 3916 * shmemfs and that our cache domain tracking remains valid. 3917 * For example, if the obj->filp was moved to swap without us 3918 * being notified and releasing the pages, we would mistakenly 3919 * continue to assume that the obj remained out of the CPU cached 3920 * domain. 3921 */ 3922 ret = i915_gem_object_pin_pages(obj); 3923 if (ret) 3924 return ret; 3925 3926 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3927 3928 /* Serialise direct access to this object with the barriers for 3929 * coherent writes from the GPU, by effectively invalidating the 3930 * WC domain upon first access. 3931 */ 3932 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3933 mb(); 3934 3935 /* It should now be out of any other write domains, and we can update 3936 * the domain values for our changes. 3937 */ 3938 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3939 obj->read_domains |= I915_GEM_DOMAIN_WC; 3940 if (write) { 3941 obj->read_domains = I915_GEM_DOMAIN_WC; 3942 obj->write_domain = I915_GEM_DOMAIN_WC; 3943 obj->mm.dirty = true; 3944 } 3945 3946 i915_gem_object_unpin_pages(obj); 3947 return 0; 3948 } 3949 3950 /** 3951 * Moves a single object to the GTT read, and possibly write domain. 3952 * @obj: object to act on 3953 * @write: ask for write access or read only 3954 * 3955 * This function returns when the move is complete, including waiting on 3956 * flushes to occur. 3957 */ 3958 int 3959 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3960 { 3961 int ret; 3962 3963 lockdep_assert_held(&obj->base.dev->struct_mutex); 3964 3965 ret = i915_gem_object_wait(obj, 3966 I915_WAIT_INTERRUPTIBLE | 3967 I915_WAIT_LOCKED | 3968 (write ? I915_WAIT_ALL : 0), 3969 MAX_SCHEDULE_TIMEOUT, 3970 NULL); 3971 if (ret) 3972 return ret; 3973 3974 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3975 return 0; 3976 3977 /* Flush and acquire obj->pages so that we are coherent through 3978 * direct access in memory with previous cached writes through 3979 * shmemfs and that our cache domain tracking remains valid. 3980 * For example, if the obj->filp was moved to swap without us 3981 * being notified and releasing the pages, we would mistakenly 3982 * continue to assume that the obj remained out of the CPU cached 3983 * domain. 3984 */ 3985 ret = i915_gem_object_pin_pages(obj); 3986 if (ret) 3987 return ret; 3988 3989 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3990 3991 /* Serialise direct access to this object with the barriers for 3992 * coherent writes from the GPU, by effectively invalidating the 3993 * GTT domain upon first access. 3994 */ 3995 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3996 mb(); 3997 3998 /* It should now be out of any other write domains, and we can update 3999 * the domain values for our changes. 4000 */ 4001 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4002 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4003 if (write) { 4004 obj->read_domains = I915_GEM_DOMAIN_GTT; 4005 obj->write_domain = I915_GEM_DOMAIN_GTT; 4006 obj->mm.dirty = true; 4007 } 4008 4009 i915_gem_object_unpin_pages(obj); 4010 return 0; 4011 } 4012 4013 /** 4014 * Changes the cache-level of an object across all VMA. 
4015 * @obj: object to act on 4016 * @cache_level: new cache level to set for the object 4017 * 4018 * After this function returns, the object will be in the new cache-level 4019 * across all GTT and the contents of the backing storage will be coherent, 4020 * with respect to the new cache-level. In order to keep the backing storage 4021 * coherent for all users, we only allow a single cache level to be set 4022 * globally on the object and prevent it from being changed whilst the 4023 * hardware is reading from the object. That is, if the object is currently 4024 * on the scanout it will be set to uncached (or equivalent display 4025 * cache coherency) and all non-MOCS GPU access will also be uncached so 4026 * that all direct access to the scanout remains coherent. 4027 */ 4028 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4029 enum i915_cache_level cache_level) 4030 { 4031 struct i915_vma *vma; 4032 int ret; 4033 4034 lockdep_assert_held(&obj->base.dev->struct_mutex); 4035 4036 if (obj->cache_level == cache_level) 4037 return 0; 4038 4039 /* Inspect the list of currently bound VMA and unbind any that would 4040 * be invalid given the new cache-level. This is principally to 4041 * catch the issue of the CS prefetch crossing page boundaries and 4042 * reading an invalid PTE on older architectures. 4043 */ 4044 restart: 4045 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4046 if (!drm_mm_node_allocated(&vma->node)) 4047 continue; 4048 4049 if (i915_vma_is_pinned(vma)) { 4050 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4051 return -EBUSY; 4052 } 4053 4054 if (!i915_vma_is_closed(vma) && 4055 i915_gem_valid_gtt_space(vma, cache_level)) 4056 continue; 4057 4058 ret = i915_vma_unbind(vma); 4059 if (ret) 4060 return ret; 4061 4062 /* As unbinding may affect other elements in the 4063 * obj->vma_list (due to side-effects from retiring 4064 * an active vma), play safe and restart the iterator. 4065 */ 4066 goto restart; 4067 } 4068 4069 /* We can reuse the existing drm_mm nodes but need to change the 4070 * cache-level on the PTE. We could simply unbind them all and 4071 * rebind with the correct cache-level on next use. However since 4072 * we already have a valid slot, dma mapping, pages etc, we may as well 4073 * rewrite the PTE in the belief that doing so tramples upon less 4074 * state and so involves less work. 4075 */ 4076 if (obj->bind_count) { 4077 /* Before we change the PTE, the GPU must not be accessing it. 4078 * If we wait upon the object, we know that all the bound 4079 * VMA are no longer active. 4080 */ 4081 ret = i915_gem_object_wait(obj, 4082 I915_WAIT_INTERRUPTIBLE | 4083 I915_WAIT_LOCKED | 4084 I915_WAIT_ALL, 4085 MAX_SCHEDULE_TIMEOUT, 4086 NULL); 4087 if (ret) 4088 return ret; 4089 4090 if (!HAS_LLC(to_i915(obj->base.dev)) && 4091 cache_level != I915_CACHE_NONE) { 4092 /* Access to snoopable pages through the GTT is 4093 * incoherent and on some machines causes a hard 4094 * lockup. Relinquish the CPU mmapping to force 4095 * userspace to refault in the pages and we can 4096 * then double check if the GTT mapping is still 4097 * valid for that pointer access. 4098 */ 4099 i915_gem_release_mmap(obj); 4100 4101 /* As we no longer need a fence for GTT access, 4102 * we can relinquish it now (and so prevent having 4103 * to steal a fence from someone else on the next 4104 * fence request). Note GPU activity would have 4105 * dropped the fence as all snoopable access is 4106 * supposed to be linear.
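 *
 * As a concrete (illustrative) case: on a machine without an LLC,
 * switching an object from I915_CACHE_NONE to a snoopable level such
 * as I915_CACHE_LLC therefore zaps its CPU mmap via
 * i915_gem_release_mmap() above and drops the fence on each GGTT VMA
 * below; userspace simply refaults on its next access.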
4107 */ 4108 for_each_ggtt_vma(vma, obj) { 4109 ret = i915_vma_put_fence(vma); 4110 if (ret) 4111 return ret; 4112 } 4113 } else { 4114 /* We either have incoherent backing store and 4115 * so no GTT access or the architecture is fully 4116 * coherent. In such cases, existing GTT mmaps 4117 * ignore the cache bit in the PTE and we can 4118 * rewrite it without confusing the GPU or having 4119 * to force userspace to fault back in its mmaps. 4120 */ 4121 } 4122 4123 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4124 if (!drm_mm_node_allocated(&vma->node)) 4125 continue; 4126 4127 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4128 if (ret) 4129 return ret; 4130 } 4131 } 4132 4133 list_for_each_entry(vma, &obj->vma_list, obj_link) 4134 vma->node.color = cache_level; 4135 i915_gem_object_set_cache_coherency(obj, cache_level); 4136 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4137 4138 return 0; 4139 } 4140 4141 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4142 struct drm_file *file) 4143 { 4144 struct drm_i915_gem_caching *args = data; 4145 struct drm_i915_gem_object *obj; 4146 int err = 0; 4147 4148 rcu_read_lock(); 4149 obj = i915_gem_object_lookup_rcu(file, args->handle); 4150 if (!obj) { 4151 err = -ENOENT; 4152 goto out; 4153 } 4154 4155 switch (obj->cache_level) { 4156 case I915_CACHE_LLC: 4157 case I915_CACHE_L3_LLC: 4158 args->caching = I915_CACHING_CACHED; 4159 break; 4160 4161 case I915_CACHE_WT: 4162 args->caching = I915_CACHING_DISPLAY; 4163 break; 4164 4165 default: 4166 args->caching = I915_CACHING_NONE; 4167 break; 4168 } 4169 out: 4170 rcu_read_unlock(); 4171 return err; 4172 } 4173 4174 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4175 struct drm_file *file) 4176 { 4177 struct drm_i915_private *i915 = to_i915(dev); 4178 struct drm_i915_gem_caching *args = data; 4179 struct drm_i915_gem_object *obj; 4180 enum i915_cache_level level; 4181 int ret = 0; 4182 4183 switch (args->caching) { 4184 case I915_CACHING_NONE: 4185 level = I915_CACHE_NONE; 4186 break; 4187 case I915_CACHING_CACHED: 4188 /* 4189 * Due to a HW issue on BXT A stepping, GPU stores via a 4190 * snooped mapping may leave stale data in a corresponding CPU 4191 * cacheline, whereas normally such cachelines would get 4192 * invalidated. 4193 */ 4194 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4195 return -ENODEV; 4196 4197 level = I915_CACHE_LLC; 4198 break; 4199 case I915_CACHING_DISPLAY: 4200 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4201 break; 4202 default: 4203 return -EINVAL; 4204 } 4205 4206 obj = i915_gem_object_lookup(file, args->handle); 4207 if (!obj) 4208 return -ENOENT; 4209 4210 /* 4211 * The caching mode of proxy object is handled by its generator, and 4212 * not allowed to be changed by userspace. 4213 */ 4214 if (i915_gem_object_is_proxy(obj)) { 4215 ret = -ENXIO; 4216 goto out; 4217 } 4218 4219 if (obj->cache_level == level) 4220 goto out; 4221 4222 ret = i915_gem_object_wait(obj, 4223 I915_WAIT_INTERRUPTIBLE, 4224 MAX_SCHEDULE_TIMEOUT, 4225 to_rps_client(file)); 4226 if (ret) 4227 goto out; 4228 4229 ret = i915_mutex_lock_interruptible(dev); 4230 if (ret) 4231 goto out; 4232 4233 ret = i915_gem_object_set_cache_level(obj, level); 4234 mutex_unlock(&dev->struct_mutex); 4235 4236 out: 4237 i915_gem_object_put(obj); 4238 return ret; 4239 } 4240 4241 /* 4242 * Prepare buffer for display plane (scanout, cursors, etc). 
Can be called from 4243 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4244 * (for pageflips). We only flush the caches while preparing the buffer for 4245 * display, the callers are responsible for frontbuffer flush. 4246 */ 4247 struct i915_vma * 4248 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4249 u32 alignment, 4250 const struct i915_ggtt_view *view, 4251 unsigned int flags) 4252 { 4253 struct i915_vma *vma; 4254 int ret; 4255 4256 lockdep_assert_held(&obj->base.dev->struct_mutex); 4257 4258 /* Mark the global pin early so that we account for the 4259 * display coherency whilst setting up the cache domains. 4260 */ 4261 obj->pin_global++; 4262 4263 /* The display engine is not coherent with the LLC cache on gen6. As 4264 * a result, we make sure that the pinning that is about to occur is 4265 * done with uncached PTEs. This is lowest common denominator for all 4266 * chipsets. 4267 * 4268 * However for gen6+, we could do better by using the GFDT bit instead 4269 * of uncaching, which would allow us to flush all the LLC-cached data 4270 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4271 */ 4272 ret = i915_gem_object_set_cache_level(obj, 4273 HAS_WT(to_i915(obj->base.dev)) ? 4274 I915_CACHE_WT : I915_CACHE_NONE); 4275 if (ret) { 4276 vma = ERR_PTR(ret); 4277 goto err_unpin_global; 4278 } 4279 4280 /* As the user may map the buffer once pinned in the display plane 4281 * (e.g. libkms for the bootup splash), we have to ensure that we 4282 * always use map_and_fenceable for all scanout buffers. However, 4283 * it may simply be too big to fit into mappable, in which case 4284 * put it anyway and hope that userspace can cope (but always first 4285 * try to preserve the existing ABI). 4286 */ 4287 vma = ERR_PTR(-ENOSPC); 4288 if ((flags & PIN_MAPPABLE) == 0 && 4289 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4290 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4291 flags | 4292 PIN_MAPPABLE | 4293 PIN_NONBLOCK); 4294 if (IS_ERR(vma)) 4295 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4296 if (IS_ERR(vma)) 4297 goto err_unpin_global; 4298 4299 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4300 4301 __i915_gem_object_flush_for_display(obj); 4302 4303 /* It should now be out of any other write domains, and we can update 4304 * the domain values for our changes. 4305 */ 4306 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4307 4308 return vma; 4309 4310 err_unpin_global: 4311 obj->pin_global--; 4312 return vma; 4313 } 4314 4315 void 4316 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4317 { 4318 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4319 4320 if (WARN_ON(vma->obj->pin_global == 0)) 4321 return; 4322 4323 if (--vma->obj->pin_global == 0) 4324 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4325 4326 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4327 i915_gem_object_bump_inactive_ggtt(vma->obj); 4328 4329 i915_vma_unpin(vma); 4330 } 4331 4332 /** 4333 * Moves a single object to the CPU read, and possibly write domain. 4334 * @obj: object to act on 4335 * @write: requesting write or read-only access 4336 * 4337 * This function returns when the move is complete, including waiting on 4338 * flushes to occur. 
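 *
 * A minimal sketch of a caller (illustrative only; struct_mutex must
 * already be held, as asserted below):
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 *	if (ret)
 *		return ret;
 *	vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WB);
 *
 * which is essentially the pattern __intel_engines_record_defaults()
 * uses to read back the default context image.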
4339 */ 4340 int 4341 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4342 { 4343 int ret; 4344 4345 lockdep_assert_held(&obj->base.dev->struct_mutex); 4346 4347 ret = i915_gem_object_wait(obj, 4348 I915_WAIT_INTERRUPTIBLE | 4349 I915_WAIT_LOCKED | 4350 (write ? I915_WAIT_ALL : 0), 4351 MAX_SCHEDULE_TIMEOUT, 4352 NULL); 4353 if (ret) 4354 return ret; 4355 4356 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4357 4358 /* Flush the CPU cache if it's still invalid. */ 4359 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4360 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4361 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4362 } 4363 4364 /* It should now be out of any other write domains, and we can update 4365 * the domain values for our changes. 4366 */ 4367 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4368 4369 /* If we're writing through the CPU, then the GPU read domains will 4370 * need to be invalidated at next use. 4371 */ 4372 if (write) 4373 __start_cpu_write(obj); 4374 4375 return 0; 4376 } 4377 4378 /* Throttle our rendering by waiting until the ring has completed our requests 4379 * emitted over 20 msec ago. 4380 * 4381 * Note that if we were to use the current jiffies each time around the loop, 4382 * we wouldn't escape the function with any frames outstanding if the time to 4383 * render a frame was over 20ms. 4384 * 4385 * This should get us reasonable parallelism between CPU and GPU but also 4386 * relatively low latency when blocking on a particular request to finish. 4387 */ 4388 static int 4389 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4390 { 4391 struct drm_i915_private *dev_priv = to_i915(dev); 4392 struct drm_i915_file_private *file_priv = file->driver_priv; 4393 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4394 struct i915_request *request, *target = NULL; 4395 long ret; 4396 4397 /* ABI: return -EIO if already wedged */ 4398 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4399 return -EIO; 4400 4401 spin_lock(&file_priv->mm.lock); 4402 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4403 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4404 break; 4405 4406 if (target) { 4407 list_del(&target->client_link); 4408 target->file_priv = NULL; 4409 } 4410 4411 target = request; 4412 } 4413 if (target) 4414 i915_request_get(target); 4415 spin_unlock(&file_priv->mm.lock); 4416 4417 if (target == NULL) 4418 return 0; 4419 4420 ret = i915_request_wait(target, 4421 I915_WAIT_INTERRUPTIBLE, 4422 MAX_SCHEDULE_TIMEOUT); 4423 i915_request_put(target); 4424 4425 return ret < 0 ? ret : 0; 4426 } 4427 4428 struct i915_vma * 4429 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4430 const struct i915_ggtt_view *view, 4431 u64 size, 4432 u64 alignment, 4433 u64 flags) 4434 { 4435 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4436 struct i915_address_space *vm = &dev_priv->ggtt.vm; 4437 struct i915_vma *vma; 4438 int ret; 4439 4440 lockdep_assert_held(&obj->base.dev->struct_mutex); 4441 4442 if (flags & PIN_MAPPABLE && 4443 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4444 /* If the required space is larger than the available 4445 * aperture, we will not able to find a slot for the 4446 * object and unbinding the object now will be in 4447 * vain. Worse, doing so may cause us to ping-pong 4448 * the object in and out of the Global GTT and 4449 * waste a lot of cycles under the mutex. 
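 *
 * As a worked example with hypothetical numbers: for a PIN_MAPPABLE
 * request against a 256 MiB mappable aperture, a 512 MiB object can
 * never fit and is failed with -E2BIG immediately, while under
 * PIN_NONBLOCK anything larger than 128 MiB is failed with -ENOSPC by
 * the half-the-aperture heuristic explained in the next comment.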
4450 */ 4451 if (obj->base.size > dev_priv->ggtt.mappable_end) 4452 return ERR_PTR(-E2BIG); 4453 4454 /* If NONBLOCK is set the caller is optimistically 4455 * trying to cache the full object within the mappable 4456 * aperture, and *must* have a fallback in place for 4457 * situations where we cannot bind the object. We 4458 * can be a little more lax here and use the fallback 4459 * more often to avoid costly migrations of ourselves 4460 * and other objects within the aperture. 4461 * 4462 * Half-the-aperture is used as a simple heuristic. 4463 * More interesting would to do search for a free 4464 * block prior to making the commitment to unbind. 4465 * That caters for the self-harm case, and with a 4466 * little more heuristics (e.g. NOFAULT, NOEVICT) 4467 * we could try to minimise harm to others. 4468 */ 4469 if (flags & PIN_NONBLOCK && 4470 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4471 return ERR_PTR(-ENOSPC); 4472 } 4473 4474 vma = i915_vma_instance(obj, vm, view); 4475 if (unlikely(IS_ERR(vma))) 4476 return vma; 4477 4478 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4479 if (flags & PIN_NONBLOCK) { 4480 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4481 return ERR_PTR(-ENOSPC); 4482 4483 if (flags & PIN_MAPPABLE && 4484 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4485 return ERR_PTR(-ENOSPC); 4486 } 4487 4488 WARN(i915_vma_is_pinned(vma), 4489 "bo is already pinned in ggtt with incorrect alignment:" 4490 " offset=%08x, req.alignment=%llx," 4491 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4492 i915_ggtt_offset(vma), alignment, 4493 !!(flags & PIN_MAPPABLE), 4494 i915_vma_is_map_and_fenceable(vma)); 4495 ret = i915_vma_unbind(vma); 4496 if (ret) 4497 return ERR_PTR(ret); 4498 } 4499 4500 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4501 if (ret) 4502 return ERR_PTR(ret); 4503 4504 return vma; 4505 } 4506 4507 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4508 { 4509 /* Note that we could alias engines in the execbuf API, but 4510 * that would be very unwise as it prevents userspace from 4511 * fine control over engine selection. Ahem. 4512 * 4513 * This should be something like EXEC_MAX_ENGINE instead of 4514 * I915_NUM_ENGINES. 4515 */ 4516 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4517 return 0x10000 << id; 4518 } 4519 4520 static __always_inline unsigned int __busy_write_id(unsigned int id) 4521 { 4522 /* The uABI guarantees an active writer is also amongst the read 4523 * engines. This would be true if we accessed the activity tracking 4524 * under the lock, but as we perform the lookup of the object and 4525 * its activity locklessly we can not guarantee that the last_write 4526 * being active implies that we have set the same engine flag from 4527 * last_read - hence we always set both read and write busy for 4528 * last_write. 4529 */ 4530 return id | __busy_read_flag(id); 4531 } 4532 4533 static __always_inline unsigned int 4534 __busy_set_if_active(const struct dma_fence *fence, 4535 unsigned int (*flag)(unsigned int id)) 4536 { 4537 struct i915_request *rq; 4538 4539 /* We have to check the current hw status of the fence as the uABI 4540 * guarantees forward progress. We could rely on the idle worker 4541 * to eventually flush us, but to minimise latency just ask the 4542 * hardware. 4543 * 4544 * Note we only report on the status of native fences. 
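 *
 * To illustrate the encoding returned through the flag() callback: a
 * request still executing on the engine with uabi_id 2 contributes
 * 0x10000 << 2 = 0x40000 to the read set via __busy_read_flag(), and
 * 0x40002 via __busy_write_id() if that engine is also the last
 * writer.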
4545 */ 4546 if (!dma_fence_is_i915(fence)) 4547 return 0; 4548 4549 /* opencode to_request() in order to avoid const warnings */ 4550 rq = container_of(fence, struct i915_request, fence); 4551 if (i915_request_completed(rq)) 4552 return 0; 4553 4554 return flag(rq->engine->uabi_id); 4555 } 4556 4557 static __always_inline unsigned int 4558 busy_check_reader(const struct dma_fence *fence) 4559 { 4560 return __busy_set_if_active(fence, __busy_read_flag); 4561 } 4562 4563 static __always_inline unsigned int 4564 busy_check_writer(const struct dma_fence *fence) 4565 { 4566 if (!fence) 4567 return 0; 4568 4569 return __busy_set_if_active(fence, __busy_write_id); 4570 } 4571 4572 int 4573 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4574 struct drm_file *file) 4575 { 4576 struct drm_i915_gem_busy *args = data; 4577 struct drm_i915_gem_object *obj; 4578 struct reservation_object_list *list; 4579 unsigned int seq; 4580 int err; 4581 4582 err = -ENOENT; 4583 rcu_read_lock(); 4584 obj = i915_gem_object_lookup_rcu(file, args->handle); 4585 if (!obj) 4586 goto out; 4587 4588 /* A discrepancy here is that we do not report the status of 4589 * non-i915 fences, i.e. even though we may report the object as idle, 4590 * a call to set-domain may still stall waiting for foreign rendering. 4591 * This also means that wait-ioctl may report an object as busy, 4592 * where busy-ioctl considers it idle. 4593 * 4594 * We trade the ability to warn of foreign fences to report on which 4595 * i915 engines are active for the object. 4596 * 4597 * Alternatively, we can trade that extra information on read/write 4598 * activity with 4599 * args->busy = 4600 * !reservation_object_test_signaled_rcu(obj->resv, true); 4601 * to report the overall busyness. This is what the wait-ioctl does. 
4602 * 4603 */ 4604 retry: 4605 seq = raw_read_seqcount(&obj->resv->seq); 4606 4607 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4608 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4609 4610 /* Translate shared fences to READ set of engines */ 4611 list = rcu_dereference(obj->resv->fence); 4612 if (list) { 4613 unsigned int shared_count = list->shared_count, i; 4614 4615 for (i = 0; i < shared_count; ++i) { 4616 struct dma_fence *fence = 4617 rcu_dereference(list->shared[i]); 4618 4619 args->busy |= busy_check_reader(fence); 4620 } 4621 } 4622 4623 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4624 goto retry; 4625 4626 err = 0; 4627 out: 4628 rcu_read_unlock(); 4629 return err; 4630 } 4631 4632 int 4633 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4634 struct drm_file *file_priv) 4635 { 4636 return i915_gem_ring_throttle(dev, file_priv); 4637 } 4638 4639 int 4640 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4641 struct drm_file *file_priv) 4642 { 4643 struct drm_i915_private *dev_priv = to_i915(dev); 4644 struct drm_i915_gem_madvise *args = data; 4645 struct drm_i915_gem_object *obj; 4646 int err; 4647 4648 switch (args->madv) { 4649 case I915_MADV_DONTNEED: 4650 case I915_MADV_WILLNEED: 4651 break; 4652 default: 4653 return -EINVAL; 4654 } 4655 4656 obj = i915_gem_object_lookup(file_priv, args->handle); 4657 if (!obj) 4658 return -ENOENT; 4659 4660 err = mutex_lock_interruptible(&obj->mm.lock); 4661 if (err) 4662 goto out; 4663 4664 if (i915_gem_object_has_pages(obj) && 4665 i915_gem_object_is_tiled(obj) && 4666 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4667 if (obj->mm.madv == I915_MADV_WILLNEED) { 4668 GEM_BUG_ON(!obj->mm.quirked); 4669 __i915_gem_object_unpin_pages(obj); 4670 obj->mm.quirked = false; 4671 } 4672 if (args->madv == I915_MADV_WILLNEED) { 4673 GEM_BUG_ON(obj->mm.quirked); 4674 __i915_gem_object_pin_pages(obj); 4675 obj->mm.quirked = true; 4676 } 4677 } 4678 4679 if (obj->mm.madv != __I915_MADV_PURGED) 4680 obj->mm.madv = args->madv; 4681 4682 /* if the object is no longer attached, discard its backing storage */ 4683 if (obj->mm.madv == I915_MADV_DONTNEED && 4684 !i915_gem_object_has_pages(obj)) 4685 i915_gem_object_truncate(obj); 4686 4687 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4688 mutex_unlock(&obj->mm.lock); 4689 4690 out: 4691 i915_gem_object_put(obj); 4692 return err; 4693 } 4694 4695 static void 4696 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4697 { 4698 struct drm_i915_gem_object *obj = 4699 container_of(active, typeof(*obj), frontbuffer_write); 4700 4701 intel_fb_obj_flush(obj, ORIGIN_CS); 4702 } 4703 4704 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4705 const struct drm_i915_gem_object_ops *ops) 4706 { 4707 mutex_init(&obj->mm.lock); 4708 4709 INIT_LIST_HEAD(&obj->vma_list); 4710 INIT_LIST_HEAD(&obj->lut_list); 4711 INIT_LIST_HEAD(&obj->batch_pool_link); 4712 4713 obj->ops = ops; 4714 4715 reservation_object_init(&obj->__builtin_resv); 4716 obj->resv = &obj->__builtin_resv; 4717 4718 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4719 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4720 4721 obj->mm.madv = I915_MADV_WILLNEED; 4722 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4723 mutex_init(&obj->mm.get_page.lock); 4724 4725 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4726 } 4727 4728 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 4729 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4730 I915_GEM_OBJECT_IS_SHRINKABLE, 4731 4732 .get_pages = i915_gem_object_get_pages_gtt, 4733 .put_pages = i915_gem_object_put_pages_gtt, 4734 4735 .pwrite = i915_gem_object_pwrite_gtt, 4736 }; 4737 4738 static int i915_gem_object_create_shmem(struct drm_device *dev, 4739 struct drm_gem_object *obj, 4740 size_t size) 4741 { 4742 struct drm_i915_private *i915 = to_i915(dev); 4743 unsigned long flags = VM_NORESERVE; 4744 struct file *filp; 4745 4746 drm_gem_private_object_init(dev, obj, size); 4747 4748 if (i915->mm.gemfs) 4749 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4750 flags); 4751 else 4752 filp = shmem_file_setup("i915", size, flags); 4753 4754 if (IS_ERR(filp)) 4755 return PTR_ERR(filp); 4756 4757 obj->filp = filp; 4758 4759 return 0; 4760 } 4761 4762 struct drm_i915_gem_object * 4763 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4764 { 4765 struct drm_i915_gem_object *obj; 4766 struct address_space *mapping; 4767 unsigned int cache_level; 4768 gfp_t mask; 4769 int ret; 4770 4771 /* There is a prevalence of the assumption that we fit the object's 4772 * page count inside a 32bit _signed_ variable. Let's document this and 4773 * catch if we ever need to fix it. In the meantime, if you do spot 4774 * such a local variable, please consider fixing! 4775 */ 4776 if (size >> PAGE_SHIFT > INT_MAX) 4777 return ERR_PTR(-E2BIG); 4778 4779 if (overflows_type(size, obj->base.size)) 4780 return ERR_PTR(-E2BIG); 4781 4782 obj = i915_gem_object_alloc(dev_priv); 4783 if (obj == NULL) 4784 return ERR_PTR(-ENOMEM); 4785 4786 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4787 if (ret) 4788 goto fail; 4789 4790 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4791 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4792 /* 965gm cannot relocate objects above 4GiB. */ 4793 mask &= ~__GFP_HIGHMEM; 4794 mask |= __GFP_DMA32; 4795 } 4796 4797 mapping = obj->base.filp->f_mapping; 4798 mapping_set_gfp_mask(mapping, mask); 4799 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4800 4801 i915_gem_object_init(obj, &i915_gem_object_ops); 4802 4803 obj->write_domain = I915_GEM_DOMAIN_CPU; 4804 obj->read_domains = I915_GEM_DOMAIN_CPU; 4805 4806 if (HAS_LLC(dev_priv)) 4807 /* On some devices, we can have the GPU use the LLC (the CPU 4808 * cache) for about a 10% performance improvement 4809 * compared to uncached. Graphics requests other than 4810 * display scanout are coherent with the CPU in 4811 * accessing this cache. This means in this mode we 4812 * don't need to clflush on the CPU side, and on the 4813 * GPU side we only need to flush internal caches to 4814 * get data visible to the CPU. 4815 * 4816 * However, we maintain the display planes as UC, and so 4817 * need to rebind when first used as such. 4818 */ 4819 cache_level = I915_CACHE_LLC; 4820 else 4821 cache_level = I915_CACHE_NONE; 4822 4823 i915_gem_object_set_cache_coherency(obj, cache_level); 4824 4825 trace_i915_gem_object_create(obj); 4826 4827 return obj; 4828 4829 fail: 4830 i915_gem_object_free(obj); 4831 return ERR_PTR(ret); 4832 } 4833 4834 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4835 { 4836 /* If we are the last user of the backing storage (be it shmemfs 4837 * pages or stolen etc), we know that the pages are going to be 4838 * immediately released. In this case, we can then skip copying 4839 * back the contents from the GPU. 
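 *
 * For example (illustrative), an object allocated by
 * i915_gem_object_create_from_data() that userspace never mmaps keeps
 * obj->base.filp->f_count at 1, so on free it is marked
 * I915_MADV_DONTNEED below and its shmemfs pages are simply dropped
 * rather than written back.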
4840 */ 4841 4842 if (obj->mm.madv != I915_MADV_WILLNEED) 4843 return false; 4844 4845 if (obj->base.filp == NULL) 4846 return true; 4847 4848 /* At first glance, this looks racy, but then again so would be 4849 * userspace racing mmap against close. However, the first external 4850 * reference to the filp can only be obtained through the 4851 * i915_gem_mmap_ioctl() which safeguards us against the user 4852 * acquiring such a reference whilst we are in the middle of 4853 * freeing the object. 4854 */ 4855 return atomic_long_read(&obj->base.filp->f_count) == 1; 4856 } 4857 4858 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4859 struct llist_node *freed) 4860 { 4861 struct drm_i915_gem_object *obj, *on; 4862 4863 intel_runtime_pm_get(i915); 4864 llist_for_each_entry_safe(obj, on, freed, freed) { 4865 struct i915_vma *vma, *vn; 4866 4867 trace_i915_gem_object_destroy(obj); 4868 4869 mutex_lock(&i915->drm.struct_mutex); 4870 4871 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4872 list_for_each_entry_safe(vma, vn, 4873 &obj->vma_list, obj_link) { 4874 GEM_BUG_ON(i915_vma_is_active(vma)); 4875 vma->flags &= ~I915_VMA_PIN_MASK; 4876 i915_vma_destroy(vma); 4877 } 4878 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4879 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4880 4881 /* This serializes freeing with the shrinker. Since the free 4882 * is delayed, first by RCU then by the workqueue, we want the 4883 * shrinker to be able to free pages of unreferenced objects, 4884 * or else we may oom whilst there are plenty of deferred 4885 * freed objects. 4886 */ 4887 if (i915_gem_object_has_pages(obj)) { 4888 spin_lock(&i915->mm.obj_lock); 4889 list_del_init(&obj->mm.link); 4890 spin_unlock(&i915->mm.obj_lock); 4891 } 4892 4893 mutex_unlock(&i915->drm.struct_mutex); 4894 4895 GEM_BUG_ON(obj->bind_count); 4896 GEM_BUG_ON(obj->userfault_count); 4897 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4898 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4899 4900 if (obj->ops->release) 4901 obj->ops->release(obj); 4902 4903 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4904 atomic_set(&obj->mm.pages_pin_count, 0); 4905 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4906 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4907 4908 if (obj->base.import_attach) 4909 drm_prime_gem_destroy(&obj->base, NULL); 4910 4911 reservation_object_fini(&obj->__builtin_resv); 4912 drm_gem_object_release(&obj->base); 4913 i915_gem_info_remove_obj(i915, obj->base.size); 4914 4915 kfree(obj->bit_17); 4916 i915_gem_object_free(obj); 4917 4918 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4919 atomic_dec(&i915->mm.free_count); 4920 4921 if (on) 4922 cond_resched(); 4923 } 4924 intel_runtime_pm_put(i915); 4925 } 4926 4927 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4928 { 4929 struct llist_node *freed; 4930 4931 /* Free the oldest, most stale object to keep the free_list short */ 4932 freed = NULL; 4933 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4934 /* Only one consumer of llist_del_first() allowed */ 4935 spin_lock(&i915->mm.free_lock); 4936 freed = llist_del_first(&i915->mm.free_list); 4937 spin_unlock(&i915->mm.free_lock); 4938 } 4939 if (unlikely(freed)) { 4940 freed->next = NULL; 4941 __i915_gem_free_objects(i915, freed); 4942 } 4943 } 4944 4945 static void __i915_gem_free_work(struct work_struct *work) 4946 { 4947 struct drm_i915_private *i915 = 4948 container_of(work, struct drm_i915_private, mm.free_work); 4949 struct llist_node *freed; 4950 4951 /* 4952 * All 
file-owned VMA should have been released by this point through 4953 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4954 * However, the object may also be bound into the global GTT (e.g. 4955 * older GPUs without per-process support, or for direct access through 4956 * the GTT either for the user or for scanout). Those VMA still need to 4957 * unbound now. 4958 */ 4959 4960 spin_lock(&i915->mm.free_lock); 4961 while ((freed = llist_del_all(&i915->mm.free_list))) { 4962 spin_unlock(&i915->mm.free_lock); 4963 4964 __i915_gem_free_objects(i915, freed); 4965 if (need_resched()) 4966 return; 4967 4968 spin_lock(&i915->mm.free_lock); 4969 } 4970 spin_unlock(&i915->mm.free_lock); 4971 } 4972 4973 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4974 { 4975 struct drm_i915_gem_object *obj = 4976 container_of(head, typeof(*obj), rcu); 4977 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4978 4979 /* 4980 * Since we require blocking on struct_mutex to unbind the freed 4981 * object from the GPU before releasing resources back to the 4982 * system, we can not do that directly from the RCU callback (which may 4983 * be a softirq context), but must instead then defer that work onto a 4984 * kthread. We use the RCU callback rather than move the freed object 4985 * directly onto the work queue so that we can mix between using the 4986 * worker and performing frees directly from subsequent allocations for 4987 * crude but effective memory throttling. 4988 */ 4989 if (llist_add(&obj->freed, &i915->mm.free_list)) 4990 queue_work(i915->wq, &i915->mm.free_work); 4991 } 4992 4993 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4994 { 4995 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4996 4997 if (obj->mm.quirked) 4998 __i915_gem_object_unpin_pages(obj); 4999 5000 if (discard_backing_storage(obj)) 5001 obj->mm.madv = I915_MADV_DONTNEED; 5002 5003 /* 5004 * Before we free the object, make sure any pure RCU-only 5005 * read-side critical sections are complete, e.g. 5006 * i915_gem_busy_ioctl(). For the corresponding synchronized 5007 * lookup see i915_gem_object_lookup_rcu(). 5008 */ 5009 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 5010 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 5011 } 5012 5013 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 5014 { 5015 lockdep_assert_held(&obj->base.dev->struct_mutex); 5016 5017 if (!i915_gem_object_has_active_reference(obj) && 5018 i915_gem_object_is_active(obj)) 5019 i915_gem_object_set_active_reference(obj); 5020 else 5021 i915_gem_object_put(obj); 5022 } 5023 5024 void i915_gem_sanitize(struct drm_i915_private *i915) 5025 { 5026 int err; 5027 5028 GEM_TRACE("\n"); 5029 5030 mutex_lock(&i915->drm.struct_mutex); 5031 5032 intel_runtime_pm_get(i915); 5033 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5034 5035 /* 5036 * As we have just resumed the machine and woken the device up from 5037 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 5038 * back to defaults, recovering from whatever wedged state we left it 5039 * in and so worth trying to use the device once more. 5040 */ 5041 if (i915_terminally_wedged(&i915->gpu_error)) 5042 i915_gem_unset_wedged(i915); 5043 5044 /* 5045 * If we inherit context state from the BIOS or earlier occupants 5046 * of the GPU, the GPU may be in an inconsistent state when we 5047 * try to take over. The only way to remove the earlier state 5048 * is by resetting. 
However, resetting on earlier gen is tricky as 5049 * it may impact the display and we are uncertain about the stability 5050 * of the reset, so this could be applied to even earlier gen. 5051 */ 5052 err = -ENODEV; 5053 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5054 err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5055 if (!err) 5056 intel_engines_sanitize(i915); 5057 5058 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5059 intel_runtime_pm_put(i915); 5060 5061 i915_gem_contexts_lost(i915); 5062 mutex_unlock(&i915->drm.struct_mutex); 5063 } 5064 5065 int i915_gem_suspend(struct drm_i915_private *i915) 5066 { 5067 int ret; 5068 5069 GEM_TRACE("\n"); 5070 5071 intel_runtime_pm_get(i915); 5072 intel_suspend_gt_powersave(i915); 5073 5074 mutex_lock(&i915->drm.struct_mutex); 5075 5076 /* 5077 * We have to flush all the executing contexts to main memory so 5078 * that they can saved in the hibernation image. To ensure the last 5079 * context image is coherent, we have to switch away from it. That 5080 * leaves the i915->kernel_context still active when 5081 * we actually suspend, and its image in memory may not match the GPU 5082 * state. Fortunately, the kernel_context is disposable and we do 5083 * not rely on its state. 5084 */ 5085 if (!i915_terminally_wedged(&i915->gpu_error)) { 5086 ret = i915_gem_switch_to_kernel_context(i915); 5087 if (ret) 5088 goto err_unlock; 5089 5090 ret = i915_gem_wait_for_idle(i915, 5091 I915_WAIT_INTERRUPTIBLE | 5092 I915_WAIT_LOCKED | 5093 I915_WAIT_FOR_IDLE_BOOST, 5094 MAX_SCHEDULE_TIMEOUT); 5095 if (ret && ret != -EIO) 5096 goto err_unlock; 5097 5098 assert_kernel_context_is_current(i915); 5099 } 5100 i915_retire_requests(i915); /* ensure we flush after wedging */ 5101 5102 mutex_unlock(&i915->drm.struct_mutex); 5103 5104 intel_uc_suspend(i915); 5105 5106 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); 5107 cancel_delayed_work_sync(&i915->gt.retire_work); 5108 5109 /* 5110 * As the idle_work is rearming if it detects a race, play safe and 5111 * repeat the flush until it is definitely idle. 5112 */ 5113 drain_delayed_work(&i915->gt.idle_work); 5114 5115 /* 5116 * Assert that we successfully flushed all the work and 5117 * reset the GPU back to its idle, low power state. 5118 */ 5119 WARN_ON(i915->gt.awake); 5120 if (WARN_ON(!intel_engines_are_idle(i915))) 5121 i915_gem_set_wedged(i915); /* no hope, discard everything */ 5122 5123 intel_runtime_pm_put(i915); 5124 return 0; 5125 5126 err_unlock: 5127 mutex_unlock(&i915->drm.struct_mutex); 5128 intel_runtime_pm_put(i915); 5129 return ret; 5130 } 5131 5132 void i915_gem_suspend_late(struct drm_i915_private *i915) 5133 { 5134 struct drm_i915_gem_object *obj; 5135 struct list_head *phases[] = { 5136 &i915->mm.unbound_list, 5137 &i915->mm.bound_list, 5138 NULL 5139 }, **phase; 5140 5141 /* 5142 * Neither the BIOS, ourselves or any other kernel 5143 * expects the system to be in execlists mode on startup, 5144 * so we need to reset the GPU back to legacy mode. And the only 5145 * known way to disable logical contexts is through a GPU reset. 5146 * 5147 * So in order to leave the system in a known default configuration, 5148 * always reset the GPU upon unload and suspend. Afterwards we then 5149 * clean up the GEM state tracking, flushing off the requests and 5150 * leaving the system in a known idle state. 5151 * 5152 * Note that is of the upmost importance that the GPU is idle and 5153 * all stray writes are flushed *before* we dismantle the backing 5154 * storage for the pinned objects. 
5155 * 5156 * However, since we are uncertain that resetting the GPU on older 5157 * machines is a good idea, we don't - just in case it leaves the 5158 * machine in an unusable condition. 5159 */ 5160 5161 mutex_lock(&i915->drm.struct_mutex); 5162 for (phase = phases; *phase; phase++) { 5163 list_for_each_entry(obj, *phase, mm.link) 5164 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 5165 } 5166 mutex_unlock(&i915->drm.struct_mutex); 5167 5168 intel_uc_sanitize(i915); 5169 i915_gem_sanitize(i915); 5170 } 5171 5172 void i915_gem_resume(struct drm_i915_private *i915) 5173 { 5174 GEM_TRACE("\n"); 5175 5176 WARN_ON(i915->gt.awake); 5177 5178 mutex_lock(&i915->drm.struct_mutex); 5179 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5180 5181 i915_gem_restore_gtt_mappings(i915); 5182 i915_gem_restore_fences(i915); 5183 5184 /* 5185 * As we didn't flush the kernel context before suspend, we cannot 5186 * guarantee that the context image is complete. So let's just reset 5187 * it and start again. 5188 */ 5189 i915->gt.resume(i915); 5190 5191 if (i915_gem_init_hw(i915)) 5192 goto err_wedged; 5193 5194 intel_uc_resume(i915); 5195 5196 /* Always reload a context for powersaving. */ 5197 if (i915_gem_switch_to_kernel_context(i915)) 5198 goto err_wedged; 5199 5200 out_unlock: 5201 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5202 mutex_unlock(&i915->drm.struct_mutex); 5203 return; 5204 5205 err_wedged: 5206 if (!i915_terminally_wedged(&i915->gpu_error)) { 5207 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5208 i915_gem_set_wedged(i915); 5209 } 5210 goto out_unlock; 5211 } 5212 5213 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5214 { 5215 if (INTEL_GEN(dev_priv) < 5 || 5216 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5217 return; 5218 5219 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5220 DISP_TILE_SURFACE_SWIZZLING); 5221 5222 if (IS_GEN5(dev_priv)) 5223 return; 5224 5225 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5226 if (IS_GEN6(dev_priv)) 5227 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5228 else if (IS_GEN7(dev_priv)) 5229 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5230 else if (IS_GEN8(dev_priv)) 5231 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5232 else 5233 BUG(); 5234 } 5235 5236 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5237 { 5238 I915_WRITE(RING_CTL(base), 0); 5239 I915_WRITE(RING_HEAD(base), 0); 5240 I915_WRITE(RING_TAIL(base), 0); 5241 I915_WRITE(RING_START(base), 0); 5242 } 5243 5244 static void init_unused_rings(struct drm_i915_private *dev_priv) 5245 { 5246 if (IS_I830(dev_priv)) { 5247 init_unused_ring(dev_priv, PRB1_BASE); 5248 init_unused_ring(dev_priv, SRB0_BASE); 5249 init_unused_ring(dev_priv, SRB1_BASE); 5250 init_unused_ring(dev_priv, SRB2_BASE); 5251 init_unused_ring(dev_priv, SRB3_BASE); 5252 } else if (IS_GEN2(dev_priv)) { 5253 init_unused_ring(dev_priv, SRB0_BASE); 5254 init_unused_ring(dev_priv, SRB1_BASE); 5255 } else if (IS_GEN3(dev_priv)) { 5256 init_unused_ring(dev_priv, PRB1_BASE); 5257 init_unused_ring(dev_priv, PRB2_BASE); 5258 } 5259 } 5260 5261 static int __i915_gem_restart_engines(void *data) 5262 { 5263 struct drm_i915_private *i915 = data; 5264 struct intel_engine_cs *engine; 5265 enum intel_engine_id id; 5266 int err; 5267 5268 for_each_engine(engine, i915, id) { 5269 err = engine->init_hw(engine); 5270 if (err) { 5271 DRM_ERROR("Failed to restart %s (%d)\n", 5272 engine->name, err); 
5273 return err; 5274 } 5275 } 5276 5277 return 0; 5278 } 5279 5280 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5281 { 5282 int ret; 5283 5284 dev_priv->gt.last_init_time = ktime_get(); 5285 5286 /* Double layer security blanket, see i915_gem_init() */ 5287 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5288 5289 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5290 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5291 5292 if (IS_HASWELL(dev_priv)) 5293 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5294 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5295 5296 if (HAS_PCH_NOP(dev_priv)) { 5297 if (IS_IVYBRIDGE(dev_priv)) { 5298 u32 temp = I915_READ(GEN7_MSG_CTL); 5299 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5300 I915_WRITE(GEN7_MSG_CTL, temp); 5301 } else if (INTEL_GEN(dev_priv) >= 7) { 5302 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5303 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5304 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5305 } 5306 } 5307 5308 intel_gt_workarounds_apply(dev_priv); 5309 5310 i915_gem_init_swizzling(dev_priv); 5311 5312 /* 5313 * At least 830 can leave some of the unused rings 5314 * "active" (ie. head != tail) after resume which 5315 * will prevent c3 entry. Makes sure all unused rings 5316 * are totally idle. 5317 */ 5318 init_unused_rings(dev_priv); 5319 5320 BUG_ON(!dev_priv->kernel_context); 5321 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5322 ret = -EIO; 5323 goto out; 5324 } 5325 5326 ret = i915_ppgtt_init_hw(dev_priv); 5327 if (ret) { 5328 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5329 goto out; 5330 } 5331 5332 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5333 if (ret) { 5334 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5335 goto out; 5336 } 5337 5338 /* We can't enable contexts until all firmware is loaded */ 5339 ret = intel_uc_init_hw(dev_priv); 5340 if (ret) { 5341 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5342 goto out; 5343 } 5344 5345 intel_mocs_init_l3cc_table(dev_priv); 5346 5347 /* Only when the HW is re-initialised, can we replay the requests */ 5348 ret = __i915_gem_restart_engines(dev_priv); 5349 if (ret) 5350 goto cleanup_uc; 5351 5352 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5353 5354 return 0; 5355 5356 cleanup_uc: 5357 intel_uc_fini_hw(dev_priv); 5358 out: 5359 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5360 5361 return ret; 5362 } 5363 5364 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5365 { 5366 struct i915_gem_context *ctx; 5367 struct intel_engine_cs *engine; 5368 enum intel_engine_id id; 5369 int err; 5370 5371 /* 5372 * As we reset the gpu during very early sanitisation, the current 5373 * register state on the GPU should reflect its defaults values. 5374 * We load a context onto the hw (with restore-inhibit), then switch 5375 * over to a second context to save that default register state. We 5376 * can then prime every new context with that state so they all start 5377 * from the same default HW values. 
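 *
 * Concretely, the sequence below is: allocate a request on each engine
 * from a fresh kernel context and run engine->init_context(); switch
 * back to the kernel context and wait for idle; then unbind each
 * context image, move it to the CPU domain and keep a reference to it
 * as engine->default_state.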
5378 */ 5379 5380 ctx = i915_gem_context_create_kernel(i915, 0); 5381 if (IS_ERR(ctx)) 5382 return PTR_ERR(ctx); 5383 5384 for_each_engine(engine, i915, id) { 5385 struct i915_request *rq; 5386 5387 rq = i915_request_alloc(engine, ctx); 5388 if (IS_ERR(rq)) { 5389 err = PTR_ERR(rq); 5390 goto out_ctx; 5391 } 5392 5393 err = 0; 5394 if (engine->init_context) 5395 err = engine->init_context(rq); 5396 5397 i915_request_add(rq); 5398 if (err) 5399 goto err_active; 5400 } 5401 5402 err = i915_gem_switch_to_kernel_context(i915); 5403 if (err) 5404 goto err_active; 5405 5406 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 5407 i915_gem_set_wedged(i915); 5408 err = -EIO; /* Caller will declare us wedged */ 5409 goto err_active; 5410 } 5411 5412 assert_kernel_context_is_current(i915); 5413 5414 /* 5415 * Immediately park the GPU so that we enable powersaving and 5416 * treat it as idle. The next time we issue a request, we will 5417 * unpark and start using the engine->pinned_default_state, otherwise 5418 * it is in limbo and an early reset may fail. 5419 */ 5420 __i915_gem_park(i915); 5421 5422 for_each_engine(engine, i915, id) { 5423 struct i915_vma *state; 5424 void *vaddr; 5425 5426 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); 5427 5428 state = to_intel_context(ctx, engine)->state; 5429 if (!state) 5430 continue; 5431 5432 /* 5433 * As we will hold a reference to the logical state, it will 5434 * not be torn down with the context, and importantly the 5435 * object will hold onto its vma (making it possible for a 5436 * stray GTT write to corrupt our defaults). Unmap the vma 5437 * from the GTT to prevent such accidents and reclaim the 5438 * space. 5439 */ 5440 err = i915_vma_unbind(state); 5441 if (err) 5442 goto err_active; 5443 5444 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5445 if (err) 5446 goto err_active; 5447 5448 engine->default_state = i915_gem_object_get(state->obj); 5449 5450 /* Check we can acquire the image of the context state */ 5451 vaddr = i915_gem_object_pin_map(engine->default_state, 5452 I915_MAP_FORCE_WB); 5453 if (IS_ERR(vaddr)) { 5454 err = PTR_ERR(vaddr); 5455 goto err_active; 5456 } 5457 5458 i915_gem_object_unpin_map(engine->default_state); 5459 } 5460 5461 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5462 unsigned int found = intel_engines_has_context_isolation(i915); 5463 5464 /* 5465 * Make sure that classes with multiple engine instances all 5466 * share the same basic configuration. 5467 */ 5468 for_each_engine(engine, i915, id) { 5469 unsigned int bit = BIT(engine->uabi_class); 5470 unsigned int expected = engine->default_state ? bit : 0; 5471 5472 if ((found & bit) != expected) { 5473 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5474 engine->uabi_class, engine->name); 5475 } 5476 } 5477 } 5478 5479 out_ctx: 5480 i915_gem_context_set_closed(ctx); 5481 i915_gem_context_put(ctx); 5482 return err; 5483 5484 err_active: 5485 /* 5486 * If we have to abandon now, we expect the engines to be idle 5487 * and ready to be torn-down. First try to flush any remaining 5488 * request, ensure we are pointing at the kernel context and 5489 * then remove it. 
5490 */ 5491 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5492 goto out_ctx; 5493 5494 if (WARN_ON(i915_gem_wait_for_idle(i915, 5495 I915_WAIT_LOCKED, 5496 MAX_SCHEDULE_TIMEOUT))) 5497 goto out_ctx; 5498 5499 i915_gem_contexts_lost(i915); 5500 goto out_ctx; 5501 } 5502 5503 int i915_gem_init(struct drm_i915_private *dev_priv) 5504 { 5505 int ret; 5506 5507 /* We need to fallback to 4K pages if host doesn't support huge gtt. */ 5508 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5509 mkwrite_device_info(dev_priv)->page_sizes = 5510 I915_GTT_PAGE_SIZE_4K; 5511 5512 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5513 5514 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5515 dev_priv->gt.resume = intel_lr_context_resume; 5516 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5517 } else { 5518 dev_priv->gt.resume = intel_legacy_submission_resume; 5519 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5520 } 5521 5522 ret = i915_gem_init_userptr(dev_priv); 5523 if (ret) 5524 return ret; 5525 5526 ret = intel_uc_init_misc(dev_priv); 5527 if (ret) 5528 return ret; 5529 5530 ret = intel_wopcm_init(&dev_priv->wopcm); 5531 if (ret) 5532 goto err_uc_misc; 5533 5534 /* This is just a security blanket to placate dragons. 5535 * On some systems, we very sporadically observe that the first TLBs 5536 * used by the CS may be stale, despite us poking the TLB reset. If 5537 * we hold the forcewake during initialisation these problems 5538 * just magically go away. 5539 */ 5540 mutex_lock(&dev_priv->drm.struct_mutex); 5541 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5542 5543 ret = i915_gem_init_ggtt(dev_priv); 5544 if (ret) { 5545 GEM_BUG_ON(ret == -EIO); 5546 goto err_unlock; 5547 } 5548 5549 ret = i915_gem_contexts_init(dev_priv); 5550 if (ret) { 5551 GEM_BUG_ON(ret == -EIO); 5552 goto err_ggtt; 5553 } 5554 5555 ret = intel_engines_init(dev_priv); 5556 if (ret) { 5557 GEM_BUG_ON(ret == -EIO); 5558 goto err_context; 5559 } 5560 5561 intel_init_gt_powersave(dev_priv); 5562 5563 ret = intel_uc_init(dev_priv); 5564 if (ret) 5565 goto err_pm; 5566 5567 ret = i915_gem_init_hw(dev_priv); 5568 if (ret) 5569 goto err_uc_init; 5570 5571 /* 5572 * Despite its name intel_init_clock_gating applies both display 5573 * clock gating workarounds; GT mmio workarounds and the occasional 5574 * GT power context workaround. Worse, sometimes it includes a context 5575 * register workaround which we need to apply before we record the 5576 * default HW state for all contexts. 5577 * 5578 * FIXME: break up the workarounds and apply them at the right time! 5579 */ 5580 intel_init_clock_gating(dev_priv); 5581 5582 ret = __intel_engines_record_defaults(dev_priv); 5583 if (ret) 5584 goto err_init_hw; 5585 5586 if (i915_inject_load_failure()) { 5587 ret = -ENODEV; 5588 goto err_init_hw; 5589 } 5590 5591 if (i915_inject_load_failure()) { 5592 ret = -EIO; 5593 goto err_init_hw; 5594 } 5595 5596 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5597 mutex_unlock(&dev_priv->drm.struct_mutex); 5598 5599 return 0; 5600 5601 /* 5602 * Unwinding is complicated by that we want to handle -EIO to mean 5603 * disable GPU submission but keep KMS alive. We want to mark the 5604 * HW as irrevisibly wedged, but keep enough state around that the 5605 * driver doesn't explode during runtime. 
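 *
 * For example (illustrative), if __intel_engines_record_defaults()
 * wedges the GPU and returns -EIO, we unwind through err_init_hw below
 * without tearing down the engines or contexts, and instead re-enable
 * the GGTT, restore the fences and leave KMS usable.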
5606 */ 5607 err_init_hw: 5608 mutex_unlock(&dev_priv->drm.struct_mutex); 5609 5610 WARN_ON(i915_gem_suspend(dev_priv)); 5611 i915_gem_suspend_late(dev_priv); 5612 5613 i915_gem_drain_workqueue(dev_priv); 5614 5615 mutex_lock(&dev_priv->drm.struct_mutex); 5616 intel_uc_fini_hw(dev_priv); 5617 err_uc_init: 5618 intel_uc_fini(dev_priv); 5619 err_pm: 5620 if (ret != -EIO) { 5621 intel_cleanup_gt_powersave(dev_priv); 5622 i915_gem_cleanup_engines(dev_priv); 5623 } 5624 err_context: 5625 if (ret != -EIO) 5626 i915_gem_contexts_fini(dev_priv); 5627 err_ggtt: 5628 err_unlock: 5629 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5630 mutex_unlock(&dev_priv->drm.struct_mutex); 5631 5632 err_uc_misc: 5633 intel_uc_fini_misc(dev_priv); 5634 5635 if (ret != -EIO) 5636 i915_gem_cleanup_userptr(dev_priv); 5637 5638 if (ret == -EIO) { 5639 mutex_lock(&dev_priv->drm.struct_mutex); 5640 5641 /* 5642 * Allow engine initialisation to fail by marking the GPU as 5643 * wedged. But we only want to do this where the GPU is angry, 5644 * for all other failure, such as an allocation failure, bail. 5645 */ 5646 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5647 i915_load_error(dev_priv, 5648 "Failed to initialize GPU, declaring it wedged!\n"); 5649 i915_gem_set_wedged(dev_priv); 5650 } 5651 5652 /* Minimal basic recovery for KMS */ 5653 ret = i915_ggtt_enable_hw(dev_priv); 5654 i915_gem_restore_gtt_mappings(dev_priv); 5655 i915_gem_restore_fences(dev_priv); 5656 intel_init_clock_gating(dev_priv); 5657 5658 mutex_unlock(&dev_priv->drm.struct_mutex); 5659 } 5660 5661 i915_gem_drain_freed_objects(dev_priv); 5662 return ret; 5663 } 5664 5665 void i915_gem_fini(struct drm_i915_private *dev_priv) 5666 { 5667 i915_gem_suspend_late(dev_priv); 5668 intel_disable_gt_powersave(dev_priv); 5669 5670 /* Flush any outstanding unpin_work. 
*/ 5671 i915_gem_drain_workqueue(dev_priv); 5672 5673 mutex_lock(&dev_priv->drm.struct_mutex); 5674 intel_uc_fini_hw(dev_priv); 5675 intel_uc_fini(dev_priv); 5676 i915_gem_cleanup_engines(dev_priv); 5677 i915_gem_contexts_fini(dev_priv); 5678 mutex_unlock(&dev_priv->drm.struct_mutex); 5679 5680 intel_cleanup_gt_powersave(dev_priv); 5681 5682 intel_uc_fini_misc(dev_priv); 5683 i915_gem_cleanup_userptr(dev_priv); 5684 5685 i915_gem_drain_freed_objects(dev_priv); 5686 5687 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5688 } 5689 5690 void i915_gem_init_mmio(struct drm_i915_private *i915) 5691 { 5692 i915_gem_sanitize(i915); 5693 } 5694 5695 void 5696 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5697 { 5698 struct intel_engine_cs *engine; 5699 enum intel_engine_id id; 5700 5701 for_each_engine(engine, dev_priv, id) 5702 dev_priv->gt.cleanup_engine(engine); 5703 } 5704 5705 void 5706 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5707 { 5708 int i; 5709 5710 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5711 !IS_CHERRYVIEW(dev_priv)) 5712 dev_priv->num_fence_regs = 32; 5713 else if (INTEL_GEN(dev_priv) >= 4 || 5714 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5715 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5716 dev_priv->num_fence_regs = 16; 5717 else 5718 dev_priv->num_fence_regs = 8; 5719 5720 if (intel_vgpu_active(dev_priv)) 5721 dev_priv->num_fence_regs = 5722 I915_READ(vgtif_reg(avail_rs.fence_num)); 5723 5724 /* Initialize fence registers to zero */ 5725 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5726 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5727 5728 fence->i915 = dev_priv; 5729 fence->id = i; 5730 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5731 } 5732 i915_gem_restore_fences(dev_priv); 5733 5734 i915_gem_detect_bit_6_swizzle(dev_priv); 5735 } 5736 5737 static void i915_gem_init__mm(struct drm_i915_private *i915) 5738 { 5739 spin_lock_init(&i915->mm.object_stat_lock); 5740 spin_lock_init(&i915->mm.obj_lock); 5741 spin_lock_init(&i915->mm.free_lock); 5742 5743 init_llist_head(&i915->mm.free_list); 5744 5745 INIT_LIST_HEAD(&i915->mm.unbound_list); 5746 INIT_LIST_HEAD(&i915->mm.bound_list); 5747 INIT_LIST_HEAD(&i915->mm.fence_list); 5748 INIT_LIST_HEAD(&i915->mm.userfault_list); 5749 5750 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5751 } 5752 5753 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5754 { 5755 int err = -ENOMEM; 5756 5757 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5758 if (!dev_priv->objects) 5759 goto err_out; 5760 5761 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5762 if (!dev_priv->vmas) 5763 goto err_objects; 5764 5765 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5766 if (!dev_priv->luts) 5767 goto err_vmas; 5768 5769 dev_priv->requests = KMEM_CACHE(i915_request, 5770 SLAB_HWCACHE_ALIGN | 5771 SLAB_RECLAIM_ACCOUNT | 5772 SLAB_TYPESAFE_BY_RCU); 5773 if (!dev_priv->requests) 5774 goto err_luts; 5775 5776 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5777 SLAB_HWCACHE_ALIGN | 5778 SLAB_RECLAIM_ACCOUNT); 5779 if (!dev_priv->dependencies) 5780 goto err_requests; 5781 5782 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5783 if (!dev_priv->priorities) 5784 goto err_dependencies; 5785 5786 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5787 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5788 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5789 5790 i915_gem_init__mm(dev_priv); 5791 5792 
static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.object_stat_lock);
	spin_lock_init(&i915->mm.obj_lock);
	spin_lock_init(&i915->mm.free_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.unbound_list);
	INIT_LIST_HEAD(&i915->mm.bound_list);
	INIT_LIST_HEAD(&i915->mm.fence_list);
	INIT_LIST_HEAD(&i915->mm.userfault_list);

	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}

int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err = -ENOMEM;

	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->objects)
		goto err_out;

	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->vmas)
		goto err_objects;

	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
	if (!dev_priv->luts)
		goto err_vmas;

	dev_priv->requests = KMEM_CACHE(i915_request,
					SLAB_HWCACHE_ALIGN |
					SLAB_RECLAIM_ACCOUNT |
					SLAB_TYPESAFE_BY_RCU);
	if (!dev_priv->requests)
		goto err_luts;

	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
					    SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT);
	if (!dev_priv->dependencies)
		goto err_requests;

	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->priorities)
		goto err_dependencies;

	INIT_LIST_HEAD(&dev_priv->gt.timelines);
	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

	i915_gem_init__mm(dev_priv);

	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);

	return 0;

err_dependencies:
	kmem_cache_destroy(dev_priv->dependencies);
err_requests:
	kmem_cache_destroy(dev_priv->requests);
err_luts:
	kmem_cache_destroy(dev_priv->luts);
err_vmas:
	kmem_cache_destroy(dev_priv->vmas);
err_objects:
	kmem_cache_destroy(dev_priv->objects);
err_out:
	return err;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);
	WARN_ON(!list_empty(&dev_priv->gt.timelines));

	kmem_cache_destroy(dev_priv->priorities);
	kmem_cache_destroy(dev_priv->dependencies);
	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->luts);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);

	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
	rcu_barrier();

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze().
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
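/*
 * Illustrative usage sketch (hypothetical caller and variable names, not
 * taken from this file): a plane update hands the frontbuffer bits it owns
 * from the old backing object to the new one; either side may be NULL.
 */
#if 0
	i915_gem_track_fb(old_obj, new_obj, frontbuffer_bit);	/* page flip */
	i915_gem_track_fb(NULL, new_obj, frontbuffer_bit);	/* enabling a plane */
	i915_gem_track_fb(old_obj, NULL, frontbuffer_bit);	/* disabling a plane */
#endif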
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
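/*
 * Minimal usage sketch (illustrative only; "blob" and "blob_size" are
 * hypothetical): copy a caller-supplied buffer into a fresh, CPU-domain
 * GEM object and drop the reference once the object is no longer needed.
 */
#if 0
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev_priv, blob, blob_size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... pin and use the object ... */

	i915_gem_object_put(obj);
#endif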
struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		void *entry;
		unsigned long i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		entry = xa_mk_value(idx);
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i, entry);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radix tree will contain a value entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(xa_is_value(sg))) {
		unsigned long base = xa_to_value(sg);

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}
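/*
 * Layout of the lookup cache built by i915_gem_object_get_sg() above, for an
 * sg entry covering pages [idx, idx + count):
 *
 *	radix[idx]		-> the struct scatterlist itself
 *	radix[idx + 1]		-> xa_mk_value(idx)
 *	...
 *	radix[idx + count - 1]	-> xa_mk_value(idx)
 *
 * so a hit on any page of a multi-page entry resolves to the base scatterlist
 * in at most two lookups, which is what the xa_is_value() handling relies on.
 */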
struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}

int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = __i915_gem_object_unset_pages(obj);

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
	}
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#include "selftests/i915_gem.c"
#endif